Browse Source

Fix the unreasonable log output produced when an image tag name is too long

Add log information when the CRC check fails.
tags/v1.1.0
jiangshuqiang 5 years ago
parent
commit
f317763313
5 changed files with 75 additions and 38 deletions
  1. +9
    -0
      mindinsight/datavisual/common/exceptions.py
  2. +18
    -6
      mindinsight/datavisual/data_transform/ms_data_loader.py
  3. +33
    -17
      mindinsight/datavisual/data_transform/summary_parser/event_parser.py
  4. +14
    -15
      mindinsight/scripts/parse_summary.py
  5. +1
    -0
      mindinsight/utils/constant.py

+ 9
- 0
mindinsight/datavisual/common/exceptions.py View File

@@ -64,6 +64,15 @@ class CRCFailedError(MindInsightException):
http_code=400) http_code=400)




class CRCLengthFailedError(MindInsightException):
    """Record length check failed before CRC validation.

    Raised by the summary parsers when the bytes read for a record header,
    an event body, or one of their CRC strings do not have the expected
    length. It is reported with the ``CRC_LENGTH_FAILED`` error code and an
    HTTP status code of 400.
    """

    def __init__(self):
        super().__init__(
            DataVisualErrors.CRC_LENGTH_FAILED,
            'CRC Length Failed.',
            http_code=400,
        )


class SummaryLogIsLoading(MindInsightException): class SummaryLogIsLoading(MindInsightException):
"""Data is loading.""" """Data is loading."""




+ 18
- 6
mindinsight/datavisual/data_transform/ms_data_loader.py View File

@@ -20,6 +20,7 @@ Each instance will read an entire run, a run can contain one or
more log file. more log file.
""" """
import re import re
import time
import struct import struct


from google.protobuf.message import DecodeError from google.protobuf.message import DecodeError
@@ -46,6 +47,7 @@ from mindinsight.utils.exceptions import UnknownError
HEADER_SIZE = 8 HEADER_SIZE = 8
CRC_STR_SIZE = 4 CRC_STR_SIZE = 4
MAX_EVENT_STRING = 500000000 MAX_EVENT_STRING = 500000000
RETRY_TIMES = 2




class MSDataLoader: class MSDataLoader:
@@ -368,10 +370,12 @@ class _SummaryParser(_Parser):
Returns: Returns:
bool, True if the summary file is finished loading. bool, True if the summary file is finished loading.
""" """
crc_check_time = 0
while True: while True:
start_offset = file_handler.offset start_offset = file_handler.offset
try: try:
event_str = self._event_load(file_handler) event_str = self._event_load(file_handler)
crc_check_time = 0
if event_str is None: if event_str is None:
file_handler.reset_offset(start_offset) file_handler.reset_offset(start_offset)
return True return True
@@ -399,6 +403,18 @@ class _SummaryParser(_Parser):


future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback)) future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback))
return False return False
except exceptions.CRCLengthFailedError:
if crc_check_time > RETRY_TIMES:
logger.warning(
"Check crc length failed, please check the summary file integrity, "
"the file may be in transfer, file_path: %s, offset=%s.",
file_handler.file_path, start_offset)
return True
logger.info(
"Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
file_handler.reset_offset(start_offset)
crc_check_time += 1
time.sleep(0.5)
except exceptions.CRCFailedError: except exceptions.CRCFailedError:
file_handler.reset_offset(start_offset) file_handler.reset_offset(start_offset)
logger.warning("Check crc faild and ignore this file, file_path=%s, " logger.warning("Check crc faild and ignore this file, file_path=%s, "
@@ -432,9 +448,7 @@ class _SummaryParser(_Parser):
header_crc_str = '' header_crc_str = ''


if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
logger.warning("Check header size and crc, record truncated at offset %s, "
"file_path=%s.", file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError
if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
raise exceptions.CRCFailedError() raise exceptions.CRCFailedError()


@@ -450,9 +464,7 @@ class _SummaryParser(_Parser):
event_crc_str = '' event_crc_str = ''


if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
logger.warning("Check event crc, record truncated at offset %d, file_path: %s.",
file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError
if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len): if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
raise exceptions.CRCFailedError() raise exceptions.CRCFailedError()




+ 33
- 17
mindinsight/datavisual/data_transform/summary_parser/event_parser.py View File

@@ -18,6 +18,7 @@ Scalar Writer.
This module write scalar into a csv file. This module write scalar into a csv file.
""" """
import os import os
import time
import struct import struct


from google.protobuf.message import DecodeError from google.protobuf.message import DecodeError
@@ -36,6 +37,7 @@ MAX_EVENT_STRING = 500000000
SCALAR = 'scalar_value' SCALAR = 'scalar_value'
IMAGE = 'image' IMAGE = 'image'
INFO_INTERVAL = 10 INFO_INTERVAL = 10
RETRY_TIMES = 2




class EventParser(): class EventParser():
@@ -45,7 +47,6 @@ class EventParser():
self._output = output self._output = output
self._scalar_writer = ScalarWriter(self._output) self._scalar_writer = ScalarWriter(self._output)
self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE)) self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
self._current = 0
self._file_size = 0 self._file_size = 0
self._process_info = 0 self._process_info = 0
self._image_check = False self._image_check = False
@@ -63,15 +64,14 @@ class EventParser():
parse_summary_logger.info("Loading %s.", self.summary_file) parse_summary_logger.info("Loading %s.", self.summary_file)
result = self._load(summary_file_handler) result = self._load(summary_file_handler)


parse_summary_logger.info("Writing scalar.csv")
self._scalar_writer.write()

warning = '' warning = ''
if not self._scalar_check: if not self._scalar_check:
warning = warning + " the summary file contains no scalar value." warning = warning + " the summary file contains no scalar value."
if not self._image_check: if not self._image_check:
warning = warning + " the summary file contains no image." warning = warning + " the summary file contains no image."
if result: if result:
parse_summary_logger.info("Writing parsed data into scalar.csv")
self._scalar_writer.write()
if warning: if warning:
parse_summary_logger.warning(warning) parse_summary_logger.warning(warning)
parse_summary_logger.info("Finished loading %s.", self.summary_file) parse_summary_logger.info("Finished loading %s.", self.summary_file)
@@ -86,9 +86,12 @@ class EventParser():
Returns: Returns:
bool, True if the summary file is finished loading. bool, True if the summary file is finished loading.
""" """
crc_check_time = 0
while True: while True:
start_offset = file_handler.offset
try: try:
event_str = self._event_load(file_handler) event_str = self._event_load(file_handler)
crc_check_time = 0
if event_str is None: if event_str is None:
return True return True
if len(event_str) > MAX_EVENT_STRING: if len(event_str) > MAX_EVENT_STRING:
@@ -96,10 +99,23 @@ class EventParser():
file_handler.file_path, len(event_str), MAX_EVENT_STRING) file_handler.file_path, len(event_str), MAX_EVENT_STRING)
continue continue
self._event_parse(event_str) self._event_parse(event_str)
except exceptions.CRCLengthFailedError:
if crc_check_time > RETRY_TIMES:
parse_summary_logger.error(
"Check crc length failed, please check the summary file integrity, "
"the file may be in transfer, file_path: %s, offset=%s.",
file_handler.file_path, start_offset)
return True
parse_summary_logger.warning(
"Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
file_handler.reset_offset(start_offset)
crc_check_time += 1
time.sleep(0.5)
except exceptions.CRCFailedError: except exceptions.CRCFailedError:
parse_summary_logger.error("Check crc faild, file_path=%s, offset=%s.", file_handler.file_path,
file_handler.offset)
return False
parse_summary_logger.error(
"Check crc failed, the file may have been modified, file_path=%s, offset=%s.",
file_handler.file_path, start_offset)
return True
except (OSError, DecodeError, exceptions.MindInsightException) as ex: except (OSError, DecodeError, exceptions.MindInsightException) as ex:
parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex), parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex),
file_handler.file_path) file_handler.file_path)
@@ -126,9 +142,7 @@ class EventParser():
header_crc_str = '' header_crc_str = ''


if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
parse_summary_logger.error("Check header size and crc, record truncated at offset %s, file_path=%s.",
file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError


if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
raise exceptions.CRCFailedError() raise exceptions.CRCFailedError()
@@ -145,16 +159,18 @@ class EventParser():
event_crc_str = '' event_crc_str = ''


if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
parse_summary_logger.error("Check event crc, record truncated at offset %d, file_path: %s.",
file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError

if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len): if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
raise exceptions.CRCFailedError() raise exceptions.CRCFailedError()
self._current += HEADER_SIZE + 2 * CRC_STR_SIZE + event_len
if self._current >= self._process_info:
parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", self._current, self._file_size,
100 * self._current // self._file_size)

current_offset = file_handler.offset
if current_offset >= self._process_info:
parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", current_offset, self._file_size,
100 * current_offset // os.path.getsize(self.summary_file))
self._process_info += self._file_size // INFO_INTERVAL self._process_info += self._file_size // INFO_INTERVAL
if self._process_info > os.path.getsize(self.summary_file):
self._process_info = os.path.getsize(self.summary_file)
return event_str return event_str


def _event_parse(self, event_str): def _event_parse(self, event_str):


+ 14
- 15
mindinsight/scripts/parse_summary.py View File

@@ -26,7 +26,7 @@ from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser
from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser




class FileDirAction(argparse.Action):
class DirAction(argparse.Action):
"""File directory action class definition.""" """File directory action class definition."""


@staticmethod @staticmethod
@@ -72,7 +72,7 @@ class OutputDirAction(argparse.Action):
values (object): Argument values with type depending on argument definition. values (object): Argument values with type depending on argument definition.
option_string (str): Optional string for specific argument name. Default: None. option_string (str): Optional string for specific argument name. Default: None.
""" """
output = FileDirAction.check_path(values)
output = DirAction.check_path(values)


setattr(namespace, self.dest, output) setattr(namespace, self.dest, output)


@@ -94,7 +94,7 @@ class Command(BaseCommand):
parser.add_argument( parser.add_argument(
'--summary-dir', '--summary-dir',
type=str, type=str,
action=FileDirAction,
action=DirAction,
default=os.path.realpath(os.getcwd()), default=os.path.realpath(os.getcwd()),
help=""" help="""
Optional, specify path for summary file directory. Optional, specify path for summary file directory.
@@ -120,7 +120,7 @@ class Command(BaseCommand):
""" """
try: try:
date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f') date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f')
output_filename = os.path.join(args.output, date_time)
output_path = os.path.join(args.output, date_time)




summary_dir = args.summary_dir summary_dir = args.summary_dir
@@ -140,10 +140,10 @@ class Command(BaseCommand):
summary_file = FileHandler.join(summary_dir, filename) summary_file = FileHandler.join(summary_dir, filename)


if not (self._check_filepath(summary_file) and self._check_create_filepath( if not (self._check_filepath(summary_file) and self._check_create_filepath(
output_filename) and self._check_create_filepath(FileHandler.join(output_filename, 'image'))):
output_path) and self._check_create_filepath(FileHandler.join(output_path, 'image'))):
return return


eventparser = EventParser(summary_file, output_filename)
eventparser = EventParser(summary_file, output_path)
eventparser.parse() eventparser.parse()


except Exception as ex: except Exception as ex:
@@ -158,15 +158,13 @@ class Command(BaseCommand):
Args: Args:
filepath (str): File path. filepath (str): File path.
""" """
if os.path.exists(filepath):
if not os.path.isfile(filepath):
parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
return False
if not os.access(filepath, os.R_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return True
parse_summary_logger.error('Summary file %s not exists.', filepath)
return False
if not os.path.isfile(filepath):
parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
return False
if not os.access(filepath, os.R_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return False
return True


@staticmethod @staticmethod
def _check_dirpath(filepath): def _check_dirpath(filepath):
@@ -182,6 +180,7 @@ class Command(BaseCommand):
return False return False
if not os.access(filepath, os.R_OK | os.X_OK): if not os.access(filepath, os.R_OK | os.X_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return False
return True return True
parse_summary_logger.error('Summary directory %s not exists.', filepath) parse_summary_logger.error('Summary directory %s not exists.', filepath)
return False return False


+ 1
- 0
mindinsight/utils/constant.py View File

@@ -82,6 +82,7 @@ class DataVisualErrors(Enum):
TENSOR_NOT_EXIST = 18 TENSOR_NOT_EXIST = 18
MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19 MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19
STEP_TENSOR_DATA_NOT_IN_CACHE = 20 STEP_TENSOR_DATA_NOT_IN_CACHE = 20
CRC_LENGTH_FAILED = 21




class ScriptConverterErrors(Enum): class ScriptConverterErrors(Enum):


Loading…
Cancel
Save