Add info when CRC check failed. (tags/v1.1.0)
| @@ -64,6 +64,15 @@ class CRCFailedError(MindInsightException): | |||||
| http_code=400) | http_code=400) | ||||
| class CRCLengthFailedError(MindInsightException): | |||||
| """CRC fail, record corrupted.""" | |||||
| def __init__(self): | |||||
| error_msg = 'CRC Length Failed.' | |||||
| super(CRCLengthFailedError, self).__init__(DataVisualErrors.CRC_LENGTH_FAILED, | |||||
| error_msg, | |||||
| http_code=400) | |||||
| class SummaryLogIsLoading(MindInsightException): | class SummaryLogIsLoading(MindInsightException): | ||||
| """Data is loading.""" | """Data is loading.""" | ||||
| @@ -20,6 +20,7 @@ Each instance will read an entire run, a run can contain one or | |||||
| more log file. | more log file. | ||||
| """ | """ | ||||
| import re | import re | ||||
| import time | |||||
| import struct | import struct | ||||
| from google.protobuf.message import DecodeError | from google.protobuf.message import DecodeError | ||||
| @@ -46,6 +47,7 @@ from mindinsight.utils.exceptions import UnknownError | |||||
| HEADER_SIZE = 8 | HEADER_SIZE = 8 | ||||
| CRC_STR_SIZE = 4 | CRC_STR_SIZE = 4 | ||||
| MAX_EVENT_STRING = 500000000 | MAX_EVENT_STRING = 500000000 | ||||
| RETRY_TIMES = 2 | |||||
| class MSDataLoader: | class MSDataLoader: | ||||
| @@ -368,10 +370,12 @@ class _SummaryParser(_Parser): | |||||
| Returns: | Returns: | ||||
| bool, True if the summary file is finished loading. | bool, True if the summary file is finished loading. | ||||
| """ | """ | ||||
| crc_check_time = 0 | |||||
| while True: | while True: | ||||
| start_offset = file_handler.offset | start_offset = file_handler.offset | ||||
| try: | try: | ||||
| event_str = self._event_load(file_handler) | event_str = self._event_load(file_handler) | ||||
| crc_check_time = 0 | |||||
| if event_str is None: | if event_str is None: | ||||
| file_handler.reset_offset(start_offset) | file_handler.reset_offset(start_offset) | ||||
| return True | return True | ||||
| @@ -399,6 +403,18 @@ class _SummaryParser(_Parser): | |||||
| future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback)) | future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback)) | ||||
| return False | return False | ||||
| except exceptions.CRCLengthFailedError: | |||||
| if crc_check_time > RETRY_TIMES: | |||||
| logger.warning( | |||||
| "Check crc length failed, please check the summary file integrity, " | |||||
| "the file may be in transfer, file_path: %s, offset=%s.", | |||||
| file_handler.file_path, start_offset) | |||||
| return True | |||||
| logger.info( | |||||
| "Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1) | |||||
| file_handler.reset_offset(start_offset) | |||||
| crc_check_time += 1 | |||||
| time.sleep(0.5) | |||||
| except exceptions.CRCFailedError: | except exceptions.CRCFailedError: | ||||
| file_handler.reset_offset(start_offset) | file_handler.reset_offset(start_offset) | ||||
| logger.warning("Check crc faild and ignore this file, file_path=%s, " | logger.warning("Check crc faild and ignore this file, file_path=%s, " | ||||
| @@ -432,9 +448,7 @@ class _SummaryParser(_Parser): | |||||
| header_crc_str = '' | header_crc_str = '' | ||||
| if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: | if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: | ||||
| logger.warning("Check header size and crc, record truncated at offset %s, " | |||||
| "file_path=%s.", file_handler.offset, file_handler.file_path) | |||||
| return None | |||||
| raise exceptions.CRCLengthFailedError | |||||
| if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): | if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): | ||||
| raise exceptions.CRCFailedError() | raise exceptions.CRCFailedError() | ||||
| @@ -450,9 +464,7 @@ class _SummaryParser(_Parser): | |||||
| event_crc_str = '' | event_crc_str = '' | ||||
| if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: | if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: | ||||
| logger.warning("Check event crc, record truncated at offset %d, file_path: %s.", | |||||
| file_handler.offset, file_handler.file_path) | |||||
| return None | |||||
| raise exceptions.CRCLengthFailedError | |||||
| if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len): | if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len): | ||||
| raise exceptions.CRCFailedError() | raise exceptions.CRCFailedError() | ||||
| @@ -18,6 +18,7 @@ Scalar Writer. | |||||
| This module write scalar into a csv file. | This module write scalar into a csv file. | ||||
| """ | """ | ||||
| import os | import os | ||||
| import time | |||||
| import struct | import struct | ||||
| from google.protobuf.message import DecodeError | from google.protobuf.message import DecodeError | ||||
| @@ -36,6 +37,7 @@ MAX_EVENT_STRING = 500000000 | |||||
| SCALAR = 'scalar_value' | SCALAR = 'scalar_value' | ||||
| IMAGE = 'image' | IMAGE = 'image' | ||||
| INFO_INTERVAL = 10 | INFO_INTERVAL = 10 | ||||
| RETRY_TIMES = 2 | |||||
| class EventParser(): | class EventParser(): | ||||
| @@ -45,7 +47,6 @@ class EventParser(): | |||||
| self._output = output | self._output = output | ||||
| self._scalar_writer = ScalarWriter(self._output) | self._scalar_writer = ScalarWriter(self._output) | ||||
| self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE)) | self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE)) | ||||
| self._current = 0 | |||||
| self._file_size = 0 | self._file_size = 0 | ||||
| self._process_info = 0 | self._process_info = 0 | ||||
| self._image_check = False | self._image_check = False | ||||
| @@ -63,15 +64,14 @@ class EventParser(): | |||||
| parse_summary_logger.info("Loading %s.", self.summary_file) | parse_summary_logger.info("Loading %s.", self.summary_file) | ||||
| result = self._load(summary_file_handler) | result = self._load(summary_file_handler) | ||||
| parse_summary_logger.info("Writing scalar.csv") | |||||
| self._scalar_writer.write() | |||||
| warning = '' | warning = '' | ||||
| if not self._scalar_check: | if not self._scalar_check: | ||||
| warning = warning + " the summary file contains no scalar value." | warning = warning + " the summary file contains no scalar value." | ||||
| if not self._image_check: | if not self._image_check: | ||||
| warning = warning + " the summary file contains no image." | warning = warning + " the summary file contains no image." | ||||
| if result: | if result: | ||||
| parse_summary_logger.info("Writing parsed data into scalar.csv") | |||||
| self._scalar_writer.write() | |||||
| if warning: | if warning: | ||||
| parse_summary_logger.warning(warning) | parse_summary_logger.warning(warning) | ||||
| parse_summary_logger.info("Finished loading %s.", self.summary_file) | parse_summary_logger.info("Finished loading %s.", self.summary_file) | ||||
| @@ -86,9 +86,12 @@ class EventParser(): | |||||
| Returns: | Returns: | ||||
| bool, True if the summary file is finished loading. | bool, True if the summary file is finished loading. | ||||
| """ | """ | ||||
| crc_check_time = 0 | |||||
| while True: | while True: | ||||
| start_offset = file_handler.offset | |||||
| try: | try: | ||||
| event_str = self._event_load(file_handler) | event_str = self._event_load(file_handler) | ||||
| crc_check_time = 0 | |||||
| if event_str is None: | if event_str is None: | ||||
| return True | return True | ||||
| if len(event_str) > MAX_EVENT_STRING: | if len(event_str) > MAX_EVENT_STRING: | ||||
| @@ -96,10 +99,23 @@ class EventParser(): | |||||
| file_handler.file_path, len(event_str), MAX_EVENT_STRING) | file_handler.file_path, len(event_str), MAX_EVENT_STRING) | ||||
| continue | continue | ||||
| self._event_parse(event_str) | self._event_parse(event_str) | ||||
| except exceptions.CRCLengthFailedError: | |||||
| if crc_check_time > RETRY_TIMES: | |||||
| parse_summary_logger.error( | |||||
| "Check crc length failed, please check the summary file integrity, " | |||||
| "the file may be in transfer, file_path: %s, offset=%s.", | |||||
| file_handler.file_path, start_offset) | |||||
| return True | |||||
| parse_summary_logger.warning( | |||||
| "Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1) | |||||
| file_handler.reset_offset(start_offset) | |||||
| crc_check_time += 1 | |||||
| time.sleep(0.5) | |||||
| except exceptions.CRCFailedError: | except exceptions.CRCFailedError: | ||||
| parse_summary_logger.error("Check crc faild, file_path=%s, offset=%s.", file_handler.file_path, | |||||
| file_handler.offset) | |||||
| return False | |||||
| parse_summary_logger.error( | |||||
| "Check crc failed, the file may have been modified, file_path=%s, offset=%s.", | |||||
| file_handler.file_path, start_offset) | |||||
| return True | |||||
| except (OSError, DecodeError, exceptions.MindInsightException) as ex: | except (OSError, DecodeError, exceptions.MindInsightException) as ex: | ||||
| parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex), | parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex), | ||||
| file_handler.file_path) | file_handler.file_path) | ||||
| @@ -126,9 +142,7 @@ class EventParser(): | |||||
| header_crc_str = '' | header_crc_str = '' | ||||
| if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: | if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE: | ||||
| parse_summary_logger.error("Check header size and crc, record truncated at offset %s, file_path=%s.", | |||||
| file_handler.offset, file_handler.file_path) | |||||
| return None | |||||
| raise exceptions.CRCLengthFailedError | |||||
| if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): | if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): | ||||
| raise exceptions.CRCFailedError() | raise exceptions.CRCFailedError() | ||||
| @@ -145,16 +159,18 @@ class EventParser(): | |||||
| event_crc_str = '' | event_crc_str = '' | ||||
| if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: | if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE: | ||||
| parse_summary_logger.error("Check event crc, record truncated at offset %d, file_path: %s.", | |||||
| file_handler.offset, file_handler.file_path) | |||||
| return None | |||||
| raise exceptions.CRCLengthFailedError | |||||
| if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len): | if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len): | ||||
| raise exceptions.CRCFailedError() | raise exceptions.CRCFailedError() | ||||
| self._current += HEADER_SIZE + 2 * CRC_STR_SIZE + event_len | |||||
| if self._current >= self._process_info: | |||||
| parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", self._current, self._file_size, | |||||
| 100 * self._current // self._file_size) | |||||
| current_offset = file_handler.offset | |||||
| if current_offset >= self._process_info: | |||||
| parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", current_offset, self._file_size, | |||||
| 100 * current_offset // os.path.getsize(self.summary_file)) | |||||
| self._process_info += self._file_size // INFO_INTERVAL | self._process_info += self._file_size // INFO_INTERVAL | ||||
| if self._process_info > os.path.getsize(self.summary_file): | |||||
| self._process_info = os.path.getsize(self.summary_file) | |||||
| return event_str | return event_str | ||||
| def _event_parse(self, event_str): | def _event_parse(self, event_str): | ||||
| @@ -26,7 +26,7 @@ from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser | |||||
| from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser | from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser | ||||
| class FileDirAction(argparse.Action): | |||||
| class DirAction(argparse.Action): | |||||
| """File directory action class definition.""" | """File directory action class definition.""" | ||||
| @staticmethod | @staticmethod | ||||
| @@ -72,7 +72,7 @@ class OutputDirAction(argparse.Action): | |||||
| values (object): Argument values with type depending on argument definition. | values (object): Argument values with type depending on argument definition. | ||||
| option_string (str): Optional string for specific argument name. Default: None. | option_string (str): Optional string for specific argument name. Default: None. | ||||
| """ | """ | ||||
| output = FileDirAction.check_path(values) | |||||
| output = DirAction.check_path(values) | |||||
| setattr(namespace, self.dest, output) | setattr(namespace, self.dest, output) | ||||
| @@ -94,7 +94,7 @@ class Command(BaseCommand): | |||||
| parser.add_argument( | parser.add_argument( | ||||
| '--summary-dir', | '--summary-dir', | ||||
| type=str, | type=str, | ||||
| action=FileDirAction, | |||||
| action=DirAction, | |||||
| default=os.path.realpath(os.getcwd()), | default=os.path.realpath(os.getcwd()), | ||||
| help=""" | help=""" | ||||
| Optional, specify path for summary file directory. | Optional, specify path for summary file directory. | ||||
| @@ -120,7 +120,7 @@ class Command(BaseCommand): | |||||
| """ | """ | ||||
| try: | try: | ||||
| date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f') | date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f') | ||||
| output_filename = os.path.join(args.output, date_time) | |||||
| output_path = os.path.join(args.output, date_time) | |||||
| summary_dir = args.summary_dir | summary_dir = args.summary_dir | ||||
| @@ -140,10 +140,10 @@ class Command(BaseCommand): | |||||
| summary_file = FileHandler.join(summary_dir, filename) | summary_file = FileHandler.join(summary_dir, filename) | ||||
| if not (self._check_filepath(summary_file) and self._check_create_filepath( | if not (self._check_filepath(summary_file) and self._check_create_filepath( | ||||
| output_filename) and self._check_create_filepath(FileHandler.join(output_filename, 'image'))): | |||||
| output_path) and self._check_create_filepath(FileHandler.join(output_path, 'image'))): | |||||
| return | return | ||||
| eventparser = EventParser(summary_file, output_filename) | |||||
| eventparser = EventParser(summary_file, output_path) | |||||
| eventparser.parse() | eventparser.parse() | ||||
| except Exception as ex: | except Exception as ex: | ||||
| @@ -158,15 +158,13 @@ class Command(BaseCommand): | |||||
| Args: | Args: | ||||
| filepath (str): File path. | filepath (str): File path. | ||||
| """ | """ | ||||
| if os.path.exists(filepath): | |||||
| if not os.path.isfile(filepath): | |||||
| parse_summary_logger.error('Summary file %s is not a valid file.', filepath) | |||||
| return False | |||||
| if not os.access(filepath, os.R_OK): | |||||
| parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) | |||||
| return True | |||||
| parse_summary_logger.error('Summary file %s not exists.', filepath) | |||||
| return False | |||||
| if not os.path.isfile(filepath): | |||||
| parse_summary_logger.error('Summary file %s is not a valid file.', filepath) | |||||
| return False | |||||
| if not os.access(filepath, os.R_OK): | |||||
| parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) | |||||
| return False | |||||
| return True | |||||
| @staticmethod | @staticmethod | ||||
| def _check_dirpath(filepath): | def _check_dirpath(filepath): | ||||
| @@ -182,6 +180,7 @@ class Command(BaseCommand): | |||||
| return False | return False | ||||
| if not os.access(filepath, os.R_OK | os.X_OK): | if not os.access(filepath, os.R_OK | os.X_OK): | ||||
| parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) | parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) | ||||
| return False | |||||
| return True | return True | ||||
| parse_summary_logger.error('Summary directory %s not exists.', filepath) | parse_summary_logger.error('Summary directory %s not exists.', filepath) | ||||
| return False | return False | ||||
| @@ -82,6 +82,7 @@ class DataVisualErrors(Enum): | |||||
| TENSOR_NOT_EXIST = 18 | TENSOR_NOT_EXIST = 18 | ||||
| MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19 | MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19 | ||||
| STEP_TENSOR_DATA_NOT_IN_CACHE = 20 | STEP_TENSOR_DATA_NOT_IN_CACHE = 20 | ||||
| CRC_LENGTH_FAILED = 21 | |||||
| class ScriptConverterErrors(Enum): | class ScriptConverterErrors(Enum): | ||||