add info when crc check failed (tags/v1.1.0)
@@ -64,6 +64,15 @@ class CRCFailedError(MindInsightException):
                                          http_code=400)
 
 
+class CRCLengthFailedError(MindInsightException):
+    """CRC fail, record corrupted."""
+
+    def __init__(self):
+        error_msg = 'CRC Length Failed.'
+        super(CRCLengthFailedError, self).__init__(DataVisualErrors.CRC_LENGTH_FAILED,
+                                                   error_msg,
+                                                   http_code=400)
+
+
 class SummaryLogIsLoading(MindInsightException):
     """Data is loading."""
@@ -20,6 +20,7 @@ Each instance will read an entire run, a run can contain one or
 more log file.
 """
 import re
+import time
 import struct
 
 from google.protobuf.message import DecodeError
@@ -46,6 +47,7 @@ from mindinsight.utils.exceptions import UnknownError
 HEADER_SIZE = 8
 CRC_STR_SIZE = 4
 MAX_EVENT_STRING = 500000000
+RETRY_TIMES = 2
 
 
 class MSDataLoader:
@@ -368,10 +370,12 @@ class _SummaryParser(_Parser):
         Returns:
             bool, True if the summary file is finished loading.
         """
+        crc_check_time = 0
         while True:
             start_offset = file_handler.offset
             try:
                 event_str = self._event_load(file_handler)
+                crc_check_time = 0
                 if event_str is None:
                     file_handler.reset_offset(start_offset)
                     return True
@@ -399,6 +403,18 @@ class _SummaryParser(_Parser):
                 future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback))
                 return False
+            except exceptions.CRCLengthFailedError:
+                if crc_check_time > RETRY_TIMES:
+                    logger.warning(
+                        "Check crc length failed, please check the summary file integrity, "
+                        "the file may be in transfer, file_path: %s, offset=%s.",
+                        file_handler.file_path, start_offset)
+                    return True
+                logger.info(
+                    "Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
+                file_handler.reset_offset(start_offset)
+                crc_check_time += 1
+                time.sleep(0.5)
             except exceptions.CRCFailedError:
                 file_handler.reset_offset(start_offset)
                 logger.warning("Check crc faild and ignore this file, file_path=%s, "
@@ -432,9 +448,7 @@ class _SummaryParser(_Parser):
             header_crc_str = ''
 
         if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
-            logger.warning("Check header size and crc, record truncated at offset %s, "
-                           "file_path=%s.", file_handler.offset, file_handler.file_path)
-            return None
+            raise exceptions.CRCLengthFailedError
         if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
             raise exceptions.CRCFailedError()
@@ -450,9 +464,7 @@ class _SummaryParser(_Parser):
             event_crc_str = ''
 
         if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
-            logger.warning("Check event crc, record truncated at offset %d, file_path: %s.",
-                           file_handler.offset, file_handler.file_path)
-            return None
+            raise exceptions.CRCLengthFailedError
         if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
             raise exceptions.CRCFailedError()
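For context on why two exceptions are needed: `_event_load` reads an 8-byte length header, a 4-byte CRC of that header, the event body, and a 4-byte CRC of the body. A short read of any piece means the record is truncated (possibly still being written), which now raises `CRCLengthFailedError`; a fully read piece whose checksum does not match means the data is corrupted, which keeps raising `CRCFailedError`. The sketch below illustrates that split on a simplified record layout; the little-endian `<Q` length encoding and the plain `zlib.crc32` checksum are assumptions for illustration, not the masked CRC used by the real `crc32` module.

```python
import struct
import zlib

HEADER_SIZE = 8    # 8-byte record length
CRC_STR_SIZE = 4   # 4-byte checksum


class RecordTruncatedError(Exception):
    """Record is shorter than advertised: the file may still be in transfer."""


class RecordCorruptedError(Exception):
    """Record length is right but the checksum does not match: the file is damaged."""


def read_record(stream):
    """Read one length-prefixed, checksummed record from a binary stream; None at clean EOF."""
    header = stream.read(HEADER_SIZE)
    if not header:
        return None                                    # clean end of file
    header_crc = stream.read(CRC_STR_SIZE)
    if len(header) != HEADER_SIZE or len(header_crc) != CRC_STR_SIZE:
        raise RecordTruncatedError()                   # length header cut off mid-record
    if zlib.crc32(header) != struct.unpack('<I', header_crc)[0]:
        raise RecordCorruptedError()

    (body_len,) = struct.unpack('<Q', header)          # assumed little-endian uint64 length
    body = stream.read(body_len)
    body_crc = stream.read(CRC_STR_SIZE)
    if len(body) != body_len or len(body_crc) != CRC_STR_SIZE:
        raise RecordTruncatedError()                   # body cut off mid-record
    if zlib.crc32(body) != struct.unpack('<I', body_crc)[0]:
        raise RecordCorruptedError()
    return body
```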
@@ -18,6 +18,7 @@ Scalar Writer.
 This module write scalar into a csv file.
 """
 import os
+import time
 import struct
 
 from google.protobuf.message import DecodeError
@@ -36,6 +37,7 @@ MAX_EVENT_STRING = 500000000
 SCALAR = 'scalar_value'
 IMAGE = 'image'
 INFO_INTERVAL = 10
+RETRY_TIMES = 2
 
 
 class EventParser():
@@ -45,7 +47,6 @@ class EventParser():
         self._output = output
         self._scalar_writer = ScalarWriter(self._output)
         self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
-        self._current = 0
         self._file_size = 0
         self._process_info = 0
         self._image_check = False
@@ -63,15 +64,14 @@ class EventParser():
         parse_summary_logger.info("Loading %s.", self.summary_file)
         result = self._load(summary_file_handler)
-        parse_summary_logger.info("Writing scalar.csv")
-        self._scalar_writer.write()
         warning = ''
         if not self._scalar_check:
             warning = warning + " the summary file contains no scalar value."
         if not self._image_check:
             warning = warning + " the summary file contains no image."
+        if result:
+            parse_summary_logger.info("Writing parsed data into scalar.csv")
+            self._scalar_writer.write()
         if warning:
             parse_summary_logger.warning(warning)
         parse_summary_logger.info("Finished loading %s.", self.summary_file)
@@ -86,9 +86,12 @@ class EventParser():
         Returns:
             bool, True if the summary file is finished loading.
         """
+        crc_check_time = 0
         while True:
+            start_offset = file_handler.offset
             try:
                 event_str = self._event_load(file_handler)
+                crc_check_time = 0
                 if event_str is None:
                     return True
                 if len(event_str) > MAX_EVENT_STRING:
@@ -96,10 +99,23 @@ class EventParser():
                                                  file_handler.file_path, len(event_str), MAX_EVENT_STRING)
                     continue
                 self._event_parse(event_str)
+            except exceptions.CRCLengthFailedError:
+                if crc_check_time > RETRY_TIMES:
+                    parse_summary_logger.error(
+                        "Check crc length failed, please check the summary file integrity, "
+                        "the file may be in transfer, file_path: %s, offset=%s.",
+                        file_handler.file_path, start_offset)
+                    return True
+                parse_summary_logger.warning(
+                    "Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
+                file_handler.reset_offset(start_offset)
+                crc_check_time += 1
+                time.sleep(0.5)
             except exceptions.CRCFailedError:
-                parse_summary_logger.error("Check crc faild, file_path=%s, offset=%s.", file_handler.file_path,
-                                           file_handler.offset)
-                return False
+                parse_summary_logger.error(
+                    "Check crc failed, the file may have been modified, file_path=%s, offset=%s.",
+                    file_handler.file_path, start_offset)
+                return True
             except (OSError, DecodeError, exceptions.MindInsightException) as ex:
                 parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex),
                                            file_handler.file_path)
@@ -126,9 +142,7 @@ class EventParser():
             header_crc_str = ''
 
         if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
-            parse_summary_logger.error("Check header size and crc, record truncated at offset %s, file_path=%s.",
-                                       file_handler.offset, file_handler.file_path)
-            return None
+            raise exceptions.CRCLengthFailedError
         if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
             raise exceptions.CRCFailedError()
@@ -145,16 +159,18 @@ class EventParser():
             event_crc_str = ''
 
         if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
-            parse_summary_logger.error("Check event crc, record truncated at offset %d, file_path: %s.",
-                                       file_handler.offset, file_handler.file_path)
-            return None
+            raise exceptions.CRCLengthFailedError
         if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
             raise exceptions.CRCFailedError()
 
-        self._current += HEADER_SIZE + 2 * CRC_STR_SIZE + event_len
-        if self._current >= self._process_info:
-            parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", self._current, self._file_size,
-                                      100 * self._current // self._file_size)
+        current_offset = file_handler.offset
+        if current_offset >= self._process_info:
+            parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", current_offset, self._file_size,
+                                      100 * current_offset // os.path.getsize(self.summary_file))
             self._process_info += self._file_size // INFO_INTERVAL
+            if self._process_info > os.path.getsize(self.summary_file):
+                self._process_info = os.path.getsize(self.summary_file)
         return event_str
 
     def _event_parse(self, event_str):
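The second half of this hunk drops the hand-maintained `self._current` byte counter and derives progress from the handler's actual offset, re-reading the file size when reporting, so the figure stays consistent when a record is re-read after a retry or the file grows while parsing. A minimal sketch of that offset-based, throttled progress report (class and method names here are illustrative):

```python
import os

INFO_INTERVAL = 10   # report roughly every tenth of the file, as in the patch


class ProgressReporter:
    """Throttled progress reporting driven by the current file offset."""

    def __init__(self, file_path):
        self._file_path = file_path
        self._next_report = 0   # offset threshold for the next report

    def maybe_report(self, current_offset):
        """Log progress once the offset passes the next reporting threshold."""
        if current_offset < self._next_report:
            return
        total = os.path.getsize(self._file_path)   # re-read the size: the file may still grow
        if total == 0:
            return
        print("Current parsing process: %d/%d, %d%%." % (
            current_offset, total, 100 * current_offset // total))
        self._next_report += total // INFO_INTERVAL
        if self._next_report > total:
            self._next_report = total
```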
@@ -26,7 +26,7 @@ from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser
 from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser
 
 
-class FileDirAction(argparse.Action):
+class DirAction(argparse.Action):
     """File directory action class definition."""
 
     @staticmethod
@@ -72,7 +72,7 @@ class OutputDirAction(argparse.Action):
             values (object): Argument values with type depending on argument definition.
             option_string (str): Optional string for specific argument name. Default: None.
         """
-        output = FileDirAction.check_path(values)
+        output = DirAction.check_path(values)
         setattr(namespace, self.dest, output)
@@ -94,7 +94,7 @@ class Command(BaseCommand):
         parser.add_argument(
             '--summary-dir',
             type=str,
-            action=FileDirAction,
+            action=DirAction,
             default=os.path.realpath(os.getcwd()),
             help="""
                 Optional, specify path for summary file directory.
@@ -120,7 +120,7 @@ class Command(BaseCommand):
         """
         try:
             date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f')
-            output_filename = os.path.join(args.output, date_time)
+            output_path = os.path.join(args.output, date_time)
 
             summary_dir = args.summary_dir
@@ -140,10 +140,10 @@ class Command(BaseCommand):
             summary_file = FileHandler.join(summary_dir, filename)
 
             if not (self._check_filepath(summary_file) and self._check_create_filepath(
-                    output_filename) and self._check_create_filepath(FileHandler.join(output_filename, 'image'))):
+                    output_path) and self._check_create_filepath(FileHandler.join(output_path, 'image'))):
                 return
 
-            eventparser = EventParser(summary_file, output_filename)
+            eventparser = EventParser(summary_file, output_path)
             eventparser.parse()
 
         except Exception as ex:
@@ -158,15 +158,13 @@ class Command(BaseCommand):
         Args:
             filepath (str): File path.
         """
-        if os.path.exists(filepath):
-            if not os.path.isfile(filepath):
-                parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
-                return False
-            if not os.access(filepath, os.R_OK):
-                parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
-            return True
-        parse_summary_logger.error('Summary file %s not exists.', filepath)
-        return False
+        if not os.path.isfile(filepath):
+            parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
+            return False
+        if not os.access(filepath, os.R_OK):
+            parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
+            return False
+        return True
 
     @staticmethod
     def _check_dirpath(filepath):
@@ -182,6 +180,7 @@ class Command(BaseCommand):
                 return False
             if not os.access(filepath, os.R_OK | os.X_OK):
                 parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
+                return False
             return True
         parse_summary_logger.error('Summary directory %s not exists.', filepath)
         return False
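Both path checks are tightened in the two hunks above: previously an existing but unreadable file or directory logged an error yet still let the command proceed (the file check fell through to `return True`, and the directory check had no `return False` after logging). A hedged standalone version of the corrected file check (function and logger names are illustrative):

```python
import logging
import os

logger = logging.getLogger("parse_summary")


def check_summary_filepath(filepath):
    """Return True only if `filepath` is an existing, regular, readable file."""
    if not os.path.isfile(filepath):   # also covers the "does not exist" case
        logger.error('Summary file %s is not a valid file.', filepath)
        return False
    if not os.access(filepath, os.R_OK):
        logger.error('Path %s is not accessible, please check the file-authority.', filepath)
        return False                   # the fix: an unreadable file must not pass the check
    return True
```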
@@ -82,6 +82,7 @@ class DataVisualErrors(Enum):
     TENSOR_NOT_EXIST = 18
     MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19
     STEP_TENSOR_DATA_NOT_IN_CACHE = 20
+    CRC_LENGTH_FAILED = 21
 
 
 class ScriptConverterErrors(Enum):