Browse Source

Fix the unreasonable log output produced when an image tag name is too long.

Add log information when the CRC check fails.
tags/v1.1.0
jiangshuqiang 5 years ago
parent
commit
f317763313
5 changed files with 75 additions and 38 deletions
  1. +9
    -0
      mindinsight/datavisual/common/exceptions.py
  2. +18
    -6
      mindinsight/datavisual/data_transform/ms_data_loader.py
  3. +33
    -17
      mindinsight/datavisual/data_transform/summary_parser/event_parser.py
  4. +14
    -15
      mindinsight/scripts/parse_summary.py
  5. +1
    -0
      mindinsight/utils/constant.py

+ 9
- 0
mindinsight/datavisual/common/exceptions.py View File

@@ -64,6 +64,15 @@ class CRCFailedError(MindInsightException):
http_code=400)


class CRCLengthFailedError(MindInsightException):
    """CRC length check failed.

    Raised by the summary parsers when the bytes read for a record (header or
    event body) or for its CRC field do not match the expected size.
    """

    def __init__(self):
        # The message is fixed; the error is reported with HTTP status 400.
        super().__init__(
            DataVisualErrors.CRC_LENGTH_FAILED,
            'CRC Length Failed.',
            http_code=400,
        )


class SummaryLogIsLoading(MindInsightException):
"""Data is loading."""



+ 18
- 6
mindinsight/datavisual/data_transform/ms_data_loader.py View File

@@ -20,6 +20,7 @@ Each instance will read an entire run, a run can contain one or
more log file.
"""
import re
import time
import struct

from google.protobuf.message import DecodeError
@@ -46,6 +47,7 @@ from mindinsight.utils.exceptions import UnknownError
HEADER_SIZE = 8
CRC_STR_SIZE = 4
MAX_EVENT_STRING = 500000000
RETRY_TIMES = 2


class MSDataLoader:
@@ -368,10 +370,12 @@ class _SummaryParser(_Parser):
Returns:
bool, True if the summary file is finished loading.
"""
crc_check_time = 0
while True:
start_offset = file_handler.offset
try:
event_str = self._event_load(file_handler)
crc_check_time = 0
if event_str is None:
file_handler.reset_offset(start_offset)
return True
@@ -399,6 +403,18 @@ class _SummaryParser(_Parser):

future.add_done_callback(exception_no_raise_wrapper(_add_tensor_event_callback))
return False
except exceptions.CRCLengthFailedError:
if crc_check_time > RETRY_TIMES:
logger.warning(
"Check crc length failed, please check the summary file integrity, "
"the file may be in transfer, file_path: %s, offset=%s.",
file_handler.file_path, start_offset)
return True
logger.info(
"Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
file_handler.reset_offset(start_offset)
crc_check_time += 1
time.sleep(0.5)
except exceptions.CRCFailedError:
file_handler.reset_offset(start_offset)
logger.warning("Check crc faild and ignore this file, file_path=%s, "
@@ -432,9 +448,7 @@ class _SummaryParser(_Parser):
header_crc_str = ''

if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
logger.warning("Check header size and crc, record truncated at offset %s, "
"file_path=%s.", file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError
if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
raise exceptions.CRCFailedError()

@@ -450,9 +464,7 @@ class _SummaryParser(_Parser):
event_crc_str = ''

if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
logger.warning("Check event crc, record truncated at offset %d, file_path: %s.",
file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError
if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
raise exceptions.CRCFailedError()



+ 33
- 17
mindinsight/datavisual/data_transform/summary_parser/event_parser.py View File

@@ -18,6 +18,7 @@ Scalar Writer.
This module write scalar into a csv file.
"""
import os
import time
import struct

from google.protobuf.message import DecodeError
@@ -36,6 +37,7 @@ MAX_EVENT_STRING = 500000000
SCALAR = 'scalar_value'
IMAGE = 'image'
INFO_INTERVAL = 10
RETRY_TIMES = 2


class EventParser():
@@ -45,7 +47,6 @@ class EventParser():
self._output = output
self._scalar_writer = ScalarWriter(self._output)
self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
self._current = 0
self._file_size = 0
self._process_info = 0
self._image_check = False
@@ -63,15 +64,14 @@ class EventParser():
parse_summary_logger.info("Loading %s.", self.summary_file)
result = self._load(summary_file_handler)

parse_summary_logger.info("Writing scalar.csv")
self._scalar_writer.write()

warning = ''
if not self._scalar_check:
warning = warning + " the summary file contains no scalar value."
if not self._image_check:
warning = warning + " the summary file contains no image."
if result:
parse_summary_logger.info("Writing parsed data into scalar.csv")
self._scalar_writer.write()
if warning:
parse_summary_logger.warning(warning)
parse_summary_logger.info("Finished loading %s.", self.summary_file)
@@ -86,9 +86,12 @@ class EventParser():
Returns:
bool, True if the summary file is finished loading.
"""
crc_check_time = 0
while True:
start_offset = file_handler.offset
try:
event_str = self._event_load(file_handler)
crc_check_time = 0
if event_str is None:
return True
if len(event_str) > MAX_EVENT_STRING:
@@ -96,10 +99,23 @@ class EventParser():
file_handler.file_path, len(event_str), MAX_EVENT_STRING)
continue
self._event_parse(event_str)
except exceptions.CRCLengthFailedError:
if crc_check_time > RETRY_TIMES:
parse_summary_logger.error(
"Check crc length failed, please check the summary file integrity, "
"the file may be in transfer, file_path: %s, offset=%s.",
file_handler.file_path, start_offset)
return True
parse_summary_logger.warning(
"Check crc failed, retrying %d/%d times.", crc_check_time + 1, RETRY_TIMES + 1)
file_handler.reset_offset(start_offset)
crc_check_time += 1
time.sleep(0.5)
except exceptions.CRCFailedError:
parse_summary_logger.error("Check crc faild, file_path=%s, offset=%s.", file_handler.file_path,
file_handler.offset)
return False
parse_summary_logger.error(
"Check crc failed, the file may have been modified, file_path=%s, offset=%s.",
file_handler.file_path, start_offset)
return True
except (OSError, DecodeError, exceptions.MindInsightException) as ex:
parse_summary_logger.error("Parse file fail, detail: %r, file path: %s.", str(ex),
file_handler.file_path)
@@ -126,9 +142,7 @@ class EventParser():
header_crc_str = ''

if len(header_str) != HEADER_SIZE or len(header_crc_str) != CRC_STR_SIZE:
parse_summary_logger.error("Check header size and crc, record truncated at offset %s, file_path=%s.",
file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError

if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
raise exceptions.CRCFailedError()
@@ -145,16 +159,18 @@ class EventParser():
event_crc_str = ''

if len(event_str) != event_len or len(event_crc_str) != CRC_STR_SIZE:
parse_summary_logger.error("Check event crc, record truncated at offset %d, file_path: %s.",
file_handler.offset, file_handler.file_path)
return None
raise exceptions.CRCLengthFailedError

if not crc32.CheckValueAgainstData(event_crc_str, event_str, event_len):
raise exceptions.CRCFailedError()
self._current += HEADER_SIZE + 2 * CRC_STR_SIZE + event_len
if self._current >= self._process_info:
parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", self._current, self._file_size,
100 * self._current // self._file_size)

current_offset = file_handler.offset
if current_offset >= self._process_info:
parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", current_offset, self._file_size,
100 * current_offset // os.path.getsize(self.summary_file))
self._process_info += self._file_size // INFO_INTERVAL
if self._process_info > os.path.getsize(self.summary_file):
self._process_info = os.path.getsize(self.summary_file)
return event_str

def _event_parse(self, event_str):


+ 14
- 15
mindinsight/scripts/parse_summary.py View File

@@ -26,7 +26,7 @@ from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser
from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser


class FileDirAction(argparse.Action):
class DirAction(argparse.Action):
"""File directory action class definition."""

@staticmethod
@@ -72,7 +72,7 @@ class OutputDirAction(argparse.Action):
values (object): Argument values with type depending on argument definition.
option_string (str): Optional string for specific argument name. Default: None.
"""
output = FileDirAction.check_path(values)
output = DirAction.check_path(values)

setattr(namespace, self.dest, output)

@@ -94,7 +94,7 @@ class Command(BaseCommand):
parser.add_argument(
'--summary-dir',
type=str,
action=FileDirAction,
action=DirAction,
default=os.path.realpath(os.getcwd()),
help="""
Optional, specify path for summary file directory.
@@ -120,7 +120,7 @@ class Command(BaseCommand):
"""
try:
date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f')
output_filename = os.path.join(args.output, date_time)
output_path = os.path.join(args.output, date_time)


summary_dir = args.summary_dir
@@ -140,10 +140,10 @@ class Command(BaseCommand):
summary_file = FileHandler.join(summary_dir, filename)

if not (self._check_filepath(summary_file) and self._check_create_filepath(
output_filename) and self._check_create_filepath(FileHandler.join(output_filename, 'image'))):
output_path) and self._check_create_filepath(FileHandler.join(output_path, 'image'))):
return

eventparser = EventParser(summary_file, output_filename)
eventparser = EventParser(summary_file, output_path)
eventparser.parse()

except Exception as ex:
@@ -158,15 +158,13 @@ class Command(BaseCommand):
Args:
filepath (str): File path.
"""
if os.path.exists(filepath):
if not os.path.isfile(filepath):
parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
return False
if not os.access(filepath, os.R_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return True
parse_summary_logger.error('Summary file %s not exists.', filepath)
return False
if not os.path.isfile(filepath):
parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
return False
if not os.access(filepath, os.R_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return False
return True

@staticmethod
def _check_dirpath(filepath):
@@ -182,6 +180,7 @@ class Command(BaseCommand):
return False
if not os.access(filepath, os.R_OK | os.X_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return False
return True
parse_summary_logger.error('Summary directory %s not exists.', filepath)
return False


+ 1
- 0
mindinsight/utils/constant.py View File

@@ -82,6 +82,7 @@ class DataVisualErrors(Enum):
TENSOR_NOT_EXIST = 18
MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19
STEP_TENSOR_DATA_NOT_IN_CACHE = 20
CRC_LENGTH_FAILED = 21


class ScriptConverterErrors(Enum):


Loading…
Cancel
Save