diff --git a/mindinsight/datavisual/data_transform/summary_parser/event_parser.py b/mindinsight/datavisual/data_transform/summary_parser/event_parser.py index 58f95ded..88b2f073 100644 --- a/mindinsight/datavisual/data_transform/summary_parser/event_parser.py +++ b/mindinsight/datavisual/data_transform/summary_parser/event_parser.py @@ -29,20 +29,19 @@ from mindinsight.datavisual.proto_files import lazy_read_pb2 from mindinsight.datavisual.data_access.file_handler import FileHandler from mindinsight.datavisual.data_transform.summary_parser.image_writer import ImageWriter from mindinsight.datavisual.data_transform.summary_parser.scalar_writer import ScalarWriter -from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser -from mindinsight.utils.exceptions import UnknownError HEADER_SIZE = 8 CRC_STR_SIZE = 4 MAX_EVENT_STRING = 500000000 SCALAR = 'scalar_value' IMAGE = 'image' +INFO_INTERVAL = 10 class EventParser(): """Parse summary file and save it to csv file and image.""" - def __init__(self, summary_dir, output): - self._summary_dir = summary_dir + def __init__(self, summary_file, output): + self.summary_file = summary_file self._output = output self._scalar_writer = ScalarWriter(self._output) self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE)) @@ -54,52 +53,28 @@ class EventParser(): def parse(self): """Load summary file and parse file content.""" - try: - if not (self._check_filepath() and self._check_create_filepath( - self._output) and self._check_create_filepath(FileHandler.join(self._output, IMAGE))): - return - summary_parser = _SummaryParser(self._summary_dir) - summary_files = summary_parser.filter_files(os.listdir(self._summary_dir)) + summary_file_handler = FileHandler(self.summary_file, 'rb') - if not summary_files: - parse_summary_logger.error('Path %s has no summary file.', self._summary_dir) - return + self._file_size = os.path.getsize(self.summary_file) + # when current parsed size bigger than self._process_info, print process + self._process_info = self._file_size // INFO_INTERVAL - summary_files = summary_parser.sort_files(summary_files) + parse_summary_logger.info("Loading %s.", self.summary_file) + result = self._load(summary_file_handler) - filename = summary_files[-1] - file_path = FileHandler.join(self._summary_dir, filename) + parse_summary_logger.info("Writing scalar.csv") + self._scalar_writer.write() - if not os.access(file_path, os.R_OK): - parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', file_path) - return - - self._summary_file_handler = FileHandler(file_path, 'rb') - - self._file_size = os.path.getsize(file_path) - # when current parsed size bigger than self._process_info, print process - self._process_info = self._file_size // 10 - - parse_summary_logger.info("loading %s", file_path) - result = self._load(self._summary_file_handler) - - self._scalar_writer.write() - - warning = '' - - if not self._scalar_check: - warning = warning + " the summary file contains no scalar value" - if not self._image_check: - warning = warning + " the summary file contains no image" - if result: - if warning: - parse_summary_logger.warning(warning) - parse_summary_logger.info("parsing summary file finished") - - except Exception as ex: - parse_summary_logger.error("Parse summary file failed, detail: %r", str(ex)) - raise UnknownError(str(ex)) + warning = '' + if not self._scalar_check: + warning = warning + " the summary file contains no scalar value." + if not self._image_check: + warning = warning + " the summary file contains no image." + if result: + if warning: + parse_summary_logger.warning(warning) + parse_summary_logger.info("Finished loading %s.", self.summary_file) def _load(self, file_handler): """ @@ -154,6 +129,7 @@ class EventParser(): parse_summary_logger.error("Check header size and crc, record truncated at offset %s, file_path=%s.", file_handler.offset, file_handler.file_path) return None + if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE): raise exceptions.CRCFailedError() @@ -176,9 +152,9 @@ class EventParser(): raise exceptions.CRCFailedError() self._current += HEADER_SIZE + 2 * CRC_STR_SIZE + event_len if self._current >= self._process_info: - parse_summary_logger.info("current process: %d/%d, %d%%", self._current, self._file_size, + parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", self._current, self._file_size, 100 * self._current // self._file_size) - self._process_info += self._file_size // 10 + self._process_info += self._file_size // INFO_INTERVAL return event_str def _event_parse(self, event_str): @@ -219,27 +195,3 @@ class EventParser(): self._image_writer.add((tag, step, value.image.encoded_image)) self._image_writer.write() self._image_check = True - - def _check_filepath(self): - """Check file path existence, accessible and available""" - if os.path.exists(self._summary_dir): - if not os.path.isdir(self._summary_dir): - parse_summary_logger.error('Path of summary directory is not a valid directory.') - return False - if not os.access(self._summary_dir, os.R_OK | os.X_OK): - parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', - self._summary_dir) - return True - parse_summary_logger.error('Path of summary directory not exists.') - return False - - def _check_create_filepath(self, filepath): - """Check file path existence, accessible and available, if not exist create the file""" - permissions = os.R_OK | os.W_OK | os.X_OK - os.umask(permissions << 3 | permissions) - if os.path.exists(filepath): - parse_summary_logger.error('Path %s has already existed, please choose a new output path.', filepath) - return False - mode = permissions << 6 - os.makedirs(filepath, mode=mode) - return True diff --git a/mindinsight/datavisual/data_transform/summary_parser/image_writer.py b/mindinsight/datavisual/data_transform/summary_parser/image_writer.py index 435ff599..60b6bc97 100644 --- a/mindinsight/datavisual/data_transform/summary_parser/image_writer.py +++ b/mindinsight/datavisual/data_transform/summary_parser/image_writer.py @@ -18,7 +18,7 @@ Image Writer. This module write scalar into a csv file. """ import os -import re +from urllib.parse import quote from mindinsight.datavisual.data_transform.summary_parser.writer import Writer @@ -47,9 +47,7 @@ class ImageWriter(Writer): def write(self): """Write file.""" for i in range(len(self._image_data)): - tag = self._image_data[i][0] - tag = tag.replace('/', '_') - tag = re.sub(r'[^a-zA-Z0-9_]+', '', tag) + tag = quote(self._image_data[i][0], safe="") with os.fdopen(os.open("{}/{}_{}.png".format(self._file_path, tag, self._image_data[i][1]), os.O_WRONLY | os.O_CREAT, 0o600), 'wb') as fp: fp.write(self._image_data[i][2]) diff --git a/mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py b/mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py index 11e9403d..f0375bb2 100644 --- a/mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py +++ b/mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py @@ -46,6 +46,7 @@ class ScalarWriter(Writer): def write(self): """Write file.""" - with os.fdopen(os.open('{}/scalar.csv'.format(self._file_path), os.O_WRONLY | os.O_CREAT, 0o600), 'w') as fp: + with os.fdopen(os.open('{}/scalar.csv'.format(self._file_path), os.O_WRONLY | os.O_CREAT, 0o600), 'w', + encoding='utf-8') as fp: writer = csv.writer(fp, dialect='excel') writer.writerows(self._scalar_data) diff --git a/mindinsight/scripts/parse_summary.py b/mindinsight/scripts/parse_summary.py index 54451575..6cabe810 100644 --- a/mindinsight/scripts/parse_summary.py +++ b/mindinsight/scripts/parse_summary.py @@ -19,12 +19,32 @@ import os import datetime from mindinsight.utils.command import BaseCommand +from mindinsight.utils.exceptions import UnknownError +from mindinsight.datavisual.common.log import parse_summary_logger +from mindinsight.datavisual.data_access.file_handler import FileHandler +from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser -class FilepathAction(argparse.Action): +class FileDirAction(argparse.Action): """File directory action class definition.""" + @staticmethod + def check_path(file_path): + """ + Check argument for file path. + + Args: + file_path (str): File path. + """ + if file_path.startswith('~'): + file_path = os.path.realpath(os.path.expanduser(file_path)) + + if not file_path.startswith('/'): + file_path = os.path.realpath(FileHandler.join(os.getcwd(), file_path)) + + return os.path.realpath(file_path) + def __call__(self, parser_in, namespace, values, option_string=None): """ Inherited __call__ method from argparse.Action. @@ -35,17 +55,10 @@ class FilepathAction(argparse.Action): values (object): Argument values with type depending on argument definition. option_string (str): Optional string for specific argument name. Default: None. """ - summary_dir = values - if summary_dir.startswith('~'): - summary_dir = os.path.realpath(os.path.expanduser(summary_dir)) + summary_dir = self.check_path(values) - if not summary_dir.startswith('/'): - summary_dir = os.path.realpath(os.path.join(os.getcwd(), summary_dir)) - - summary_dir = os.path.realpath(summary_dir) setattr(namespace, self.dest, summary_dir) - class OutputDirAction(argparse.Action): """File directory action class definition.""" @@ -59,14 +72,8 @@ class OutputDirAction(argparse.Action): values (object): Argument values with type depending on argument definition. option_string (str): Optional string for specific argument name. Default: None. """ - output = values - if output.startswith('~'): - output = os.path.realpath(os.path.expanduser(output)) + output = FileDirAction.check_path(values) - if not output.startswith('/'): - output = os.path.realpath(os.path.join(os.getcwd(), output)) - - output = os.path.realpath(output) setattr(namespace, self.dest, output) @@ -83,10 +90,11 @@ class Command(BaseCommand): Args: parser (ArgumentParser): Specify parser to which arguments are added. """ + parser.add_argument( '--summary-dir', type=str, - action=FilepathAction, + action=FileDirAction, default=os.path.realpath(os.getcwd()), help=""" Optional, specify path for summary file directory. @@ -110,7 +118,87 @@ class Command(BaseCommand): Args: args (Namespace): Parsed arguments to hold customized parameters. """ - date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f') - date_time = os.path.join(args.output, date_time) - eventparser = EventParser(args.summary_dir, date_time) - eventparser.parse() + try: + date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f') + output_filename = os.path.join(args.output, date_time) + + + summary_dir = args.summary_dir + if not self._check_dirpath(summary_dir): + return + + summary_parser = _SummaryParser(summary_dir) + summary_files = summary_parser.filter_files(os.listdir(summary_dir)) + + if not summary_files: + parse_summary_logger.error('Path %s has no summary file.', summary_dir) + return + + summary_files = summary_parser.sort_files(summary_files) + filename = summary_files[-1] + + summary_file = FileHandler.join(summary_dir, filename) + + if not (self._check_filepath(summary_file) and self._check_create_filepath( + output_filename) and self._check_create_filepath(FileHandler.join(output_filename, 'image'))): + return + + eventparser = EventParser(summary_file, output_filename) + eventparser.parse() + + except Exception as ex: + parse_summary_logger.error("Parse summary file failed, detail: %r.", str(ex)) + raise UnknownError(str(ex)) + + @staticmethod + def _check_filepath(filepath): + """ + Check file path existence, accessible and available + + Args: + filepath (str): File path. + """ + if os.path.exists(filepath): + if not os.path.isfile(filepath): + parse_summary_logger.error('Summary file %s is not a valid file.', filepath) + return False + if not os.access(filepath, os.R_OK): + parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) + return True + parse_summary_logger.error('Summary file %s not exists.', filepath) + return False + + @staticmethod + def _check_dirpath(filepath): + """ + Check file path existence, accessible and available + + Args: + filepath (str): File path. + """ + if os.path.exists(filepath): + if not os.path.isdir(filepath): + parse_summary_logger.error('Summary directory %s is not a valid directory.', filepath) + return False + if not os.access(filepath, os.R_OK | os.X_OK): + parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath) + return True + parse_summary_logger.error('Summary directory %s not exists.', filepath) + return False + + @staticmethod + def _check_create_filepath(filepath): + """ + Check file path existence, accessible and available, if not exist create the file + + Args: + filepath (str): File path. + """ + permissions = os.R_OK | os.W_OK | os.X_OK + os.umask(permissions << 3 | permissions) + if os.path.exists(filepath): + parse_summary_logger.error('Path %s has already existed, please choose a new output path.', filepath) + return False + mode = permissions << 6 + os.makedirs(filepath, mode=mode) + return True