Browse Source

Fix garbled text in the output file when a tag includes special characters

Fix the log message showing "current process ... 100%" while the conversion has not finished

Fix the output image file being incomplete when the tag contains special characters
tags/v1.1.0
jiangshuqiang 5 years ago
parent
commit
adb19c6dd2
4 changed files with 136 additions and 97 deletions
  1. +23
    -71
      mindinsight/datavisual/data_transform/summary_parser/event_parser.py
  2. +2
    -4
      mindinsight/datavisual/data_transform/summary_parser/image_writer.py
  3. +2
    -1
      mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py
  4. +109
    -21
      mindinsight/scripts/parse_summary.py

+ 23
- 71
mindinsight/datavisual/data_transform/summary_parser/event_parser.py View File

@@ -29,20 +29,19 @@ from mindinsight.datavisual.proto_files import lazy_read_pb2
from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.summary_parser.image_writer import ImageWriter
from mindinsight.datavisual.data_transform.summary_parser.scalar_writer import ScalarWriter
from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser
from mindinsight.utils.exceptions import UnknownError

HEADER_SIZE = 8
CRC_STR_SIZE = 4
MAX_EVENT_STRING = 500000000
SCALAR = 'scalar_value'
IMAGE = 'image'
INFO_INTERVAL = 10


class EventParser():
"""Parse summary file and save it to csv file and image."""
def __init__(self, summary_dir, output):
self._summary_dir = summary_dir
def __init__(self, summary_file, output):
self.summary_file = summary_file
self._output = output
self._scalar_writer = ScalarWriter(self._output)
self._image_writer = ImageWriter(FileHandler.join(self._output, IMAGE))
@@ -54,52 +53,28 @@ class EventParser():

def parse(self):
"""Load summary file and parse file content."""
try:
if not (self._check_filepath() and self._check_create_filepath(
self._output) and self._check_create_filepath(FileHandler.join(self._output, IMAGE))):
return

summary_parser = _SummaryParser(self._summary_dir)
summary_files = summary_parser.filter_files(os.listdir(self._summary_dir))
summary_file_handler = FileHandler(self.summary_file, 'rb')

if not summary_files:
parse_summary_logger.error('Path %s has no summary file.', self._summary_dir)
return
self._file_size = os.path.getsize(self.summary_file)
# when current parsed size bigger than self._process_info, print process
self._process_info = self._file_size // INFO_INTERVAL

summary_files = summary_parser.sort_files(summary_files)
parse_summary_logger.info("Loading %s.", self.summary_file)
result = self._load(summary_file_handler)

filename = summary_files[-1]
file_path = FileHandler.join(self._summary_dir, filename)
parse_summary_logger.info("Writing scalar.csv")
self._scalar_writer.write()

if not os.access(file_path, os.R_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', file_path)
return

self._summary_file_handler = FileHandler(file_path, 'rb')

self._file_size = os.path.getsize(file_path)
# when current parsed size bigger than self._process_info, print process
self._process_info = self._file_size // 10

parse_summary_logger.info("loading %s", file_path)
result = self._load(self._summary_file_handler)

self._scalar_writer.write()

warning = ''

if not self._scalar_check:
warning = warning + " the summary file contains no scalar value"
if not self._image_check:
warning = warning + " the summary file contains no image"
if result:
if warning:
parse_summary_logger.warning(warning)
parse_summary_logger.info("parsing summary file finished")

except Exception as ex:
parse_summary_logger.error("Parse summary file failed, detail: %r", str(ex))
raise UnknownError(str(ex))
warning = ''
if not self._scalar_check:
warning = warning + " the summary file contains no scalar value."
if not self._image_check:
warning = warning + " the summary file contains no image."
if result:
if warning:
parse_summary_logger.warning(warning)
parse_summary_logger.info("Finished loading %s.", self.summary_file)

def _load(self, file_handler):
"""
@@ -154,6 +129,7 @@ class EventParser():
parse_summary_logger.error("Check header size and crc, record truncated at offset %s, file_path=%s.",
file_handler.offset, file_handler.file_path)
return None

if not crc32.CheckValueAgainstData(header_crc_str, header_str, HEADER_SIZE):
raise exceptions.CRCFailedError()

@@ -176,9 +152,9 @@ class EventParser():
raise exceptions.CRCFailedError()
self._current += HEADER_SIZE + 2 * CRC_STR_SIZE + event_len
if self._current >= self._process_info:
parse_summary_logger.info("current process: %d/%d, %d%%", self._current, self._file_size,
parse_summary_logger.info("Current parsing process: %d/%d, %d%%.", self._current, self._file_size,
100 * self._current // self._file_size)
self._process_info += self._file_size // 10
self._process_info += self._file_size // INFO_INTERVAL
return event_str

def _event_parse(self, event_str):
@@ -219,27 +195,3 @@ class EventParser():
self._image_writer.add((tag, step, value.image.encoded_image))
self._image_writer.write()
self._image_check = True

def _check_filepath(self):
"""Check file path existence, accessible and available"""
if os.path.exists(self._summary_dir):
if not os.path.isdir(self._summary_dir):
parse_summary_logger.error('Path of summary directory is not a valid directory.')
return False
if not os.access(self._summary_dir, os.R_OK | os.X_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.',
self._summary_dir)
return True
parse_summary_logger.error('Path of summary directory not exists.')
return False

def _check_create_filepath(self, filepath):
"""Check file path existence, accessible and available, if not exist create the file"""
permissions = os.R_OK | os.W_OK | os.X_OK
os.umask(permissions << 3 | permissions)
if os.path.exists(filepath):
parse_summary_logger.error('Path %s has already existed, please choose a new output path.', filepath)
return False
mode = permissions << 6
os.makedirs(filepath, mode=mode)
return True

+ 2
- 4
mindinsight/datavisual/data_transform/summary_parser/image_writer.py View File

@@ -18,7 +18,7 @@ Image Writer.
This module writes images into png files.
"""
import os
import re
from urllib.parse import quote

from mindinsight.datavisual.data_transform.summary_parser.writer import Writer

@@ -47,9 +47,7 @@ class ImageWriter(Writer):
def write(self):
"""Write file."""
for i in range(len(self._image_data)):
tag = self._image_data[i][0]
tag = tag.replace('/', '_')
tag = re.sub(r'[^a-zA-Z0-9_]+', '', tag)
tag = quote(self._image_data[i][0], safe="")
with os.fdopen(os.open("{}/{}_{}.png".format(self._file_path, tag, self._image_data[i][1]),
os.O_WRONLY | os.O_CREAT, 0o600), 'wb') as fp:
fp.write(self._image_data[i][2])


+ 2
- 1
mindinsight/datavisual/data_transform/summary_parser/scalar_writer.py View File

@@ -46,6 +46,7 @@ class ScalarWriter(Writer):

def write(self):
"""Write file."""
with os.fdopen(os.open('{}/scalar.csv'.format(self._file_path), os.O_WRONLY | os.O_CREAT, 0o600), 'w') as fp:
with os.fdopen(os.open('{}/scalar.csv'.format(self._file_path), os.O_WRONLY | os.O_CREAT, 0o600), 'w',
encoding='utf-8') as fp:
writer = csv.writer(fp, dialect='excel')
writer.writerows(self._scalar_data)

+ 109
- 21
mindinsight/scripts/parse_summary.py View File

@@ -19,12 +19,32 @@ import os
import datetime

from mindinsight.utils.command import BaseCommand
from mindinsight.utils.exceptions import UnknownError
from mindinsight.datavisual.common.log import parse_summary_logger
from mindinsight.datavisual.data_access.file_handler import FileHandler
from mindinsight.datavisual.data_transform.ms_data_loader import _SummaryParser
from mindinsight.datavisual.data_transform.summary_parser.event_parser import EventParser


class FilepathAction(argparse.Action):
class FileDirAction(argparse.Action):
"""File directory action class definition."""

@staticmethod
def check_path(file_path):
"""
Check argument for file path.

Args:
file_path (str): File path.
"""
if file_path.startswith('~'):
file_path = os.path.realpath(os.path.expanduser(file_path))

if not file_path.startswith('/'):
file_path = os.path.realpath(FileHandler.join(os.getcwd(), file_path))

return os.path.realpath(file_path)

def __call__(self, parser_in, namespace, values, option_string=None):
"""
Inherited __call__ method from argparse.Action.
@@ -35,17 +55,10 @@ class FilepathAction(argparse.Action):
values (object): Argument values with type depending on argument definition.
option_string (str): Optional string for specific argument name. Default: None.
"""
summary_dir = values
if summary_dir.startswith('~'):
summary_dir = os.path.realpath(os.path.expanduser(summary_dir))
summary_dir = self.check_path(values)

if not summary_dir.startswith('/'):
summary_dir = os.path.realpath(os.path.join(os.getcwd(), summary_dir))

summary_dir = os.path.realpath(summary_dir)
setattr(namespace, self.dest, summary_dir)


class OutputDirAction(argparse.Action):
"""File directory action class definition."""

@@ -59,14 +72,8 @@ class OutputDirAction(argparse.Action):
values (object): Argument values with type depending on argument definition.
option_string (str): Optional string for specific argument name. Default: None.
"""
output = values
if output.startswith('~'):
output = os.path.realpath(os.path.expanduser(output))
output = FileDirAction.check_path(values)

if not output.startswith('/'):
output = os.path.realpath(os.path.join(os.getcwd(), output))

output = os.path.realpath(output)
setattr(namespace, self.dest, output)


@@ -83,10 +90,11 @@ class Command(BaseCommand):
Args:
parser (ArgumentParser): Specify parser to which arguments are added.
"""

parser.add_argument(
'--summary-dir',
type=str,
action=FilepathAction,
action=FileDirAction,
default=os.path.realpath(os.getcwd()),
help="""
Optional, specify path for summary file directory.
@@ -110,7 +118,87 @@ class Command(BaseCommand):
Args:
args (Namespace): Parsed arguments to hold customized parameters.
"""
date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f')
date_time = os.path.join(args.output, date_time)
eventparser = EventParser(args.summary_dir, date_time)
eventparser.parse()
try:
date_time = datetime.datetime.now().strftime('output_%Y%m%d_%H%M%S_%f')
output_filename = os.path.join(args.output, date_time)


summary_dir = args.summary_dir
if not self._check_dirpath(summary_dir):
return

summary_parser = _SummaryParser(summary_dir)
summary_files = summary_parser.filter_files(os.listdir(summary_dir))

if not summary_files:
parse_summary_logger.error('Path %s has no summary file.', summary_dir)
return

summary_files = summary_parser.sort_files(summary_files)
filename = summary_files[-1]

summary_file = FileHandler.join(summary_dir, filename)

if not (self._check_filepath(summary_file) and self._check_create_filepath(
output_filename) and self._check_create_filepath(FileHandler.join(output_filename, 'image'))):
return

eventparser = EventParser(summary_file, output_filename)
eventparser.parse()

except Exception as ex:
parse_summary_logger.error("Parse summary file failed, detail: %r.", str(ex))
raise UnknownError(str(ex))

@staticmethod
def _check_filepath(filepath):
"""
Check file path existence, accessible and available

Args:
filepath (str): File path.
"""
if os.path.exists(filepath):
if not os.path.isfile(filepath):
parse_summary_logger.error('Summary file %s is not a valid file.', filepath)
return False
if not os.access(filepath, os.R_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return True
parse_summary_logger.error('Summary file %s not exists.', filepath)
return False

@staticmethod
def _check_dirpath(filepath):
"""
Check file path existence, accessible and available

Args:
filepath (str): File path.
"""
if os.path.exists(filepath):
if not os.path.isdir(filepath):
parse_summary_logger.error('Summary directory %s is not a valid directory.', filepath)
return False
if not os.access(filepath, os.R_OK | os.X_OK):
parse_summary_logger.error('Path %s is not accessible, please check the file-authority.', filepath)
return True
parse_summary_logger.error('Summary directory %s not exists.', filepath)
return False

@staticmethod
def _check_create_filepath(filepath):
"""
Check file path existence, accessible and available, if not exist create the file

Args:
filepath (str): File path.
"""
permissions = os.R_OK | os.W_OK | os.X_OK
os.umask(permissions << 3 | permissions)
if os.path.exists(filepath):
parse_summary_logger.error('Path %s has already existed, please choose a new output path.', filepath)
return False
mode = permissions << 6
os.makedirs(filepath, mode=mode)
return True

Loading…
Cancel
Save