# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""This module provides python APIs to get lineage summary from summary log."""
import struct
from collections import namedtuple
from enum import Enum

from google.protobuf.json_format import MessageToDict
from google.protobuf.message import DecodeError

from mindinsight.datavisual.proto_files.mindinsight_lineage_pb2 import LineageEvent
from mindinsight.datavisual.utils import crc32
from mindinsight.lineagemgr.common.exceptions.exceptions import MindInsightException, \
    LineageVerificationException, LineageSummaryAnalyzeException
from mindinsight.lineagemgr.common.log import logger as log
from mindinsight.lineagemgr.common.validator.validate_path import safe_normalize_path
from mindinsight.lineagemgr.summary.file_handler import FileHandler

# Bundle of the three lineage payloads, addressed by field name.
LineageInfo = namedtuple('LineageInfo', ['train_lineage', 'eval_lineage', 'dataset_graph'])


class SummaryTag(Enum):
    """The tag value of lineage fields."""

    # Every value is a plain string, not a numeric protobuf tag of the form
    # `field_number << 3 | wire_type` as an earlier comment here stated.
    # NOTE(review): the strings presumably mirror the field names of the
    # LineageEvent message -- confirm against the .proto definition.
    WALL_TIME = 'wall_time'
    STEP = 'step'
    VERSION = 'version'
    GRAPH = 'graph'
    SUMMARY = 'summary'
    TRAIN_LINEAGE = 'train_lineage'
    EVAL_LINEAGE = 'evaluation_lineage'
    DATASET_GRAPH = 'dataset_graph'
class SummaryAnalyzer:
    """
    Summary log Analyzer.

    Args:
        file_path (str): The path of summary log.

    Raises:
        LineageVerificationException: Raise when verification failed.
    """
    # Lengths handed to `file_handler.read` for the record header and for the
    # header CRC (see the preserved `_read_header` fragment at the end of the
    # class). `BODY_CRC_SIZE` is not referenced in the visible part of the
    # class.
    HEADER_SIZE = 8
    HEADER_CRC_SIZE = 4
    BODY_CRC_SIZE = 4

    def __init__(self, file_path):
        """
        Wrap the summary log in a FileHandler.

        Args:
            file_path (str): The path of summary log.
        """
        self.file_handler = FileHandler(file_path)

    def load_events(self):
        """
        Load events in summary log.

        Events are produced lazily, one per iteration, for as long as
        `_has_next` reports unread data.

        Returns:
            generator, the event generator.
        """
        while self._has_next():
            yield self._read_event()

    def _has_next(self):
        """
        Check whether unread data remains in the file.

        Returns:
            bool, True while the handler's current offset is smaller than the
            file size, False once the offset has reached the size.
        """
        return self.file_handler.tell() < self.file_handler.size

    def _read_event(self):
        """
        Read event.

        Reads the header to learn the body size, reads that many units of
        body data, and parses the body as a LineageEvent message.

        Returns:
            LineageEvent, the event body.
        """
        body_size = self._read_header()
        body_str = self._read_body(body_size)
        # FromString is a static constructor on protobuf message classes, so
        # no throwaway LineageEvent instance is needed to call it.
        return LineageEvent.FromString(body_str)

    # NOTE(review): `_read_header` is cut off in the visible source, in the
    # middle of its `struct.unpack` call, so it is deliberately NOT
    # reconstructed here. The visible part is preserved verbatim below.
    # Restore the complete method -- together with `_read_body` and
    # `_check_crc`, which are referenced but not visible in this view --
    # from the upstream file before using this class.
    #
    # def _read_header(self):
    #     """
    #     Read header information.
    #
    #     Returns:
    #         int, the length of event body.
    #     """
    #     header_str = self.file_handler.read(self.HEADER_SIZE)
    #     header_crc_str = self.file_handler.read(self.HEADER_CRC_SIZE)
    #     SummaryAnalyzer._check_crc(header_str, header_crc_str)
    #     body_len = struct.unpack("