From: @jiang-shuqiang Reviewed-by: @wenkai_dist, @lilongfei15 Signed-off-by: @lilongfei15 tags/v1.1.0
| @@ -204,3 +204,12 @@ class TrainJobDetailNotInCacheError(MindInsightException): | |||||
| super().__init__(DataVisualErrors.TRAIN_JOB_DETAIL_NOT_IN_CACHE, | super().__init__(DataVisualErrors.TRAIN_JOB_DETAIL_NOT_IN_CACHE, | ||||
| error_msg, | error_msg, | ||||
| http_code=400) | http_code=400) | ||||
| class TensorTooLargeError(MindInsightException): | |||||
| """The given tensor is too large to be shown on the UI.""" | |||||
| def __init__(self, error_detail): | |||||
| error_msg = f'Tensor is too large to show on UI. Detail: {error_detail}' | |||||
| super(TensorTooLargeError, self).__init__(DataVisualErrors.TENSOR_TOO_LARGE, | |||||
| error_msg, | |||||
| http_code=400) | |||||
| @@ -482,10 +482,9 @@ class _SummaryParser(_Parser): | |||||
| elif plugin == PluginNameEnum.TENSOR.value: | elif plugin == PluginNameEnum.TENSOR.value: | ||||
| tensor_event_value = TensorContainer(tensor_event_value) | tensor_event_value = TensorContainer(tensor_event_value) | ||||
| if tensor_event_value.size > MAX_TENSOR_COUNT: | |||||
| if tensor_event_value.error_code is not None: | |||||
| logger.warning('tag: %s/tensor, dims: %s, tensor count: %d exceeds %d and drop it.', | logger.warning('tag: %s/tensor, dims: %s, tensor count: %d exceeds %d and drop it.', | ||||
| value.tag, tensor_event_value.dims, tensor_event_value.size, MAX_TENSOR_COUNT) | value.tag, tensor_event_value.dims, tensor_event_value.size, MAX_TENSOR_COUNT) | ||||
| return None | |||||
| elif plugin == PluginNameEnum.IMAGE.value: | elif plugin == PluginNameEnum.IMAGE.value: | ||||
| tensor_event_value = ImageContainer(tensor_event_value) | tensor_event_value = ImageContainer(tensor_event_value) | ||||
| @@ -17,10 +17,12 @@ import numpy as np | |||||
| from mindinsight.datavisual.data_transform.histogram import Histogram, Bucket | from mindinsight.datavisual.data_transform.histogram import Histogram, Bucket | ||||
| from mindinsight.datavisual.utils.utils import calc_histogram_bins | from mindinsight.datavisual.utils.utils import calc_histogram_bins | ||||
| from mindinsight.datavisual.common.exceptions import TensorTooLargeError | |||||
| from mindinsight.utils.exceptions import ParamValueError | from mindinsight.utils.exceptions import ParamValueError | ||||
| from mindinsight.utils.tensor import TensorUtils | from mindinsight.utils.tensor import TensorUtils | ||||
| MAX_TENSOR_COUNT = 10000000 | MAX_TENSOR_COUNT = 10000000 | ||||
| TENSOR_TOO_LARGE_ERROR = TensorTooLargeError("").error_code | |||||
| def calc_original_buckets(np_value, stats): | def calc_original_buckets(np_value, stats): | ||||
| @@ -74,6 +76,10 @@ class TensorContainer: | |||||
| self._dims = tuple(tensor_message.dims) | self._dims = tuple(tensor_message.dims) | ||||
| self._data_type = tensor_message.data_type | self._data_type = tensor_message.data_type | ||||
| self._np_array = self.get_ndarray(tensor_message.float_data) | self._np_array = self.get_ndarray(tensor_message.float_data) | ||||
| self._error_code = None | |||||
| if self._np_array.size > MAX_TENSOR_COUNT: | |||||
| self._error_code = TENSOR_TOO_LARGE_ERROR | |||||
| self._np_array = np.array([]) | |||||
| self._stats = TensorUtils.get_statistics_from_tensor(self._np_array) | self._stats = TensorUtils.get_statistics_from_tensor(self._np_array) | ||||
| original_buckets = calc_original_buckets(self._np_array, self._stats) | original_buckets = calc_original_buckets(self._np_array, self._stats) | ||||
| self._count = sum(bucket.count for bucket in original_buckets) | self._count = sum(bucket.count for bucket in original_buckets) | ||||
| @@ -81,11 +87,17 @@ class TensorContainer: | |||||
| self._min = self._stats.min | self._min = self._stats.min | ||||
| self._histogram = Histogram(tuple(original_buckets), self._max, self._min, self._count) | self._histogram = Histogram(tuple(original_buckets), self._max, self._min, self._count) | ||||
| @property | @property | ||||
| def size(self): | def size(self): | ||||
| """Get size of tensor.""" | """Get size of tensor.""" | ||||
| return self._np_array.size | return self._np_array.size | ||||
| @property | |||||
| def error_code(self): | |||||
| """Get error code of tensor.""" | |||||
| return self._error_code | |||||
| @property | @property | ||||
| def dims(self): | def dims(self): | ||||
| """Get dims of tensor.""" | """Get dims of tensor.""" | ||||
| @@ -128,6 +140,8 @@ class TensorContainer: | |||||
| def buckets(self): | def buckets(self): | ||||
| """Get histogram buckets.""" | """Get histogram buckets.""" | ||||
| if self._histogram is None: | |||||
| return None | |||||
| return self._histogram.buckets() | return self._histogram.buckets() | ||||
| def get_ndarray(self, tensor): | def get_ndarray(self, tensor): | ||||
| @@ -23,7 +23,7 @@ from mindinsight.utils.tensor import TensorUtils, MAX_DIMENSIONS_FOR_TENSOR | |||||
| from mindinsight.conf.constants import MAX_TENSOR_RESPONSE_DATA_SIZE | from mindinsight.conf.constants import MAX_TENSOR_RESPONSE_DATA_SIZE | ||||
| from mindinsight.datavisual.common.validation import Validation | from mindinsight.datavisual.common.validation import Validation | ||||
| from mindinsight.datavisual.common.exceptions import StepTensorDataNotInCacheError, TensorNotExistError | from mindinsight.datavisual.common.exceptions import StepTensorDataNotInCacheError, TensorNotExistError | ||||
| from mindinsight.datavisual.common.exceptions import ResponseDataExceedMaxValueError | |||||
| from mindinsight.datavisual.common.exceptions import ResponseDataExceedMaxValueError, TensorTooLargeError | |||||
| from mindinsight.datavisual.data_transform.tensor_container import TensorContainer | from mindinsight.datavisual.data_transform.tensor_container import TensorContainer | ||||
| from mindinsight.datavisual.processors.base_processor import BaseProcessor | from mindinsight.datavisual.processors.base_processor import BaseProcessor | ||||
| from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2 | from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2 | ||||
| @@ -153,7 +153,9 @@ class TensorProcessor(BaseProcessor): | |||||
| "data_type": anf_ir_pb2.DataType.Name(value.data_type) | "data_type": anf_ir_pb2.DataType.Name(value.data_type) | ||||
| } | } | ||||
| if detail and detail == 'stats': | if detail and detail == 'stats': | ||||
| stats = TensorUtils.get_statistics_dict(stats=value.stats, overall_stats=value.stats) | |||||
| stats = None | |||||
| if value.error_code is None: | |||||
| stats = TensorUtils.get_statistics_dict(stats=value.stats, overall_stats=value.stats) | |||||
| value_dict.update({"statistics": stats}) | value_dict.update({"statistics": stats}) | ||||
| values.append({ | values.append({ | ||||
| @@ -206,6 +208,8 @@ class TensorProcessor(BaseProcessor): | |||||
| if step != tensor.step: | if step != tensor.step: | ||||
| continue | continue | ||||
| step_in_cache = True | step_in_cache = True | ||||
| if value.error_code is not None: | |||||
| raise TensorTooLargeError("Step: {}".format(tensor.step)) | |||||
| res_data = TensorUtils.get_specific_dims_data(value.ndarray, dims) | res_data = TensorUtils.get_specific_dims_data(value.ndarray, dims) | ||||
| flatten_data = res_data.flatten().tolist() | flatten_data = res_data.flatten().tolist() | ||||
| if len(flatten_data) > MAX_TENSOR_RESPONSE_DATA_SIZE: | if len(flatten_data) > MAX_TENSOR_RESPONSE_DATA_SIZE: | ||||
| @@ -285,6 +289,8 @@ class TensorProcessor(BaseProcessor): | |||||
| for tensor in tensors: | for tensor in tensors: | ||||
| # This value is an instance of TensorContainer | # This value is an instance of TensorContainer | ||||
| value = tensor.value | value = tensor.value | ||||
| if value.error_code is not None: | |||||
| raise TensorTooLargeError("Step: {}".format(tensor.step)) | |||||
| buckets = value.buckets() | buckets = value.buckets() | ||||
| values.append({ | values.append({ | ||||
| "wall_time": tensor.wall_time, | "wall_time": tensor.wall_time, | ||||
| @@ -83,6 +83,7 @@ class DataVisualErrors(Enum): | |||||
| MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19 | MAX_RESPONSE_DATA_EXCEEDED_ERROR = 19 | ||||
| STEP_TENSOR_DATA_NOT_IN_CACHE = 20 | STEP_TENSOR_DATA_NOT_IN_CACHE = 20 | ||||
| CRC_LENGTH_FAILED = 21 | CRC_LENGTH_FAILED = 21 | ||||
| TENSOR_TOO_LARGE = 22 | |||||
| class ScriptConverterErrors(Enum): | class ScriptConverterErrors(Enum): | ||||
| @@ -216,6 +216,8 @@ class TensorUtils: | |||||
| ParamValueError, If the length of param dims is not equal to the length of tensor dims. | ParamValueError, If the length of param dims is not equal to the length of tensor dims. | ||||
| IndexError, If the param dims and tensor shape is unmatched. | IndexError, If the param dims and tensor shape is unmatched. | ||||
| """ | """ | ||||
| if ndarray.size == 0: | |||||
| return ndarray | |||||
| if len(ndarray.shape) != len(dims): | if len(ndarray.shape) != len(dims): | ||||
| raise ParamValueError("Invalid dims. The length of param dims and tensor shape should be the same.") | raise ParamValueError("Invalid dims. The length of param dims and tensor shape should be the same.") | ||||
| try: | try: | ||||