Merge pull request !48 from luopengting/lineage_lpt (tags/v0.2.0-alpha)
| @@ -95,14 +95,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): | |||
| Users can filter and sort all lineage information according to the search | |||
| condition. The supported filter fields include `summary_dir`, `network`, | |||
| etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`. | |||
| At the same time, the combined use of these fields and conditions is | |||
| supported. If you want to sort based on filter fields, the field of | |||
| `sorted_name` and `sorted_type` should be specified. | |||
| If the value type of a filter condition is `str`, as for `summary_dir` and | |||
| `lineage_type`, then its key can only be `in` or `eq`. At the same time, | |||
| the combined use of these fields and conditions is supported. If you want | |||
| to sort based on filter fields, the field of `sorted_name` and `sorted_type` | |||
| should be specified. | |||
| Users can use `lineage_type` to decide what kind of lineage information to | |||
| query. If the `lineage_type` is `dataset`, the query result is only the | |||
| lineage information related to data augmentation. If the `lineage_type` is | |||
| `model` or `None`, the query result is all lineage information. | |||
| query. If the `lineage_type` is not defined, the query result is all lineage | |||
| information. | |||
| Users can paginate the query results based on `offset` and `limit`. The `offset` | |||
| refers to the page number. The `limit` refers to the number of results per page. | |||
| @@ -147,6 +148,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): | |||
| - dataset_mark (dict): The filter condition of dataset mark. | |||
| - lineage_type (dict): The filter condition of lineage type. It decides | |||
| what kind of lineage information to query. Its value can be `dataset` | |||
| or `model`, e.g., {'in': ['dataset', 'model']}, {'eq': 'model'}, etc. | |||
| If its values contain `dataset`, the query result will contain the | |||
| lineage information related to data augmentation. If its values contain | |||
| `model`, the query result will contain model lineage information. | |||
| If it is not defined or it is a dict like {'in': ['dataset', 'model']}, | |||
| the query result is all lineage information. | |||
| - offset (int): Page number, the value range is [0, 100000]. | |||
| - limit (int): The number in one page, the value range is [1, 100]. | |||
| @@ -156,14 +166,8 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): | |||
| - sorted_type (str): Specify sort order. It can be `ascending` or | |||
| `descending`. | |||
| - lineage_type (str): It decides what kind of lineage information to | |||
| query. It can be `dataset` or `model`. If it is `dataset`, | |||
| the query result is only the lineage information related to data | |||
| augmentation. If it is `model` or `None`, the query result is all | |||
| lineage information. | |||
| Returns: | |||
| dict, all lineage information under summary base directory according to | |||
| dict, lineage information under summary base directory according to | |||
| search condition. | |||
| Raises: | |||
| @@ -196,7 +200,9 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): | |||
| >>> 'sorted_type': 'descending', | |||
| >>> 'limit': 3, | |||
| >>> 'offset': 0, | |||
| >>> 'lineage_type': 'model' | |||
| >>> 'lineage_type': { | |||
| >>> 'eq': 'model' | |||
| >>> } | |||
| >>> } | |||
| >>> summary_lineage = filter_summary_lineage(summary_base_dir) | |||
| >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition) | |||
| @@ -83,6 +83,8 @@ class LineageErrors(LineageErrorCodes): | |||
| LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK | |||
| # Dataset lineage error codes. | |||
| LINEAGE_PARAM_DATASET_MARK_ERROR = 0 | _DATASET_LINEAGE_ERROR_MASK | |||
| SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK | |||
| SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK | |||
| @@ -156,26 +158,27 @@ class LineageErrorMsg(Enum): | |||
| " 'eq', 'lt', 'gt', 'ge', 'le', 'in'." | |||
| LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \ | |||
| "should be a string" | |||
| "should be a string." | |||
| LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \ | |||
| " It should be a dict and the value should be a string" | |||
| " It should be a dict and the value should be a string." | |||
| LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \ | |||
| "and the value should be a integer between 0 and pow(2, 63) -1" | |||
| "and the value should be a integer between 0 and pow(2, 63) -1." | |||
| LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \ | |||
| "It should be a dict and the value should be a string" | |||
| "It should be a dict and the value should be a string." | |||
| LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \ | |||
| "and the value should be a integer between 0 and pow(2, 63) -1" | |||
| "and the value should be a integer between 0 and pow(2, 63) -1." | |||
| LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string" | |||
| LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string." | |||
| LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string" | |||
| LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. " \ | |||
| "It should be a dict and the value should be a string." | |||
| LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \ | |||
| "It should be a dict and the value should be a string" | |||
| "It should be a dict and the value should be a string." | |||
| LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \ | |||
| "It should be a float." | |||
| @@ -184,7 +187,7 @@ class LineageErrorMsg(Enum): | |||
| "It should be an integer between 0 and pow(2, 63) -1." | |||
| LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \ | |||
| "It should be a dict and the value should be a float or a integer" | |||
| "It should be a dict and the value should be a float or a integer." | |||
| LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \ | |||
| "It should be a string." | |||
| @@ -195,6 +198,9 @@ class LineageErrorMsg(Enum): | |||
| LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \ | |||
| "It should be 'dataset' or 'model'." | |||
| LINEAGE_PARAM_DATASET_MARK_ERROR = "The parameter dataset_mark is invalid. " \ | |||
| "It should be a string." | |||
| SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}" | |||
| SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}" | |||
| @@ -129,6 +129,7 @@ class SearchModelConditionParameter(Schema): | |||
| offset = fields.Int(validate=lambda n: 0 <= n <= 100000) | |||
| sorted_name = fields.Str() | |||
| sorted_type = fields.Str(allow_none=True) | |||
| dataset_mark = fields.Dict() | |||
| lineage_type = fields.Dict() | |||
| @staticmethod | |||
| @@ -137,7 +138,7 @@ class SearchModelConditionParameter(Schema): | |||
| for key, value in data.items(): | |||
| if key == "in": | |||
| if not isinstance(value, (list, tuple)): | |||
| raise ValidationError("In operation's value must be list or tuple.") | |||
| raise ValidationError("The value of `in` operation must be list or tuple.") | |||
| else: | |||
| if not isinstance(value, value_type): | |||
| raise ValidationError("Wrong value type.") | |||
| @@ -153,12 +154,20 @@ class SearchModelConditionParameter(Schema): | |||
| for key, value in data.items(): | |||
| if key == "in": | |||
| if not isinstance(value, (list, tuple)): | |||
| raise ValidationError("In operation's value must be list or tuple.") | |||
| raise ValidationError("The value of `in` operation must be list or tuple.") | |||
| else: | |||
| if isinstance(value, bool) or \ | |||
| (not isinstance(value, float) and not isinstance(value, int)): | |||
| raise ValidationError("Wrong value type.") | |||
| @staticmethod | |||
| def check_operation(data): | |||
| """Check input param's compare operation.""" | |||
| if not set(data.keys()).issubset(['in', 'eq']): | |||
| raise ValidationError("Its operation should be `in` or `eq`.") | |||
| if len(data.keys()) > 1: | |||
| raise ValidationError("More than one operation.") | |||
| @validates("loss") | |||
| def check_loss(self, data): | |||
| """Check loss.""" | |||
| @@ -172,11 +181,13 @@ class SearchModelConditionParameter(Schema): | |||
| @validates("loss_function") | |||
| def check_loss_function(self, data): | |||
| """Check loss function.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("train_dataset_path") | |||
| def check_train_dataset_path(self, data): | |||
| """Check train dataset path.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("train_dataset_count") | |||
| @@ -187,6 +198,7 @@ class SearchModelConditionParameter(Schema): | |||
| @validates("test_dataset_path") | |||
| def check_test_dataset_path(self, data): | |||
| """Check test dataset path.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("test_dataset_count") | |||
| @@ -197,11 +209,13 @@ class SearchModelConditionParameter(Schema): | |||
| @validates("network") | |||
| def check_network(self, data): | |||
| """Check network.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("optimizer") | |||
| def check_optimizer(self, data): | |||
| """Check optimizer.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("epoch") | |||
| @@ -222,11 +236,19 @@ class SearchModelConditionParameter(Schema): | |||
| @validates("summary_dir") | |||
| def check_summary_dir(self, data): | |||
| """Check summary dir.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("dataset_mark") | |||
| def check_dataset_mark(self, data): | |||
| """Check dataset mark.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| @validates("lineage_type") | |||
| def check_lineage_type(self, data): | |||
| """Check lineage type.""" | |||
| SearchModelConditionParameter.check_operation(data) | |||
| SearchModelConditionParameter.check_dict_value_type(data, str) | |||
| recv_types = [] | |||
| for key, value in data.items(): | |||
| @@ -243,7 +265,7 @@ class SearchModelConditionParameter(Schema): | |||
| def check_comparision(self, data, **kwargs): | |||
| """Check comparison for all parameters in schema.""" | |||
| for attr, condition in data.items(): | |||
| if attr in ["limit", "offset", "sorted_name", "sorted_type"]: | |||
| if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']: | |||
| continue | |||
| if not isinstance(attr, str): | |||
| @@ -256,13 +278,6 @@ class SearchModelConditionParameter(Schema): | |||
| raise LineageParamTypeError("The search_condition element {} should be dict." | |||
| .format(attr)) | |||
| if attr in ["summary_dir", "lineage_type"]: | |||
| if not set(condition.keys()).issubset(['in', 'eq']): | |||
| raise LineageParamValueError("Invalid operation of %s." % attr) | |||
| if len(condition.keys()) > 1: | |||
| raise LineageParamValueError("More than one operation of %s." % attr) | |||
| continue | |||
| for key in condition.keys(): | |||
| if key not in ["eq", "lt", "gt", "le", "ge", "in"]: | |||
| raise LineageParamValueError("The compare condition should be in " | |||
| @@ -63,6 +63,7 @@ SEARCH_MODEL_ERROR_MAPPING = { | |||
| 'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR, | |||
| 'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR, | |||
| 'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR, | |||
| 'dataset_mark': LineageErrors.LINEAGE_PARAM_DATASET_MARK_ERROR, | |||
| 'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR | |||
| } | |||
| @@ -97,6 +98,7 @@ SEARCH_MODEL_ERROR_MSG_MAPPING = { | |||
| 'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value, | |||
| 'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value, | |||
| 'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value, | |||
| 'dataset_mark': LineageErrorMsg.LINEAGE_PARAM_DATASET_MARK_ERROR.value, | |||
| 'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value | |||
| } | |||
| @@ -238,10 +240,14 @@ def validate_search_model_condition(schema, data): | |||
| MindInsightException: If the parameters are invalid. | |||
| """ | |||
| error = schema().validate(data) | |||
| for error_key in error.keys(): | |||
| for (error_key, error_msgs) in error.items(): | |||
| if error_key in SEARCH_MODEL_ERROR_MAPPING.keys(): | |||
| error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key) | |||
| error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key) | |||
| for err_msg in error_msgs: | |||
| if 'operation' in err_msg.lower(): | |||
| error_msg = f'The parameter {error_key} is invalid. {err_msg}' | |||
| break | |||
| log.error(error_msg) | |||
| raise MindInsightException(error=error_code, message=error_msg) | |||
| @@ -417,7 +423,7 @@ def validate_user_defined_info(user_defined_info): | |||
| "Only str is permitted now.".format(type(key)) | |||
| log.error(error_msg) | |||
| raise LineageParamTypeError(error_msg) | |||
| if not isinstance(key, (int, str, float)): | |||
| if not isinstance(value, (int, str, float)): | |||
| error_msg = "Dict value type {} is not supported in user defined info." \ | |||
| "Only str, int and float are permitted now.".format(type(value)) | |||
| log.error(error_msg) | |||
| @@ -318,13 +318,14 @@ class Querier: | |||
| for offset_result in offset_results: | |||
| for obj_name in ["metric", "user_defined"]: | |||
| obj = getattr(offset_result, obj_name) | |||
| require = True if obj_name == "metric" else False | |||
| if obj and isinstance(obj, dict): | |||
| for key, value in obj.items(): | |||
| label = obj_name + "/" + key | |||
| label = f'{obj_name}/{key}' | |||
| customized[label] = dict() | |||
| customized[label]["label"] = label | |||
| # user defined info is default displayed | |||
| customized[label]["required"] = True | |||
| # user defined info is not displayed by default | |||
| customized[label]["required"] = require | |||
| customized[label]["type"] = type(value).__name__ | |||
| lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value) | |||
| @@ -37,8 +37,7 @@ FIELD_MAPPING = { | |||
| "batch_size": Field('hyper_parameters', 'batch_size'), | |||
| "loss": Field('algorithm', 'loss'), | |||
| "model_size": Field('model', 'size'), | |||
| "dataset_mark": Field('dataset_mark', None), | |||
| "lineage_type": Field(None, None) | |||
| "dataset_mark": Field('dataset_mark', None) | |||
| } | |||
| @@ -755,7 +755,7 @@ class TestModelApi(TestCase): | |||
| @pytest.mark.env_single | |||
| def test_filter_summary_lineage_exception_7(self): | |||
| """Test the abnormal execution of the filter_summary_lineage interface.""" | |||
| condition_keys = ["summary_dir", "lineage_type"] | |||
| condition_keys = ["summary_dir", "lineage_type", "loss_function", "optimizer", "network", "dataset_mark"] | |||
| for condition_key in condition_keys: | |||
| # the condition type is not supported for string-valued fields (only `in` and `eq` are allowed) | |||
| search_condition = { | |||
| @@ -765,7 +765,7 @@ class TestModelApi(TestCase): | |||
| } | |||
| self.assertRaisesRegex( | |||
| LineageSearchConditionParamError, | |||
| f'Invalid operation of {condition_key}.', | |||
| f'The parameter {condition_key} is invalid. Its operation should be `in` or `eq`.', | |||
| filter_summary_lineage, | |||
| BASE_SUMMARY_DIR, | |||
| search_condition | |||
| @@ -780,7 +780,7 @@ class TestModelApi(TestCase): | |||
| } | |||
| self.assertRaisesRegex( | |||
| LineageSearchConditionParamError, | |||
| f'More than one operation of {condition_key}.', | |||
| f'The parameter {condition_key} is invalid. More than one operation.', | |||
| filter_summary_lineage, | |||
| BASE_SUMMARY_DIR, | |||
| search_condition | |||
| @@ -793,11 +793,12 @@ class TestModelApi(TestCase): | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_single | |||
| def test_filter_summary_lineage_exception_8(self): | |||
| """Test the abnormal execution of the filter_summary_lineage interface.""" | |||
| invalid_lineage_types = ['xxx', None] | |||
| for lineage_type in invalid_lineage_types: | |||
| search_condition = { | |||
| 'lineage_type': { | |||
| 'in': lineage_type | |||
| 'eq': lineage_type | |||
| } | |||
| } | |||
| self.assertRaisesRegex( | |||
| @@ -815,6 +816,7 @@ class TestModelApi(TestCase): | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.env_single | |||
| def test_filter_summary_lineage_exception_9(self): | |||
| """Test the abnormal execution of the filter_summary_lineage interface.""" | |||
| invalid_sorted_names = ['xxx', 'metric_', 1] | |||
| for sorted_name in invalid_sorted_names: | |||
| search_condition = { | |||
| @@ -82,7 +82,7 @@ class TestMsDataLoader: | |||
| ms_loader = MSDataLoader(summary_dir) | |||
| ms_loader._latest_summary_filename = 'summary.00' | |||
| ms_loader.load() | |||
| assert ms_loader._latest_summary_filename == 'summary.01' | |||
| shutil.rmtree(summary_dir) | |||
| assert ms_loader._latest_summary_file_size == RECORD_LEN | |||
| tag = ms_loader.get_events_data().list_tags_by_plugin('scalar') | |||
| tensors = ms_loader.get_events_data().tensors(tag[0]) | |||
| @@ -101,8 +101,7 @@ class TestValidateSearchModelCondition(TestCase): | |||
| } | |||
| } | |||
| self._assert_raise_of_mindinsight_exception( | |||
| "The parameter learning_rate is invalid. It should be a dict and " | |||
| "the value should be a float or a integer", | |||
| "The value of `in` operation must be list or tuple.", | |||
| condition | |||
| ) | |||
| @@ -136,8 +135,8 @@ class TestValidateSearchModelCondition(TestCase): | |||
| } | |||
| } | |||
| self._assert_raise_of_mindinsight_exception( | |||
| "The parameter loss_function is invalid. It should be a dict and " | |||
| "the value should be a string", | |||
| "The parameter loss_function is invalid. " | |||
| "Its operation should be `in` or `eq`.", | |||
| condition | |||
| ) | |||
| @@ -147,8 +146,7 @@ class TestValidateSearchModelCondition(TestCase): | |||
| } | |||
| } | |||
| self._assert_raise_of_mindinsight_exception( | |||
| "The parameter train_dataset_count is invalid. It should be a dict " | |||
| "and the value should be a integer between 0", | |||
| "The value of `in` operation must be list or tuple.", | |||
| condition | |||
| ) | |||
| @@ -161,8 +159,8 @@ class TestValidateSearchModelCondition(TestCase): | |||
| } | |||
| } | |||
| self._assert_raise_of_mindinsight_exception( | |||
| "The parameter network is invalid. It should be a dict and " | |||
| "the value should be a string", | |||
| "The parameter network is invalid. " | |||
| "Its operation should be `in` or `eq`.", | |||
| condition | |||
| ) | |||