From 670bc720175f06e667399f72c7ee95da34db26b3 Mon Sep 17 00:00:00 2001 From: luopengting Date: Mon, 20 Apr 2020 20:26:35 +0800 Subject: [PATCH] enhance validation for cmp operation, set user-defined's required value as False --- mindinsight/lineagemgr/api/model.py | 34 ++++++++++-------- .../common/exceptions/error_code.py | 24 ++++++++----- .../common/validator/model_parameter.py | 35 +++++++++++++------ .../lineagemgr/common/validator/validate.py | 10 ++++-- mindinsight/lineagemgr/querier/querier.py | 7 ++-- mindinsight/lineagemgr/querier/query_model.py | 3 +- .../st/func/lineagemgr/api/test_model_api.py | 10 +++--- .../data_transform/test_ms_data_loader.py | 2 +- .../common/validator/test_validate.py | 14 ++++---- 9 files changed, 86 insertions(+), 53 deletions(-) diff --git a/mindinsight/lineagemgr/api/model.py b/mindinsight/lineagemgr/api/model.py index 62dd7b8a..ee3e129a 100644 --- a/mindinsight/lineagemgr/api/model.py +++ b/mindinsight/lineagemgr/api/model.py @@ -95,14 +95,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): Users can filter and sort all lineage information according to the search condition. The supported filter fields include `summary_dir`, `network`, etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`. - At the same time, the combined use of these fields and conditions is - supported. If you want to sort based on filter fields, the field of - `sorted_name` and `sorted_type` should be specified. + If the value type of filter condition is `str`, such as summary_dir and + lineage_type, then its key can only be `in` and `eq`. At the same time, + the combined use of these fields and conditions is supported. If you want + to sort based on filter fields, the field of `sorted_name` and `sorted_type` + should be specified. Users can use `lineage_type` to decide what kind of lineage information to - query. 
If the `lineage_type` is `dataset`, the query result is only the - lineage information related to data augmentation. If the `lineage_type` is - `model` or `None`, the query result is all lineage information. + query. If the `lineage_type` is not defined, the query result is all lineage + information. Users can paginate query result based on `offset` and `limit`. The `offset` refers to page number. The `limit` refers to the number in one page. @@ -147,6 +148,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): - dataset_mark (dict): The filter condition of dataset mark. + - lineage_type (dict): The filter condition of lineage type. It decides + what kind of lineage information to query. Its value can be `dataset` + or `model`, e.g., {'in': ['dataset', 'model']}, {'eq': 'model'}, etc. + If its values contain `dataset`, the query result will contain the + lineage information related to data augmentation. If its values contain + `model`, the query result will contain model lineage information. + If it is not defined or it is a dict like {'in': ['dataset', 'model']}, + the query result is all lineage information. + - offset (int): Page number, the value range is [0, 100000]. - limit (int): The number in one page, the value range is [1, 100]. @@ -156,14 +166,8 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): - sorted_type (str): Specify sort order. It can be `ascending` or `descending`. - - lineage_type (str): It decides what kind of lineage information to - query. It can be `dataset` or `model`. If it is `dataset`, - the query result is only the lineage information related to data - augmentation. If it is `model` or `None`, the query result is all - lineage information. - Returns: - dict, all lineage information under summary base directory according to + dict, lineage information under summary base directory according to search condition. 
Raises: @@ -196,7 +200,9 @@ def filter_summary_lineage(summary_base_dir, search_condition=None): >>> 'sorted_type': 'descending', >>> 'limit': 3, >>> 'offset': 0, - >>> 'lineage_type': 'model' + >>> 'lineage_type': { + >>> 'eq': 'model' + >>> } >>> } >>> summary_lineage = filter_summary_lineage(summary_base_dir) >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition) diff --git a/mindinsight/lineagemgr/common/exceptions/error_code.py b/mindinsight/lineagemgr/common/exceptions/error_code.py index 0620112b..f756540b 100644 --- a/mindinsight/lineagemgr/common/exceptions/error_code.py +++ b/mindinsight/lineagemgr/common/exceptions/error_code.py @@ -83,6 +83,8 @@ class LineageErrors(LineageErrorCodes): LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK + # Dataset lineage error codes. + LINEAGE_PARAM_DATASET_MARK_ERROR = 0 | _DATASET_LINEAGE_ERROR_MASK SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK @@ -156,26 +158,27 @@ class LineageErrorMsg(Enum): " 'eq', 'lt', 'gt', 'ge', 'le', 'in'." LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \ - "should be a string" + "should be a string." LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \ - " It should be a dict and the value should be a string" + " It should be a dict and the value should be a string." LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \ - "and the value should be a integer between 0 and pow(2, 63) -1" + "and the value should be a integer between 0 and pow(2, 63) -1." LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \ - "It should be a dict and the value should be a string" + "It should be a dict and the value should be a string." 
LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \ - "and the value should be a integer between 0 and pow(2, 63) -1" + "and the value should be a integer between 0 and pow(2, 63) -1." - LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string" + LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string." - LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string" + LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. " \ + "It should be a dict and the value should be a string." LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \ - "It should be a dict and the value should be a string" + "It should be a dict and the value should be a string." LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \ "It should be a float." @@ -184,7 +187,7 @@ class LineageErrorMsg(Enum): "It should be an integer between 0 and pow(2, 63) -1." LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \ - "It should be a dict and the value should be a float or a integer" + "It should be a dict and the value should be a float or a integer." LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \ "It should be a string." @@ -195,6 +198,9 @@ class LineageErrorMsg(Enum): LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \ "It should be 'dataset' or 'model'." + LINEAGE_PARAM_DATASET_MARK_ERROR = "The parameter dataset_mark is invalid. " \ + "It should be a string." + SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}" SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. 
{}" diff --git a/mindinsight/lineagemgr/common/validator/model_parameter.py b/mindinsight/lineagemgr/common/validator/model_parameter.py index b5fc0cbc..ad768694 100644 --- a/mindinsight/lineagemgr/common/validator/model_parameter.py +++ b/mindinsight/lineagemgr/common/validator/model_parameter.py @@ -129,6 +129,7 @@ class SearchModelConditionParameter(Schema): offset = fields.Int(validate=lambda n: 0 <= n <= 100000) sorted_name = fields.Str() sorted_type = fields.Str(allow_none=True) + dataset_mark = fields.Dict() lineage_type = fields.Dict() @staticmethod @@ -137,7 +138,7 @@ class SearchModelConditionParameter(Schema): for key, value in data.items(): if key == "in": if not isinstance(value, (list, tuple)): - raise ValidationError("In operation's value must be list or tuple.") + raise ValidationError("The value of `in` operation must be list or tuple.") else: if not isinstance(value, value_type): raise ValidationError("Wrong value type.") @@ -153,12 +154,20 @@ class SearchModelConditionParameter(Schema): for key, value in data.items(): if key == "in": if not isinstance(value, (list, tuple)): - raise ValidationError("In operation's value must be list or tuple.") + raise ValidationError("The value of `in` operation must be list or tuple.") else: if isinstance(value, bool) or \ (not isinstance(value, float) and not isinstance(value, int)): raise ValidationError("Wrong value type.") + @staticmethod + def check_operation(data): + """Check input param's compare operation.""" + if not set(data.keys()).issubset(['in', 'eq']): + raise ValidationError("Its operation should be `in` or `eq`.") + if len(data.keys()) > 1: + raise ValidationError("More than one operation.") + @validates("loss") def check_loss(self, data): """Check loss.""" @@ -172,11 +181,13 @@ class SearchModelConditionParameter(Schema): @validates("loss_function") def check_loss_function(self, data): """Check loss function.""" + SearchModelConditionParameter.check_operation(data) 
SearchModelConditionParameter.check_dict_value_type(data, str) @validates("train_dataset_path") def check_train_dataset_path(self, data): """Check train dataset path.""" + SearchModelConditionParameter.check_operation(data) SearchModelConditionParameter.check_dict_value_type(data, str) @validates("train_dataset_count") @@ -187,6 +198,7 @@ class SearchModelConditionParameter(Schema): @validates("test_dataset_path") def check_test_dataset_path(self, data): """Check test dataset path.""" + SearchModelConditionParameter.check_operation(data) SearchModelConditionParameter.check_dict_value_type(data, str) @validates("test_dataset_count") @@ -197,11 +209,13 @@ class SearchModelConditionParameter(Schema): @validates("network") def check_network(self, data): """Check network.""" + SearchModelConditionParameter.check_operation(data) SearchModelConditionParameter.check_dict_value_type(data, str) @validates("optimizer") def check_optimizer(self, data): """Check optimizer.""" + SearchModelConditionParameter.check_operation(data) SearchModelConditionParameter.check_dict_value_type(data, str) @validates("epoch") @@ -222,11 +236,19 @@ class SearchModelConditionParameter(Schema): @validates("summary_dir") def check_summary_dir(self, data): """Check summary dir.""" + SearchModelConditionParameter.check_operation(data) + SearchModelConditionParameter.check_dict_value_type(data, str) + + @validates("dataset_mark") + def check_dataset_mark(self, data): + """Check dataset mark.""" + SearchModelConditionParameter.check_operation(data) SearchModelConditionParameter.check_dict_value_type(data, str) @validates("lineage_type") def check_lineage_type(self, data): """Check lineage type.""" + SearchModelConditionParameter.check_operation(data) SearchModelConditionParameter.check_dict_value_type(data, str) recv_types = [] for key, value in data.items(): @@ -243,7 +265,7 @@ class SearchModelConditionParameter(Schema): def check_comparision(self, data, **kwargs): """Check comparision for all 
parameters in schema.""" for attr, condition in data.items(): - if attr in ["limit", "offset", "sorted_name", "sorted_type"]: + if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']: continue if not isinstance(attr, str): @@ -256,13 +278,6 @@ class SearchModelConditionParameter(Schema): raise LineageParamTypeError("The search_condition element {} should be dict." .format(attr)) - if attr in ["summary_dir", "lineage_type"]: - if not set(condition.keys()).issubset(['in', 'eq']): - raise LineageParamValueError("Invalid operation of %s." % attr) - if len(condition.keys()) > 1: - raise LineageParamValueError("More than one operation of %s." % attr) - continue - for key in condition.keys(): if key not in ["eq", "lt", "gt", "le", "ge", "in"]: raise LineageParamValueError("The compare condition should be in " diff --git a/mindinsight/lineagemgr/common/validator/validate.py b/mindinsight/lineagemgr/common/validator/validate.py index 07925870..0d579d10 100644 --- a/mindinsight/lineagemgr/common/validator/validate.py +++ b/mindinsight/lineagemgr/common/validator/validate.py @@ -63,6 +63,7 @@ SEARCH_MODEL_ERROR_MAPPING = { 'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR, 'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR, 'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR, + 'dataset_mark': LineageErrors.LINEAGE_PARAM_DATASET_MARK_ERROR, 'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR } @@ -97,6 +98,7 @@ SEARCH_MODEL_ERROR_MSG_MAPPING = { 'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value, 'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value, 'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value, + 'dataset_mark': LineageErrorMsg.LINEAGE_PARAM_DATASET_MARK_ERROR.value, 'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value } @@ -238,10 +240,14 @@ def validate_search_model_condition(schema, data): MindInsightException: If the parameters are invalid. 
""" error = schema().validate(data) - for error_key in error.keys(): + for (error_key, error_msgs) in error.items(): if error_key in SEARCH_MODEL_ERROR_MAPPING.keys(): error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key) error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key) + for err_msg in error_msgs: + if 'operation' in err_msg.lower(): + error_msg = f'The parameter {error_key} is invalid. {err_msg}' + break log.error(error_msg) raise MindInsightException(error=error_code, message=error_msg) @@ -417,7 +423,7 @@ def validate_user_defined_info(user_defined_info): "Only str is permitted now.".format(type(key)) log.error(error_msg) raise LineageParamTypeError(error_msg) - if not isinstance(key, (int, str, float)): + if not isinstance(value, (int, str, float)): error_msg = "Dict value type {} is not supported in user defined info." \ "Only str, int and float are permitted now.".format(type(value)) log.error(error_msg) diff --git a/mindinsight/lineagemgr/querier/querier.py b/mindinsight/lineagemgr/querier/querier.py index 63d366fd..1ff6ef47 100644 --- a/mindinsight/lineagemgr/querier/querier.py +++ b/mindinsight/lineagemgr/querier/querier.py @@ -318,13 +318,14 @@ class Querier: for offset_result in offset_results: for obj_name in ["metric", "user_defined"]: obj = getattr(offset_result, obj_name) + require = True if obj_name == "metric" else False if obj and isinstance(obj, dict): for key, value in obj.items(): - label = obj_name + "/" + key + label = f'{obj_name}/{key}' customized[label] = dict() customized[label]["label"] = label - # user defined info is default displayed - customized[label]["required"] = True + # user defined info is not displayed by default + customized[label]["required"] = require customized[label]["type"] = type(value).__name__ lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value) diff --git a/mindinsight/lineagemgr/querier/query_model.py b/mindinsight/lineagemgr/querier/query_model.py index b797cdc1..d6c92f8c 100644 --- 
a/mindinsight/lineagemgr/querier/query_model.py +++ b/mindinsight/lineagemgr/querier/query_model.py @@ -37,8 +37,7 @@ FIELD_MAPPING = { "batch_size": Field('hyper_parameters', 'batch_size'), "loss": Field('algorithm', 'loss'), "model_size": Field('model', 'size'), - "dataset_mark": Field('dataset_mark', None), - "lineage_type": Field(None, None) + "dataset_mark": Field('dataset_mark', None) } diff --git a/tests/st/func/lineagemgr/api/test_model_api.py b/tests/st/func/lineagemgr/api/test_model_api.py index c824ee69..a45960e2 100644 --- a/tests/st/func/lineagemgr/api/test_model_api.py +++ b/tests/st/func/lineagemgr/api/test_model_api.py @@ -755,7 +755,7 @@ class TestModelApi(TestCase): @pytest.mark.env_single def test_filter_summary_lineage_exception_7(self): """Test the abnormal execution of the filter_summary_lineage interface.""" - condition_keys = ["summary_dir", "lineage_type"] + condition_keys = ["summary_dir", "lineage_type", "loss_function", "optimizer", "network", "dataset_mark"] for condition_key in condition_keys: # the condition type not supported in summary_dir and lineage_type search_condition = { @@ -765,7 +765,7 @@ class TestModelApi(TestCase): } self.assertRaisesRegex( LineageSearchConditionParamError, - f'Invalid operation of {condition_key}.', + f'The parameter {condition_key} is invalid. Its operation should be `in` or `eq`.', filter_summary_lineage, BASE_SUMMARY_DIR, search_condition @@ -780,7 +780,7 @@ class TestModelApi(TestCase): } self.assertRaisesRegex( LineageSearchConditionParamError, - f'More than one operation of {condition_key}.', + f'The parameter {condition_key} is invalid. 
More than one operation.', filter_summary_lineage, BASE_SUMMARY_DIR, search_condition @@ -793,11 +793,12 @@ class TestModelApi(TestCase): @pytest.mark.platform_x86_cpu @pytest.mark.env_single def test_filter_summary_lineage_exception_8(self): + """Test the abnormal execution of the filter_summary_lineage interface.""" invalid_lineage_types = ['xxx', None] for lineage_type in invalid_lineage_types: search_condition = { 'lineage_type': { - 'in': lineage_type + 'eq': lineage_type } } self.assertRaisesRegex( @@ -815,6 +816,7 @@ class TestModelApi(TestCase): @pytest.mark.platform_x86_cpu @pytest.mark.env_single def test_filter_summary_lineage_exception_9(self): + """Test the abnormal execution of the filter_summary_lineage interface.""" invalid_sorted_names = ['xxx', 'metric_', 1] for sorted_name in invalid_sorted_names: search_condition = { diff --git a/tests/ut/datavisual/data_transform/test_ms_data_loader.py b/tests/ut/datavisual/data_transform/test_ms_data_loader.py index aa0bbf2f..bcbe329c 100644 --- a/tests/ut/datavisual/data_transform/test_ms_data_loader.py +++ b/tests/ut/datavisual/data_transform/test_ms_data_loader.py @@ -82,7 +82,7 @@ class TestMsDataLoader: ms_loader = MSDataLoader(summary_dir) ms_loader._latest_summary_filename = 'summary.00' ms_loader.load() - assert ms_loader._latest_summary_filename == 'summary.01' + shutil.rmtree(summary_dir) assert ms_loader._latest_summary_file_size == RECORD_LEN tag = ms_loader.get_events_data().list_tags_by_plugin('scalar') tensors = ms_loader.get_events_data().tensors(tag[0]) diff --git a/tests/ut/lineagemgr/common/validator/test_validate.py b/tests/ut/lineagemgr/common/validator/test_validate.py index 64f4aae6..bc5f8820 100644 --- a/tests/ut/lineagemgr/common/validator/test_validate.py +++ b/tests/ut/lineagemgr/common/validator/test_validate.py @@ -101,8 +101,7 @@ class TestValidateSearchModelCondition(TestCase): } } self._assert_raise_of_mindinsight_exception( - "The parameter learning_rate is invalid. 
It should be a dict and " - "the value should be a float or a integer", + "The value of `in` operation must be list or tuple.", condition ) @@ -136,8 +135,8 @@ class TestValidateSearchModelCondition(TestCase): } } self._assert_raise_of_mindinsight_exception( - "The parameter loss_function is invalid. It should be a dict and " - "the value should be a string", + "The parameter loss_function is invalid. " + "Its operation should be `in` or `eq`.", condition ) @@ -147,8 +146,7 @@ class TestValidateSearchModelCondition(TestCase): } } self._assert_raise_of_mindinsight_exception( - "The parameter train_dataset_count is invalid. It should be a dict " - "and the value should be a integer between 0", + "The value of `in` operation must be list or tuple.", condition ) @@ -161,8 +159,8 @@ class TestValidateSearchModelCondition(TestCase): } } self._assert_raise_of_mindinsight_exception( - "The parameter network is invalid. It should be a dict and " - "the value should be a string", + "The parameter network is invalid. " + "Its operation should be `in` or `eq`.", condition )