From 670bc720175f06e667399f72c7ee95da34db26b3 Mon Sep 17 00:00:00 2001
From: luopengting <luopengting@huawei.com>
Date: Mon, 20 Apr 2020 20:26:35 +0800
Subject: [PATCH] enhance validation for cmp operation, set user-defined info's
 required value as False

---
 mindinsight/lineagemgr/api/model.py           | 34 ++++++++++--------
 .../common/exceptions/error_code.py           | 24 ++++++++-----
 .../common/validator/model_parameter.py       | 35 +++++++++++++------
 .../lineagemgr/common/validator/validate.py   | 10 ++++--
 mindinsight/lineagemgr/querier/querier.py     |  7 ++--
 mindinsight/lineagemgr/querier/query_model.py |  3 +-
 .../st/func/lineagemgr/api/test_model_api.py  | 10 +++---
 .../data_transform/test_ms_data_loader.py     |  2 +-
 .../common/validator/test_validate.py         | 14 ++++----
 9 files changed, 86 insertions(+), 53 deletions(-)

diff --git a/mindinsight/lineagemgr/api/model.py b/mindinsight/lineagemgr/api/model.py
index 62dd7b8a..ee3e129a 100644
--- a/mindinsight/lineagemgr/api/model.py
+++ b/mindinsight/lineagemgr/api/model.py
@@ -95,14 +95,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
     Users can filter and sort all lineage information according to the search
     condition. The supported filter fields include `summary_dir`, `network`,
     etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
-    At the same time, the combined use of these fields and conditions is
-    supported. If you want to sort based on filter fields, the field of
-    `sorted_name` and `sorted_type` should be specified.
+    If the value type of filter condition is `str`, such as summary_dir and
+    lineage_type, then its key can only be `in` or `eq`. At the same time,
+    the combined use of these fields and conditions is supported. If you want
+    to sort based on filter fields, the fields `sorted_name` and `sorted_type`
+    should be specified.
 
     Users can use `lineage_type` to decide what kind of lineage information to
-    query. If the `lineage_type` is `dataset`, the query result is only the
-    lineage information related to data augmentation. If the `lineage_type` is
-    `model` or `None`, the query result is all lineage information.
+    query. If the `lineage_type` is not defined, the query result is all lineage
+    information.
 
     Users can paginate query result based on `offset` and `limit`. The `offset`
     refers to page number. The `limit` refers to the number in one page.
@@ -147,6 +148,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
 
             - dataset_mark (dict): The filter condition of dataset mark.
 
+            - lineage_type (dict): The filter condition of lineage type. It decides
+              what kind of lineage information to query. Its value can be `dataset`
+              or `model`, e.g., {'in': ['dataset', 'model']}, {'eq': 'model'}, etc.
+              If its values contain `dataset`, the query result will contain the
+              lineage information related to data augmentation. If its values contain
+              `model`, the query result will contain model lineage information.
+              If it is not defined or it is a dict like {'in': ['dataset', 'model']},
+              the query result is all lineage information.
+
             - offset (int): Page number, the value range is [0, 100000].
 
             - limit (int): The number in one page, the value range is [1, 100].
@@ -156,14 +166,8 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
             - sorted_type (str): Specify sort order. It can be `ascending` or
               `descending`.
 
-            - lineage_type (str): It decides what kind of lineage information to
-              query. It can be `dataset` or `model`. If it is `dataset`,
-              the query result is only the lineage information related to data
-              augmentation. If it is `model` or `None`, the query result is all
-              lineage information.
-
     Returns:
-        dict, all lineage information under summary base directory according to
+        dict, lineage information under summary base directory according to
         search condition.
 
     Raises:
@@ -196,7 +200,9 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
         >>>     'sorted_type': 'descending',
         >>>     'limit': 3,
         >>>     'offset': 0,
-        >>>     'lineage_type': 'model'
+        >>>     'lineage_type': {
+        >>>         'eq': 'model'
+        >>>     }
         >>> }
         >>> summary_lineage = filter_summary_lineage(summary_base_dir)
         >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)
diff --git a/mindinsight/lineagemgr/common/exceptions/error_code.py b/mindinsight/lineagemgr/common/exceptions/error_code.py
index 0620112b..f756540b 100644
--- a/mindinsight/lineagemgr/common/exceptions/error_code.py
+++ b/mindinsight/lineagemgr/common/exceptions/error_code.py
@@ -83,6 +83,8 @@ class LineageErrors(LineageErrorCodes):
     LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK
     LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK
 
+    # Dataset lineage error codes.
+    LINEAGE_PARAM_DATASET_MARK_ERROR = 0 | _DATASET_LINEAGE_ERROR_MASK
 
     SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK
     SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK
@@ -156,26 +158,27 @@ class LineageErrorMsg(Enum):
                                       " 'eq', 'lt', 'gt', 'ge', 'le', 'in'."
 
     LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \
-                                      "should be a string"
+                                      "should be a string."
 
     LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \
-                                       " It should be a dict and the value should be a string"
+                                       " It should be a dict and the value should be a string."
 
     LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \
-                                        "and the value should be a integer between 0 and pow(2, 63) -1"
+                                        "and the value should be a integer between 0 and pow(2, 63) -1."
 
     LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \
-                                      "It should be a dict and the value should be a string"
+                                      "It should be a dict and the value should be a string."
 
     LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \
-                                       "and the value should be a integer between 0 and pow(2, 63) -1"
+                                       "and the value should be a integer between 0 and pow(2, 63) -1."
 
-    LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string"
+    LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string."
 
-    LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string"
+    LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. " \
+                              "It should be a dict and the value should be a string."
 
     LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \
-                                  "It should be a dict and the value should be a string"
+                                  "It should be a dict and the value should be a string."
 
     LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \
                          "It should be a float."
@@ -184,7 +187,7 @@ class LineageErrorMsg(Enum):
                                "It should be an integer between 0 and pow(2, 63) -1."
 
     LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \
-                                  "It should be a dict and the value should be a float or a integer"
+                                  "It should be a dict and the value should be a float or a integer."
 
     LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \
                                       "It should be a string."
@@ -195,6 +198,9 @@ class LineageErrorMsg(Enum):
     LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \
                                        "It should be 'dataset' or 'model'."
 
+    LINEAGE_PARAM_DATASET_MARK_ERROR = "The parameter dataset_mark is invalid. " \
+                                       "It should be a string."
+
     SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}"
     SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}"
 
diff --git a/mindinsight/lineagemgr/common/validator/model_parameter.py b/mindinsight/lineagemgr/common/validator/model_parameter.py
index b5fc0cbc..ad768694 100644
--- a/mindinsight/lineagemgr/common/validator/model_parameter.py
+++ b/mindinsight/lineagemgr/common/validator/model_parameter.py
@@ -129,6 +129,7 @@ class SearchModelConditionParameter(Schema):
     offset = fields.Int(validate=lambda n: 0 <= n <= 100000)
     sorted_name = fields.Str()
     sorted_type = fields.Str(allow_none=True)
+    dataset_mark = fields.Dict()
     lineage_type = fields.Dict()
 
     @staticmethod
@@ -137,7 +138,7 @@ class SearchModelConditionParameter(Schema):
         for key, value in data.items():
             if key == "in":
                 if not isinstance(value, (list, tuple)):
-                    raise ValidationError("In operation's value must be list or tuple.")
+                    raise ValidationError("The value of `in` operation must be list or tuple.")
             else:
                 if not isinstance(value, value_type):
                     raise ValidationError("Wrong value type.")
@@ -153,12 +154,20 @@ class SearchModelConditionParameter(Schema):
         for key, value in data.items():
             if key == "in":
                 if not isinstance(value, (list, tuple)):
-                    raise ValidationError("In operation's value must be list or tuple.")
+                    raise ValidationError("The value of `in` operation must be list or tuple.")
             else:
                 if isinstance(value, bool) or \
                         (not isinstance(value, float) and not isinstance(value, int)):
                     raise ValidationError("Wrong value type.")
 
+    @staticmethod
+    def check_operation(data):
+        """Check input param's compare operation."""
+        if not set(data.keys()).issubset(['in', 'eq']):
+            raise ValidationError("Its operation should be `in` or `eq`.")
+        if len(data.keys()) > 1:
+            raise ValidationError("More than one operation.")
+
     @validates("loss")
     def check_loss(self, data):
         """Check loss."""
@@ -172,11 +181,13 @@ class SearchModelConditionParameter(Schema):
     @validates("loss_function")
     def check_loss_function(self, data):
         """Check loss function."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
 
     @validates("train_dataset_path")
     def check_train_dataset_path(self, data):
         """Check train dataset path."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
 
     @validates("train_dataset_count")
@@ -187,6 +198,7 @@ class SearchModelConditionParameter(Schema):
     @validates("test_dataset_path")
     def check_test_dataset_path(self, data):
         """Check test dataset path."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
 
     @validates("test_dataset_count")
@@ -197,11 +209,13 @@ class SearchModelConditionParameter(Schema):
     @validates("network")
     def check_network(self, data):
         """Check network."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
 
     @validates("optimizer")
     def check_optimizer(self, data):
         """Check optimizer."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
 
     @validates("epoch")
@@ -222,11 +236,19 @@ class SearchModelConditionParameter(Schema):
     @validates("summary_dir")
     def check_summary_dir(self, data):
         """Check summary dir."""
+        SearchModelConditionParameter.check_operation(data)
+        SearchModelConditionParameter.check_dict_value_type(data, str)
+
+    @validates("dataset_mark")
+    def check_dataset_mark(self, data):
+        """Check dataset mark."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
 
     @validates("lineage_type")
     def check_lineage_type(self, data):
         """Check lineage type."""
+        SearchModelConditionParameter.check_operation(data)
         SearchModelConditionParameter.check_dict_value_type(data, str)
         recv_types = []
         for key, value in data.items():
@@ -243,7 +265,7 @@ class SearchModelConditionParameter(Schema):
     def check_comparision(self, data, **kwargs):
         """Check comparision for all parameters in schema."""
         for attr, condition in data.items():
-            if attr in ["limit", "offset", "sorted_name", "sorted_type"]:
+            if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']:
                 continue
 
             if not isinstance(attr, str):
@@ -256,13 +278,6 @@ class SearchModelConditionParameter(Schema):
                 raise LineageParamTypeError("The search_condition element {} should be dict."
                                             .format(attr))
 
-            if attr in ["summary_dir", "lineage_type"]:
-                if not set(condition.keys()).issubset(['in', 'eq']):
-                    raise LineageParamValueError("Invalid operation of %s." % attr)
-                if len(condition.keys()) > 1:
-                    raise LineageParamValueError("More than one operation of %s." % attr)
-                continue
-
             for key in condition.keys():
                 if key not in ["eq", "lt", "gt", "le", "ge", "in"]:
                     raise LineageParamValueError("The compare condition should be in "
diff --git a/mindinsight/lineagemgr/common/validator/validate.py b/mindinsight/lineagemgr/common/validator/validate.py
index 07925870..0d579d10 100644
--- a/mindinsight/lineagemgr/common/validator/validate.py
+++ b/mindinsight/lineagemgr/common/validator/validate.py
@@ -63,6 +63,7 @@ SEARCH_MODEL_ERROR_MAPPING = {
     'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR,
     'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR,
     'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR,
+    'dataset_mark': LineageErrors.LINEAGE_PARAM_DATASET_MARK_ERROR,
     'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR
 }
 
@@ -97,6 +98,7 @@ SEARCH_MODEL_ERROR_MSG_MAPPING = {
     'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value,
     'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value,
     'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value,
+    'dataset_mark': LineageErrorMsg.LINEAGE_PARAM_DATASET_MARK_ERROR.value,
     'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value
 }
 
@@ -238,10 +240,14 @@ def validate_search_model_condition(schema, data):
         MindInsightException: If the parameters are invalid.
     """
     error = schema().validate(data)
-    for error_key in error.keys():
+    for (error_key, error_msgs) in error.items():
         if error_key in SEARCH_MODEL_ERROR_MAPPING.keys():
             error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key)
             error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key)
+            for err_msg in error_msgs:
+                if 'operation' in err_msg.lower():
+                    error_msg = f'The parameter {error_key} is invalid. {err_msg}'
+                    break
             log.error(error_msg)
             raise MindInsightException(error=error_code, message=error_msg)
 
@@ -417,7 +423,7 @@ def validate_user_defined_info(user_defined_info):
                         "Only str is permitted now.".format(type(key))
             log.error(error_msg)
             raise LineageParamTypeError(error_msg)
-        if not isinstance(key, (int, str, float)):
+        if not isinstance(value, (int, str, float)):
             error_msg = "Dict value type {} is not supported in user defined info." \
                         "Only str, int and float are permitted now.".format(type(value))
             log.error(error_msg)
diff --git a/mindinsight/lineagemgr/querier/querier.py b/mindinsight/lineagemgr/querier/querier.py
index 63d366fd..1ff6ef47 100644
--- a/mindinsight/lineagemgr/querier/querier.py
+++ b/mindinsight/lineagemgr/querier/querier.py
@@ -318,13 +318,14 @@ class Querier:
         for offset_result in offset_results:
             for obj_name in ["metric", "user_defined"]:
                 obj = getattr(offset_result, obj_name)
+                require = True if obj_name == "metric" else False
                 if obj and isinstance(obj, dict):
                     for key, value in obj.items():
-                        label = obj_name + "/" + key
+                        label = f'{obj_name}/{key}'
                         customized[label] = dict()
                         customized[label]["label"] = label
-                        # user defined info is default displayed
-                        customized[label]["required"] = True
+                        # user defined info is not displayed by default
+                        customized[label]["required"] = require
                         customized[label]["type"] = type(value).__name__
 
         lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value)
diff --git a/mindinsight/lineagemgr/querier/query_model.py b/mindinsight/lineagemgr/querier/query_model.py
index b797cdc1..d6c92f8c 100644
--- a/mindinsight/lineagemgr/querier/query_model.py
+++ b/mindinsight/lineagemgr/querier/query_model.py
@@ -37,8 +37,7 @@ FIELD_MAPPING = {
     "batch_size": Field('hyper_parameters', 'batch_size'),
     "loss": Field('algorithm', 'loss'),
     "model_size": Field('model', 'size'),
-    "dataset_mark": Field('dataset_mark', None),
-    "lineage_type": Field(None, None)
+    "dataset_mark": Field('dataset_mark', None)
 }
 
 
diff --git a/tests/st/func/lineagemgr/api/test_model_api.py b/tests/st/func/lineagemgr/api/test_model_api.py
index c824ee69..a45960e2 100644
--- a/tests/st/func/lineagemgr/api/test_model_api.py
+++ b/tests/st/func/lineagemgr/api/test_model_api.py
@@ -755,7 +755,7 @@ class TestModelApi(TestCase):
     @pytest.mark.env_single
     def test_filter_summary_lineage_exception_7(self):
         """Test the abnormal execution of the filter_summary_lineage interface."""
-        condition_keys = ["summary_dir", "lineage_type"]
+        condition_keys = ["summary_dir", "lineage_type", "loss_function", "optimizer", "network", "dataset_mark"]
         for condition_key in condition_keys:
             # the condition type not supported in summary_dir and lineage_type
             search_condition = {
@@ -765,7 +765,7 @@ class TestModelApi(TestCase):
             }
             self.assertRaisesRegex(
                 LineageSearchConditionParamError,
-                f'Invalid operation of {condition_key}.',
+                f'The parameter {condition_key} is invalid. Its operation should be `in` or `eq`.',
                 filter_summary_lineage,
                 BASE_SUMMARY_DIR,
                 search_condition
@@ -780,7 +780,7 @@ class TestModelApi(TestCase):
             }
             self.assertRaisesRegex(
                 LineageSearchConditionParamError,
-                f'More than one operation of {condition_key}.',
+                f'The parameter {condition_key} is invalid. More than one operation.',
                 filter_summary_lineage,
                 BASE_SUMMARY_DIR,
                 search_condition
@@ -793,11 +793,12 @@ class TestModelApi(TestCase):
     @pytest.mark.platform_x86_cpu
     @pytest.mark.env_single
     def test_filter_summary_lineage_exception_8(self):
+        """Test the abnormal execution of the filter_summary_lineage interface."""
         invalid_lineage_types = ['xxx', None]
         for lineage_type in invalid_lineage_types:
             search_condition = {
                 'lineage_type': {
-                    'in': lineage_type
+                    'eq': lineage_type
                 }
             }
             self.assertRaisesRegex(
@@ -815,6 +816,7 @@ class TestModelApi(TestCase):
     @pytest.mark.platform_x86_cpu
     @pytest.mark.env_single
     def test_filter_summary_lineage_exception_9(self):
+        """Test the abnormal execution of the filter_summary_lineage interface."""
         invalid_sorted_names = ['xxx', 'metric_', 1]
         for sorted_name in invalid_sorted_names:
             search_condition = {
diff --git a/tests/ut/datavisual/data_transform/test_ms_data_loader.py b/tests/ut/datavisual/data_transform/test_ms_data_loader.py
index aa0bbf2f..bcbe329c 100644
--- a/tests/ut/datavisual/data_transform/test_ms_data_loader.py
+++ b/tests/ut/datavisual/data_transform/test_ms_data_loader.py
@@ -82,7 +82,7 @@ class TestMsDataLoader:
         ms_loader = MSDataLoader(summary_dir)
         ms_loader._latest_summary_filename = 'summary.00'
         ms_loader.load()
-        assert ms_loader._latest_summary_filename == 'summary.01'
+        shutil.rmtree(summary_dir)
         assert ms_loader._latest_summary_file_size == RECORD_LEN
         tag = ms_loader.get_events_data().list_tags_by_plugin('scalar')
         tensors = ms_loader.get_events_data().tensors(tag[0])
diff --git a/tests/ut/lineagemgr/common/validator/test_validate.py b/tests/ut/lineagemgr/common/validator/test_validate.py
index 64f4aae6..bc5f8820 100644
--- a/tests/ut/lineagemgr/common/validator/test_validate.py
+++ b/tests/ut/lineagemgr/common/validator/test_validate.py
@@ -101,8 +101,7 @@ class TestValidateSearchModelCondition(TestCase):
             }
         }
         self._assert_raise_of_mindinsight_exception(
-            "The parameter learning_rate is invalid. It should be a dict and "
-            "the value should be a float or a integer",
+            "The value of `in` operation must be list or tuple.",
             condition
         )
 
@@ -136,8 +135,8 @@ class TestValidateSearchModelCondition(TestCase):
             }
         }
         self._assert_raise_of_mindinsight_exception(
-            "The parameter loss_function is invalid. It should be a dict and "
-            "the value should be a string",
+            "The parameter loss_function is invalid. "
+            "Its operation should be `in` or `eq`.",
             condition
         )
 
@@ -147,8 +146,7 @@ class TestValidateSearchModelCondition(TestCase):
             }
         }
         self._assert_raise_of_mindinsight_exception(
-            "The parameter train_dataset_count is invalid. It should be a dict "
-            "and the value should be a integer between 0",
+            "The value of `in` operation must be list or tuple.",
             condition
         )
 
@@ -161,8 +159,8 @@ class TestValidateSearchModelCondition(TestCase):
             }
         }
         self._assert_raise_of_mindinsight_exception(
-            "The parameter network is invalid. It should be a dict and "
-            "the value should be a string",
+            "The parameter network is invalid. "
+            "Its operation should be `in` or `eq`.",
             condition
         )