!48 Enhance param checking and error msg in lineagemgr, set user-define's required as False

Merge pull request !48 from luopengting/lineage_lpt
5 years ago · 284f2d8027
--- a/mindinsight/lineagemgr/api/model.py
+++ b/mindinsight/lineagemgr/api/model.py
@@ -95,14 +95,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
    Users can filter and sort all lineage information according to the search
    condition. The supported filter fields include `summary_dir`, `network`,
    etc. The filter conditions include `eq`, `lt`, `gt`, `le`, `ge` and `in`.
    At the same time, the combined use of these fields and conditions is
    supported. If you want to sort based on filter fields, the field of
    `sorted_name` and `sorted_type` should be specified.
    If the value type of filter condition is `str`, such as summary_dir and
    lineage_type, then its key can only be `in` and `eq`. At the same time,
    the combined use of these fields and conditions is supported. If you want
    to sort based on filter fields, the field of `sorted_name` and `sorted_type`
    should be specified.

    Users can use `lineage_type` to decide what kind of lineage information to
    query. If the `lineage_type` is `dataset`, the query result is only the
    lineage information related to data augmentation. If the `lineage_type` is
    `model` or `None`, the query result is all lineage information.
    query. If the `lineage_type` is not defined, the query result is all lineage
    information.

    Users can paginate query result based on `offset` and `limit`. The `offset`
    refers to page number. The `limit` refers to the number in one page.
@@ -147,6 +148,15 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):

            - dataset_mark (dict): The filter condition of dataset mark.

            - lineage_type (dict): The filter condition of lineage type. It decides
              what kind of lineage information to query. Its value can be `dataset`
              or `model`, e.g., {'in': ['dataset', 'model']}, {'eq': 'model'}, etc.
              If its values contain `dataset`, the query result will contain the
              lineage information related to data augmentation. If its values contain
              `model`, the query result will contain model lineage information.
              If it is not defined or it is a dict like {'in': ['dataset', 'model']},
              the query result is all lineage information.

            - offset (int): Page number, the value range is [0, 100000].

            - limit (int): The number in one page, the value range is [1, 100].
@@ -156,14 +166,8 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
            - sorted_type (str): Specify sort order. It can be `ascending` or
              `descending`.

            - lineage_type (str): It decides what kind of lineage information to
              query. It can be `dataset` or `model`. If it is `dataset`,
              the query result is only the lineage information related to data
              augmentation. If it is `model` or `None`, the query result is all
              lineage information.

    Returns:
        dict, all lineage information under summary base directory according to
        dict, lineage information under summary base directory according to
        search condition.

    Raises:
@@ -196,7 +200,9 @@ def filter_summary_lineage(summary_base_dir, search_condition=None):
        >>>     'sorted_type': 'descending',
        >>>     'limit': 3,
        >>>     'offset': 0,
        >>>     'lineage_type': 'model'
        >>>     'lineage_type': {
        >>>         'eq': 'model'
        >>>     }
        >>> }
        >>> summary_lineage = filter_summary_lineage(summary_base_dir)
        >>> summary_lineage_filter = filter_summary_lineage(summary_base_dir, search_condition)
--- a/mindinsight/lineagemgr/common/exceptions/error_code.py
+++ b/mindinsight/lineagemgr/common/exceptions/error_code.py
@@ -83,6 +83,8 @@ class LineageErrors(LineageErrorCodes):
    LINEAGE_SEARCH_CONDITION_PARAM_ERROR = 24 | _MODEL_LINEAGE_API_ERROR_MASK
    LINEAGE_PARAM_LINEAGE_TYPE_ERROR = 25 | _MODEL_LINEAGE_API_ERROR_MASK

    # Dataset lineage error codes.
    LINEAGE_PARAM_DATASET_MARK_ERROR = 0 | _DATASET_LINEAGE_ERROR_MASK

    SUMMARY_ANALYZE_ERROR = 0 | _SUMMARY_ANALYZE_ERROR_MASK
    SUMMARY_VERIFICATION_ERROR = 1 | _SUMMARY_ANALYZE_ERROR_MASK
@@ -156,26 +158,27 @@ class LineageErrorMsg(Enum):
                                      " 'eq', 'lt', 'gt', 'ge', 'le', 'in'."

    LINEAGE_PARAM_SUMMARY_DIR_ERROR = "The parameter summary_dir is invalid. It should be a dict and the value " \
                                      "should be a string"
                                      "should be a string."

    LINEAGE_TRAIN_DATASET_PATH_ERROR = "The parameter train_dataset_path is invalid." \
                                       " It should be a dict and the value should be a string"
                                       " It should be a dict and the value should be a string."

    LINEAGE_TRAIN_DATASET_COUNT_ERROR = "The parameter train_dataset_count is invalid. It should be a dict " \
                                        "and the value should be a integer between 0 and pow(2, 63) -1"
                                        "and the value should be a integer between 0 and pow(2, 63) -1."

    LINEAGE_TEST_DATASET_PATH_ERROR = "The parameter test_dataset_path is invalid. " \
                                      "It should be a dict and the value should be a string"
                                      "It should be a dict and the value should be a string."

    LINEAGE_TEST_DATASET_COUNT_ERROR = "The parameter test_dataset_count is invalid. It should be a dict " \
                                       "and the value should be a integer between 0 and pow(2, 63) -1"
                                       "and the value should be a integer between 0 and pow(2, 63) -1."

    LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string"
    LINEAGE_NETWORK_ERROR = "The parameter network is invalid. It should be a dict and the value should be a string."

    LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. It should be a dict and the value should be a string"
    LINEAGE_OPTIMIZER_ERROR = "The parameter optimizer is invalid. " \
                              "It should be a dict and the value should be a string."

    LINEAGE_LOSS_FUNCTION_ERROR = "The parameter loss_function is invalid. " \
                                  "It should be a dict and the value should be a string"
                                  "It should be a dict and the value should be a string."

    LINEAGE_LOSS_ERROR = "The parameter loss is invalid. " \
                         "It should be a float."
@@ -184,7 +187,7 @@ class LineageErrorMsg(Enum):
                               "It should be an integer between 0 and pow(2, 63) -1."

    LINEAGE_LEARNING_RATE_ERROR = "The parameter learning_rate is invalid. " \
                                  "It should be a dict and the value should be a float or a integer"
                                  "It should be a dict and the value should be a float or a integer."

    LINEAGE_PARAM_SORTED_NAME_ERROR = "The parameter sorted_name is invalid. " \
                                      "It should be a string."
@@ -195,6 +198,9 @@ class LineageErrorMsg(Enum):
    LINEAGE_PARAM_LINEAGE_TYPE_ERROR = "The parameter lineage_type is invalid. " \
                                       "It should be 'dataset' or 'model'."

    LINEAGE_PARAM_DATASET_MARK_ERROR = "The parameter dataset_mark is invalid. " \
                                       "It should be a string."

    SUMMARY_ANALYZE_ERROR = "Failed to analyze summary log. {}"
    SUMMARY_VERIFICATION_ERROR = "Verification failed in summary analysis. {}"

--- a/mindinsight/lineagemgr/common/validator/model_parameter.py
+++ b/mindinsight/lineagemgr/common/validator/model_parameter.py
@@ -129,6 +129,7 @@ class SearchModelConditionParameter(Schema):
    offset = fields.Int(validate=lambda n: 0 <= n <= 100000)
    sorted_name = fields.Str()
    sorted_type = fields.Str(allow_none=True)
    dataset_mark = fields.Dict()
    lineage_type = fields.Dict()

    @staticmethod
@@ -137,7 +138,7 @@ class SearchModelConditionParameter(Schema):
        for key, value in data.items():
            if key == "in":
                if not isinstance(value, (list, tuple)):
                    raise ValidationError("In operation's value must be list or tuple.")
                    raise ValidationError("The value of `in` operation must be list or tuple.")
            else:
                if not isinstance(value, value_type):
                    raise ValidationError("Wrong value type.")
@@ -153,12 +154,20 @@ class SearchModelConditionParameter(Schema):
        for key, value in data.items():
            if key == "in":
                if not isinstance(value, (list, tuple)):
                    raise ValidationError("In operation's value must be list or tuple.")
                    raise ValidationError("The value of `in` operation must be list or tuple.")
            else:
                if isinstance(value, bool) or \
                        (not isinstance(value, float) and not isinstance(value, int)):
                    raise ValidationError("Wrong value type.")

    @staticmethod
    def check_operation(data):
        """
        Ensure a condition uses a single operation that is either `in` or `eq`.

        Args:
            data (dict): The condition to check; its keys are operation names.

        Raises:
            ValidationError: If a key other than `in` or `eq` is present, or
                if more than one operation is given.
        """
        operations = set(data.keys())
        # The allowed-set test runs first, so an unsupported key is reported
        # before the "more than one" check.
        if not operations <= {'in', 'eq'}:
            raise ValidationError("Its operation should be `in` or `eq`.")
        if len(operations) > 1:
            raise ValidationError("More than one operation.")

    @validates("loss")
    def check_loss(self, data):
        """Check loss."""
@@ -172,11 +181,13 @@ class SearchModelConditionParameter(Schema):
    @validates("loss_function")
    def check_loss_function(self, data):
        """
        Validate the `loss_function` search condition.

        Args:
            data (dict): The condition supplied for `loss_function`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("train_dataset_path")
    def check_train_dataset_path(self, data):
        """
        Validate the `train_dataset_path` search condition.

        Args:
            data (dict): The condition supplied for `train_dataset_path`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("train_dataset_count")
@@ -187,6 +198,7 @@ class SearchModelConditionParameter(Schema):
    @validates("test_dataset_path")
    def check_test_dataset_path(self, data):
        """
        Validate the `test_dataset_path` search condition.

        Args:
            data (dict): The condition supplied for `test_dataset_path`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("test_dataset_count")
@@ -197,11 +209,13 @@ class SearchModelConditionParameter(Schema):
    @validates("network")
    def check_network(self, data):
        """
        Validate the `network` search condition.

        Args:
            data (dict): The condition supplied for `network`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("optimizer")
    def check_optimizer(self, data):
        """
        Validate the `optimizer` search condition.

        Args:
            data (dict): The condition supplied for `optimizer`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("epoch")
@@ -222,11 +236,19 @@ class SearchModelConditionParameter(Schema):
    @validates("summary_dir")
    def check_summary_dir(self, data):
        """
        Validate the `summary_dir` search condition.

        Args:
            data (dict): The condition supplied for `summary_dir`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("dataset_mark")
    def check_dataset_mark(self, data):
        """
        Validate the `dataset_mark` search condition.

        Args:
            data (dict): The condition supplied for `dataset_mark`.

        Raises:
            ValidationError: If the operation check or the `str` value-type
                check rejects the condition.
        """
        schema = SearchModelConditionParameter
        # The operation is verified before the value type.
        schema.check_operation(data)
        schema.check_dict_value_type(data, str)

    @validates("lineage_type")
    def check_lineage_type(self, data):
        """Check lineage type."""
        SearchModelConditionParameter.check_operation(data)
        SearchModelConditionParameter.check_dict_value_type(data, str)
        recv_types = []
        for key, value in data.items():
@@ -243,7 +265,7 @@ class SearchModelConditionParameter(Schema):
    def check_comparision(self, data, **kwargs):
        """Check comparision for all parameters in schema."""
        for attr, condition in data.items():
            if attr in ["limit", "offset", "sorted_name", "sorted_type"]:
            if attr in ["limit", "offset", "sorted_name", "sorted_type", 'lineage_type']:
                continue

            if not isinstance(attr, str):
@@ -256,13 +278,6 @@ class SearchModelConditionParameter(Schema):
                raise LineageParamTypeError("The search_condition element {} should be dict."
                                            .format(attr))

            if attr in ["summary_dir", "lineage_type"]:
                if not set(condition.keys()).issubset(['in', 'eq']):
                    raise LineageParamValueError("Invalid operation of %s." % attr)
                if len(condition.keys()) > 1:
                    raise LineageParamValueError("More than one operation of %s." % attr)
                continue

            for key in condition.keys():
                if key not in ["eq", "lt", "gt", "le", "ge", "in"]:
                    raise LineageParamValueError("The compare condition should be in "
--- a/mindinsight/lineagemgr/common/validator/validate.py
+++ b/mindinsight/lineagemgr/common/validator/validate.py
@@ -63,6 +63,7 @@ SEARCH_MODEL_ERROR_MAPPING = {
    'model_size': LineageErrors.LINEAGE_PARAM_MODEL_SIZE_ERROR,
    'sorted_name': LineageErrors.LINEAGE_PARAM_SORTED_NAME_ERROR,
    'sorted_type': LineageErrors.LINEAGE_PARAM_SORTED_TYPE_ERROR,
    'dataset_mark': LineageErrors.LINEAGE_PARAM_DATASET_MARK_ERROR,
    'lineage_type': LineageErrors.LINEAGE_PARAM_LINEAGE_TYPE_ERROR
 }

@@ -97,6 +98,7 @@ SEARCH_MODEL_ERROR_MSG_MAPPING = {
    'model_size': LineageErrorMsg.LINEAGE_MODEL_SIZE_ERROR.value,
    'sorted_name': LineageErrorMsg.LINEAGE_PARAM_SORTED_NAME_ERROR.value,
    'sorted_type': LineageErrorMsg.LINEAGE_PARAM_SORTED_TYPE_ERROR.value,
    'dataset_mark': LineageErrorMsg.LINEAGE_PARAM_DATASET_MARK_ERROR.value,
    'lineage_type': LineageErrorMsg.LINEAGE_PARAM_LINEAGE_TYPE_ERROR.value
 }

@@ -238,10 +240,14 @@ def validate_search_model_condition(schema, data):
        MindInsightException: If the parameters are invalid.
    """
    error = schema().validate(data)
    for error_key in error.keys():
    for (error_key, error_msgs) in error.items():
        if error_key in SEARCH_MODEL_ERROR_MAPPING.keys():
            error_code = SEARCH_MODEL_ERROR_MAPPING.get(error_key)
            error_msg = SEARCH_MODEL_ERROR_MSG_MAPPING.get(error_key)
            for err_msg in error_msgs:
                if 'operation' in err_msg.lower():
                    error_msg = f'The parameter {error_key} is invalid. {err_msg}'
                    break
            log.error(error_msg)
            raise MindInsightException(error=error_code, message=error_msg)

@@ -417,7 +423,7 @@ def validate_user_defined_info(user_defined_info):
                        "Only str is permitted now.".format(type(key))
            log.error(error_msg)
            raise LineageParamTypeError(error_msg)
        if not isinstance(key, (int, str, float)):
        if not isinstance(value, (int, str, float)):
            error_msg = "Dict value type {} is not supported in user defined info." \
                        "Only str, int and float are permitted now.".format(type(value))
            log.error(error_msg)
--- a/mindinsight/lineagemgr/querier/querier.py
+++ b/mindinsight/lineagemgr/querier/querier.py
@@ -318,13 +318,14 @@ class Querier:
        for offset_result in offset_results:
            for obj_name in ["metric", "user_defined"]:
                obj = getattr(offset_result, obj_name)
                require = True if obj_name == "metric" else False
                if obj and isinstance(obj, dict):
                    for key, value in obj.items():
                        label = obj_name + "/" + key
                        label = f'{obj_name}/{key}'
                        customized[label] = dict()
                        customized[label]["label"] = label
                        # user defined info is default displayed
                        customized[label]["required"] = True
                        # user defined info is not displayed by default
                        customized[label]["required"] = require
                        customized[label]["type"] = type(value).__name__

        lineage_types = condition.get(ConditionParam.LINEAGE_TYPE.value)
--- a/mindinsight/lineagemgr/querier/query_model.py
+++ b/mindinsight/lineagemgr/querier/query_model.py
@@ -37,8 +37,7 @@ FIELD_MAPPING = {
    "batch_size": Field('hyper_parameters', 'batch_size'),
    "loss": Field('algorithm', 'loss'),
    "model_size": Field('model', 'size'),
    "dataset_mark": Field('dataset_mark', None),
    "lineage_type": Field(None, None)
    "dataset_mark": Field('dataset_mark', None)
 }


--- a/tests/st/func/lineagemgr/api/test_model_api.py
+++ b/tests/st/func/lineagemgr/api/test_model_api.py
@@ -755,7 +755,7 @@ class TestModelApi(TestCase):
    @pytest.mark.env_single
    def test_filter_summary_lineage_exception_7(self):
        """Test the abnormal execution of the filter_summary_lineage interface."""
        condition_keys = ["summary_dir", "lineage_type"]
        condition_keys = ["summary_dir", "lineage_type", "loss_function", "optimizer", "network", "dataset_mark"]
        for condition_key in condition_keys:
            # the operation is not supported for this condition key (only `in` and `eq` are allowed)
            search_condition = {
@@ -765,7 +765,7 @@ class TestModelApi(TestCase):
            }
            self.assertRaisesRegex(
                LineageSearchConditionParamError,
                f'Invalid operation of {condition_key}.',
                f'The parameter {condition_key} is invalid. Its operation should be `in` or `eq`.',
                filter_summary_lineage,
                BASE_SUMMARY_DIR,
                search_condition
@@ -780,7 +780,7 @@ class TestModelApi(TestCase):
            }
            self.assertRaisesRegex(
                LineageSearchConditionParamError,
                f'More than one operation of {condition_key}.',
                f'The parameter {condition_key} is invalid. More than one operation.',
                filter_summary_lineage,
                BASE_SUMMARY_DIR,
                search_condition
@@ -793,11 +793,12 @@ class TestModelApi(TestCase):
    @pytest.mark.platform_x86_cpu
    @pytest.mark.env_single
    def test_filter_summary_lineage_exception_8(self):
        """Test the abnormal execution of the filter_summary_lineage interface."""
        invalid_lineage_types = ['xxx', None]
        for lineage_type in invalid_lineage_types:
            search_condition = {
                'lineage_type': {
                    'in': lineage_type
                    'eq': lineage_type
                }
            }
            self.assertRaisesRegex(
@@ -815,6 +816,7 @@ class TestModelApi(TestCase):
    @pytest.mark.platform_x86_cpu
    @pytest.mark.env_single
    def test_filter_summary_lineage_exception_9(self):
        """Test the abnormal execution of the filter_summary_lineage interface."""
        invalid_sorted_names = ['xxx', 'metric_', 1]
        for sorted_name in invalid_sorted_names:
            search_condition = {
--- a/tests/ut/datavisual/data_transform/test_ms_data_loader.py
+++ b/tests/ut/datavisual/data_transform/test_ms_data_loader.py
@@ -82,7 +82,7 @@ class TestMsDataLoader:
        ms_loader = MSDataLoader(summary_dir)
        ms_loader._latest_summary_filename = 'summary.00'
        ms_loader.load()
        assert ms_loader._latest_summary_filename == 'summary.01'
        shutil.rmtree(summary_dir)
        assert ms_loader._latest_summary_file_size == RECORD_LEN
        tag = ms_loader.get_events_data().list_tags_by_plugin('scalar')
        tensors = ms_loader.get_events_data().tensors(tag[0])
--- a/tests/ut/lineagemgr/common/validator/test_validate.py
+++ b/tests/ut/lineagemgr/common/validator/test_validate.py
@@ -101,8 +101,7 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
            "The parameter learning_rate is invalid. It should be a dict and "
            "the value should be a float or a integer",
            "The value of `in` operation must be list or tuple.",
            condition
        )

@@ -136,8 +135,8 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
            "The parameter loss_function is invalid. It should be a dict and "
            "the value should be a string",
            "The parameter loss_function is invalid. "
            "Its operation should be `in` or `eq`.",
            condition
        )

@@ -147,8 +146,7 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
            "The parameter train_dataset_count is invalid. It should be a dict "
            "and the value should be a integer between 0",
            "The value of `in` operation must be list or tuple.",
            condition
        )

@@ -161,8 +159,8 @@ class TestValidateSearchModelCondition(TestCase):
            }
        }
        self._assert_raise_of_mindinsight_exception(
            "The parameter network is invalid. It should be a dict and "
            "the value should be a string",
            "The parameter network is invalid. "
            "Its operation should be `in` or `eq`.",
            condition
        )