!941 Update debugger version from 1.0 to 1.1, delete the old-version conditions and the old conditions API, and fix a bug

From: @jiang-shuqiang Reviewed-by: @wenkai_dist,@ouwenchang Signed-off-by: @ouwenchang
5 years ago · c57e63c3c4
--- a/mindinsight/backend/conditionmgr/conditionmgr_api.py
+++ b/mindinsight/backend/conditionmgr/conditionmgr_api.py
@@ -26,13 +26,6 @@ BLUEPRINT = Blueprint("conditionmgr", __name__,
                      url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX)


@BLUEPRINT.route("/conditionmgr/train-jobs/<train_id>/conditions", methods=["GET"])
 def get_conditions(train_id):
    """
    Get conditions.

    Handles GET requests on the conditions route. `BACKEND_SERVER.get_conditions`
    and `train_id` are handed to `_wrap_reply`, and whatever `_wrap_reply`
    returns is sent back unchanged.

    Args:
        train_id: Value captured from the `<train_id>` segment of the URL.

    Returns:
        The object returned by `_wrap_reply`.
    """
    reply = _wrap_reply(BACKEND_SERVER.get_conditions, train_id)
    return reply


@BLUEPRINT.route("/conditionmgr/train-jobs/<train_id>/condition-collections", methods=["GET"])
 def get_condition_collections(train_id):
    """get condition collections"""
--- a/mindinsight/conf/constants.py
+++ b/mindinsight/conf/constants.py
@@ -64,4 +64,4 @@ MAX_HISTOGRAM_STEP_SIZE_PER_TAG = 50
 MAX_TENSOR_STEP_SIZE_PER_TAG = 20
 MAX_TENSOR_RESPONSE_DATA_SIZE = 100000

 ENABLE_RECOMMENDED_WATCHPOINTS = False
 ENABLE_RECOMMENDED_WATCHPOINTS = True
--- a/mindinsight/debugger/conditionmgr/condition.py
+++ b/mindinsight/debugger/conditionmgr/condition.py
@@ -18,8 +18,6 @@ Management of all conditions.
 This module is used to register all conditions, as well as their parameters.
 This module also provide the available conditions to condition_collections api.
 """
 import math

 from enum import Enum
 from mindinsight.debugger.conditionmgr.log import logger

@@ -35,17 +33,6 @@ class ConditionIdEnum(Enum):
    GRADIENT_EXPLODING = "gradient_exploding"
    TENSOR_OVERFLOW = "tensor_overflow"
    OPERATOR_OVERFLOW = "operator_overflow"
    NAN = "nan"
    OVERFLOW_ASCEND_CHIP = "overflow"
    INF = "inf"
    MAX_GT = "max_gt"
    MAX_LT = "max_lt"
    MIN_GT = "min_gt"
    MIN_LT = "min_lt"
    MAX_MIN_GT = "max_min_gt"
    MAX_MIN_LT = "max_min_lt"
    MEAN_GT = "mean_gt"
    MEAN_LT = "mean_lt"
    TENSOR_INITIALIZATION = "tensor_initialization"
    TENSOR_TOO_LARGE = "tensor_too_large"
    TENSOR_TOO_SMALL = "tensor_too_small"
@@ -287,7 +274,3 @@ def check_abs_param_range(value):
    if 0 <= value < float("inf"):
        return True
    return False


 def check_not_nan(value):
    """
    Check that the given value is not NaN.

    Args:
        value: Number passed straight to `math.isnan`.

    Returns:
        bool, False when `math.isnan(value)` is true, True otherwise.
    """
    return not math.isnan(value)
--- a/mindinsight/debugger/conditionmgr/condition_list.py
+++ b/mindinsight/debugger/conditionmgr/condition_list.py
@@ -29,7 +29,6 @@ from mindinsight.debugger.conditionmgr.condition import check_initialization_ava
 from mindinsight.debugger.conditionmgr.condition import check_normal_param_range
 from mindinsight.debugger.conditionmgr.condition import check_percentage_param_range
 from mindinsight.debugger.conditionmgr.condition import check_abs_param_range
 from mindinsight.debugger.conditionmgr.condition import check_not_nan


 CONDITION_LIST = [
@@ -67,7 +66,7 @@ CONDITION_LIST = [
        # Send this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_target_type=TargetTypeEnum.WEIGHT,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 1)
    ),
@@ -225,164 +224,6 @@ CONDITION_LIST = [
        supported_platforms=(PlatformEnum.ASCEND,),
        minimum_debugger_capability=(1, 1)
    ),
    Condition(
        condition_id=ConditionIdEnum.NAN,
        abbr="NAN",
        # Send this condition to MindSpore will use WatchCondition.Condition.nan
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.GPU,),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.OVERFLOW_ASCEND_CHIP,
        abbr="OVERFLOW",
        # Send this condition to MindSpore will use WatchCondition.Condition.overflow
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND,),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.INF,
        abbr="INF",
        # Send this condition to MindSpore will use WatchCondition.Condition.inf
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MAX_GT,
        abbr="MAX>",
        # Send this condition to MindSpore will use WatchCondition.Condition.max_gt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MAX_LT,
        abbr="MAX<",
        # Send this condition to MindSpore will use WatchCondition.Condition.max_lt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MIN_GT,
        abbr="MIN>",
        # Send this condition to MindSpore will use WatchCondition.Condition.min_gt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MIN_LT,
        abbr="MIN<",
        # Send this condition to MindSpore will use WatchCondition.Condition.min_lt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MAX_MIN_GT,
        abbr="MAX-MIN>",
        # Send this condition to MindSpore will use WatchCondition.Condition.max_min_gt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MAX_MIN_LT,
        abbr="MAX-Min<",
        # Send this condition to MindSpore will use WatchCondition.Condition.max_min_lt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MEAN_GT,
        abbr="MEAN>",
        # Send this condition to MindSpore will use WatchCondition.Condition.mean_gt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.MEAN_LT,
        abbr="MEAN<",
        # Send this condition to MindSpore will use WatchCondition.Condition.mean_lt
        optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
        parameters=[
            ConditionParameter(
                name="param",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_normal_param_range
            )
        ],
        supported_target_type=TargetTypeEnum.TENSOR,
        supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
        minimum_debugger_capability=(1, 0)
    ),
    Condition(
        condition_id=ConditionIdEnum.TENSOR_INITIALIZATION,
        abbr="TI",
@@ -578,13 +419,13 @@ CONDITION_LIST = [
            ConditionParameter(
                name="range_start_inclusive",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_not_nan,
                valid_test_func=check_normal_param_range,
                param_type=ParamTypeEnum.SUPPORT_PARAM
            ),
            ConditionParameter(
                name="range_end_inclusive",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_not_nan,
                valid_test_func=check_normal_param_range,
                param_type=ParamTypeEnum.SUPPORT_PARAM
            ),
            ConditionParameter(
@@ -623,13 +464,13 @@ CONDITION_LIST = [
            ConditionParameter(
                name="range_start_inclusive",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_not_nan,
                valid_test_func=check_normal_param_range,
                param_type=ParamTypeEnum.SUPPORT_PARAM
            ),
            ConditionParameter(
                name="range_end_inclusive",
                value_type=ValueTypeEnum.FLOAT64,
                valid_test_func=check_not_nan,
                valid_test_func=check_normal_param_range,
                param_type=ParamTypeEnum.SUPPORT_PARAM
            ),
            ConditionParameter(
--- a/mindinsight/debugger/conditionmgr/conditionmgr.py
+++ b/mindinsight/debugger/conditionmgr/conditionmgr.py
@@ -46,30 +46,6 @@ class ConditionMgr:
        for condition in conditions:
            self.register_condition(condition)

    def get_all(self, condition_context):
        """
        Describe every registered condition that is available in the given context.

        Args:
            condition_context: Object passed to each condition's `is_available`.

        Returns:
            dict, of the form `{"conditions": [...]}`. Each entry holds the condition
            `id`, its UI-visible `parameters` and the name of its
            `supported_target_type`. Entries are ordered by `id`.
        """
        available = []
        for cond in self.conditions.values():
            # Conditions not usable in this context are left out entirely.
            if not cond.is_available(condition_context):
                continue
            # Only parameters flagged as visible on the UI are exposed.
            visible_params = [
                {
                    "name": item.name,
                    "type": item.type.name,
                    "support_disable": item.support_disable,
                    "default_value": item.default_value
                }
                for item in cond.parameters
                if item.visible_on_ui
            ]
            available.append({
                "id": cond.id,
                "parameters": visible_params,
                "supported_target_type": cond.supported_target_type.name
            })
        available.sort(key=lambda entry: entry.get('id'))
        return {"conditions": available}

    def get_condition(self, condition_id) -> Condition:
        """
        Get condition by condition id.

        Args:
            condition_id: Key used to index `self.conditions`.

        Returns:
            Condition, the entry stored in `self.conditions` under `condition_id`.

        Note:
            The lookup is a plain subscription with no fallback, so an id that is
            not registered propagates the error raised by `self.conditions`
            (presumably KeyError if it is a dict -- confirm against the registry).
        """
        return self.conditions[condition_id]
@@ -126,9 +102,9 @@ class ConditionMgr:
            })

        reply = []
        self.check_and_sort(collections, TargetTypeEnum.ACTIVATION.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.GRADIENT.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.TENSOR.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.WEIGHT.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.ACTIVATION.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.GRADIENT.value, reply)

        return reply
--- a/mindinsight/debugger/conditionmgr/recommender.py
+++ b/mindinsight/debugger/conditionmgr/recommender.py
@@ -106,7 +106,7 @@ def recommend_watchpoints(condition_mgr: ConditionMgr, graph_stream, condition_c

    # add tensor watch points
    merged_info = get_basic_node_info(TargetTypeEnum.TENSOR.value, graph_stream)
    _recommend_overflow_ascend_chip(merged_info, condition_mgr, watch_points, condition_context)
    _recommend_operator_overflow(merged_info, condition_mgr, watch_points, condition_context)
    _recommend_tensor_overflow(merged_info, condition_mgr, watch_points, condition_context)
    _recommend_tensor_all_zero(merged_info, condition_mgr, watch_points, condition_context)

@@ -165,21 +165,21 @@ def _recommend_tensor_overflow(basic_info_nodes, condition_mgr, watch_points, co
    watch_points.append(overflow_watchpoint)


 def _recommend_overflow_ascend_chip(basic_info_nodes, condition_mgr, watch_points, condition_context):
 def _recommend_operator_overflow(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Recommend operator overflow watchpoint."""
    if not basic_info_nodes:
        return
    if not condition_mgr.has_condition(ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, condition_context):
    if not condition_mgr.has_condition(ConditionIdEnum.OPERATOR_OVERFLOW.value, condition_context):
        return

    condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value)
    condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.OPERATOR_OVERFLOW.value)
    overflow_d_watchpoint = _WatchPointData(
        watch_condition={
            "condition": condition.id,
            "params": []
        },
        watch_nodes=basic_info_nodes.copy(),
        name='recommend_overflow_ascend_chip_watchpoint'
        name='recommend_operator_overflow_watchpoint'
    )
    watch_points.append(overflow_d_watchpoint)

--- a/mindinsight/debugger/debugger_server.py
+++ b/mindinsight/debugger/debugger_server.py
@@ -68,17 +68,10 @@ class DebuggerServer:
        self.grpc_server_manager = None
        self.back_server = None

    def get_conditions(self, train_id):
        """
        Return all conditions the condition manager reports for the current metadata.

        Args:
            train_id: Train job id; only written to the debug log here.

        Returns:
            The result of `self.condition_mgr.get_all` for a context built from the
            metadata stream's backend and step with capability (1, 0).
        """
        meta = self.cache_store.get_stream_handler(Streams.METADATA)
        context = ConditionContext(meta.backend, meta.step, (1, 0))
        log.debug("Train_id: %s, backend: %s", train_id, context.backend)
        return self.condition_mgr.get_all(context)

    def get_condition_collections(self, train_id):
        """Get default condition_collections"""
        metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA)
        condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0))
        condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 1))
        log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend)
        return self.condition_mgr.get_all_collections(condition_context)

@@ -88,7 +81,7 @@ class DebuggerServer:
            log.error("Bool param should be given for set_recommended")
            raise DebuggerParamValueError("Bool param should be given.")
        metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA)
        condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0))
        condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 1))
        log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend)
        res = metadata_stream.get(['state', 'enable_recheck'])
        if set_recommended and not metadata_stream.recommendation_confirmed:
--- a/mindinsight/debugger/proto/debug_grpc.proto
+++ b/mindinsight/debugger/proto/debug_grpc.proto
@@ -91,17 +91,7 @@ message ViewCMD {

 message WatchCondition {
  enum Condition {
    nan = 0;
    inf = 1;
    overflow = 2;
    max_gt = 3;
    max_lt = 4;
    min_gt = 5;
    min_lt = 6;
    max_min_gt = 7;
    max_min_lt = 8;
    mean_gt = 9;
    mean_lt = 10;
    sd_gt = 11;
    sd_lt = 12;
    tensor_general_overflow = 13;
--- a/mindinsight/debugger/stream_cache/watchpoint.py
+++ b/mindinsight/debugger/stream_cache/watchpoint.py
@@ -29,18 +29,7 @@ WATCHPOINT_CONDITION_MAPPING = {
    ConditionIdEnum.GRADIENT_EXPLODING.value: WatchCondition.Condition.tensor_general_overflow,
    ConditionIdEnum.GRADIENT_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large,
    ConditionIdEnum.GRADIENT_VANISHING.value: WatchCondition.Condition.tensor_too_small,
    ConditionIdEnum.INF.value: WatchCondition.Condition.inf,
    ConditionIdEnum.MAX_GT.value: WatchCondition.Condition.max_gt,
    ConditionIdEnum.MAX_LT.value: WatchCondition.Condition.max_lt,
    ConditionIdEnum.MAX_MIN_GT.value: WatchCondition.Condition.max_min_gt,
    ConditionIdEnum.MAX_MIN_LT.value: WatchCondition.Condition.max_min_lt,
    ConditionIdEnum.MEAN_GT.value: WatchCondition.Condition.mean_gt,
    ConditionIdEnum.MEAN_LT.value: WatchCondition.Condition.mean_lt,
    ConditionIdEnum.MIN_GT.value: WatchCondition.Condition.min_gt,
    ConditionIdEnum.MIN_LT.value: WatchCondition.Condition.min_lt,
    ConditionIdEnum.NAN.value: WatchCondition.Condition.nan,
    ConditionIdEnum.OPERATOR_OVERFLOW.value: WatchCondition.Condition.overflow,
    ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value: WatchCondition.Condition.overflow,
    ConditionIdEnum.TENSOR_ALL_ZERO.value: WatchCondition.Condition.tensor_all_zero,
    ConditionIdEnum.TENSOR_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization,
    ConditionIdEnum.TENSOR_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow,
--- a/mindinsight/debugger/stream_operator/watchpoint_operator.py
+++ b/mindinsight/debugger/stream_operator/watchpoint_operator.py
@@ -95,13 +95,9 @@ class WatchpointOperator:
    def _validate_watch_condition(self, watch_condition):
        """Validate watch condition."""
        metadata_stream = self._metadata_stream
        if metadata_stream.backend == 'GPU' and watch_condition.get('id') in (
                ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, ConditionIdEnum.OPERATOR_OVERFLOW.value):
        if metadata_stream.backend == 'GPU' and watch_condition.get('id') == ConditionIdEnum.OPERATOR_OVERFLOW.value:
            log.error("GPU doesn't support overflow watch condition.")
            raise DebuggerParamValueError("GPU doesn't support overflow watch condition.")
        if metadata_stream.backend == 'Ascend' and watch_condition.get('id') == ConditionIdEnum.NAN.value:
            log.error("Ascend doesn't support nan watch condition.")
            raise DebuggerParamValueError("Ascend doesn't support nan watch condition.")

    def update_watchpoint(self, params):
        """
--- a/tests/st/func/debugger/expect_results/restful_results/create_and_delete_watchpoint.json
+++ b/tests/st/func/debugger/expect_results/restful_results/create_and_delete_watchpoint.json
@@ -1 +1 @@
 {"watch_points": [{"id": 1, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1.0}], "abbr": "MAX>"}}, {"id": 2, "watch_condition": {"id": "max_lt", "params": [{"name": "param", "value": -1.0}], "abbr": "MAX<"}}, {"id": 3, "watch_condition": {"id": "min_gt", "params": [{"name": "param", "value": 1e+32}], "abbr": "MIN>"}}, {"id": 5, "watch_condition": {"id": "max_min_gt", "params": [{"name": "param", "value": 0}], "abbr": "MAX-MIN>"}}, {"id": 6, "watch_condition": {"id": "max_min_lt", "params": [{"name": "param", "value": 0}], "abbr": "MAX-Min<"}}, {"id": 7, "watch_condition": {"id": "mean_gt", "params": [{"name": "param", "value": 0}], "abbr": "MEAN>"}}, {"id": 8, "watch_condition": {"id": "mean_lt", "params": [{"name": "param", "value": 0}], "abbr": "MEAN<"}}, {"id": 9, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 10, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]}
 {"watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0}], "abbr": "TL"}}, {"id": 2, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "max_lt", "value": -1.0}], "abbr": "TS"}}, {"id": 3, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "min_gt", "value": 1e+32}], "abbr": "TL"}}, {"id": 5, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "mean_gt", "value": 0}], "abbr": "TL"}}, {"id": 6, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "mean_lt", "value": 0}], "abbr": "TS"}}]}
--- a/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_ascend.json
+++ b/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_ascend.json
--- a/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_gpu.json
+++ b/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_gpu.json
--- a/tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json
+++ b/tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json
@@ -1 +1 @@
 {"graph": {"nodes": [{"name": "Default/args0", "full_name": "Default/args0", "type": "Parameter", "input": {}, "output": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3], "has_prev_step": true}], "graph_name": "graph_0"}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "type": "Cast", "input": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}, {"name": "Default/TransData-op99", "full_name": "Default/TransData-op99", "type": "TransData", "input": {"Default/args0": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {"Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "inf", "params": [], 
"abbr": "INF"}, "error_code": 0}], "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}]}}
 {"graph": {"nodes": [{"name": "Default/args0", "full_name": "Default/args0", "type": "Parameter", "input": {}, "output": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3], "has_prev_step": true}], "graph_name": "graph_0"}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "type": "Cast", "input": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}, {"name": "Default/TransData-op99", "full_name": "Default/TransData-op99", "type": "TransData", "input": {"Default/args0": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {"Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", 
"params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}], "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}]}}
--- a/tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json
+++ b/tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json
@@ -1,47 +1 @@
 {
  "watch_point_hits": [
    {
      "node_name": "Default/TransData-op99",
      "tensors": [
        {
          "slot": "0",
          "summarized_error_code": 0,
          "watch_points": [
            {
              "id": 1,
              "watch_condition": {
                "id": "inf",
                "params": [],
                "abbr": "INF"
              },
              "error_code": 0
            }
          ]
        }
      ],
      "graph_name": "graph_0"
    },
    {
      "node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25",
      "tensors": [
        {
          "slot": "0",
          "summarized_error_code": 0,
          "watch_points": [
            {
              "id": 1,
              "watch_condition": {
                "id": "inf",
                "params": [],
                "abbr": "INF"
              },
              "error_code": 0
            }
          ]
        }
      ],
      "graph_name": "graph_0"
    }
  ],
  "outdated": false
 }
 {"watch_point_hits": [{"node_name": "Default/TransData-op99", "tensors": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}]}], "graph_name": "graph_0"}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "tensors": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}]}], "graph_name": "graph_0"}], "outdated": false}
--- a/tests/st/func/debugger/test_restful_api.py
+++ b/tests/st/func/debugger/test_restful_api.py
@@ -84,7 +84,7 @@ class TestAscendDebugger:

    def test_get_conditions(self, app_client):
        """Test get conditions for ascend."""
        url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions'
        url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/condition-collections'
        body_data = {}
        expect_file = 'get_conditions_for_ascend.json'
        with self._debugger_client.get_thread_instance():
@@ -131,16 +131,12 @@ class TestAscendDebugger:
        with self._debugger_client.get_thread_instance():
            check_state(app_client)
            conditions = [
                {'id': 'max_gt', 'params': [{'name': 'param', 'value': 1.0}]},
                {'id': 'max_lt', 'params': [{'name': 'param', 'value': -1.0}]},
                {'id': 'min_gt', 'params': [{'name': 'param', 'value': 1e+32}]},
                {'id': 'min_lt', 'params': [{'name': 'param', 'value': -1e+32}]},
                {'id': 'max_min_gt', 'params': [{'name': 'param', 'value': 0}]},
                {'id': 'max_min_lt', 'params': [{'name': 'param', 'value': 0}]},
                {'id': 'mean_gt', 'params': [{'name': 'param', 'value': 0}]},
                {'id': 'mean_lt', 'params': [{'name': 'param', 'value': 0}]},
                {'id': 'inf', 'params': []},
                {'id': 'overflow', 'params': []},
                {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
                {'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': -1.0}]},
                {'id': 'tensor_too_large', 'params': [{'name': 'min_gt', 'value': 1e+32}]},
                {'id': 'tensor_too_small', 'params': [{'name': 'min_lt', 'value': -1e+32}]},
                {'id': 'tensor_too_large', 'params': [{'name': 'mean_gt', 'value': 0}]},
                {'id': 'tensor_too_small', 'params': [{'name': 'mean_lt', 'value': 0}]}
            ]
            for idx, condition in enumerate(conditions):
                create_watchpoint(app_client, condition, idx + 1)
@@ -167,7 +163,7 @@ class TestAscendDebugger:
        leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias'
        with self._debugger_client.get_thread_instance():
            check_state(app_client)
            condition = {'id': 'inf', 'params': []}
            condition = {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}
            create_watchpoint(app_client, condition, watch_point_id)
            # update watchpoint watchpoint list
            url = 'update_watchpoint'
@@ -327,7 +323,7 @@ class TestAscendDebugger:
    @pytest.mark.platform_x86_ascend_training
    @pytest.mark.parametrize("url, body_data, enable_recheck", [
        ('create_watchpoint',
         {'condition': {'id': 'inf', 'params': []},
         {'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
          'watch_nodes': ['Default']}, True),
        ('update_watchpoint',
         {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'],
@@ -434,10 +430,10 @@ class TestGPUDebugger:
    @pytest.mark.platform_x86_ascend_training
    @pytest.mark.parametrize("url, body_data, enable_recheck", [
        ('create_watchpoint',
         {'condition': {'id': 'inf', 'params': []},
         {'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
          'watch_nodes': ['Default']}, True),
        ('create_watchpoint',
         {'condition': {'id': 'inf', 'params': []},
         {'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
          'watch_nodes': ['Default/TransData-op99']}, True),
        ('update_watchpoint',
         {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'],
@@ -472,7 +468,7 @@ class TestGPUDebugger:

    def test_get_conditions(self, app_client):
        """Test get conditions for gpu."""
        url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions'
        url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/condition-collections'
        body_data = {}
        expect_file = 'get_conditions_for_gpu.json'
        with self._debugger_client.get_thread_instance():
@@ -493,7 +489,7 @@ class TestGPUDebugger:
            # send recheck when disable to do recheck
            get_request_result(app_client, 'recheck', {}, method='post', expect_code=400)
            # send recheck when enable to do recheck
            create_watchpoint(app_client, {'id': 'inf', 'params': []}, 2)
            create_watchpoint(app_client, {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 2)
            res = get_request_result(app_client, 'recheck', {}, method='post')
            assert res['metadata']['enable_recheck'] is False

@@ -579,10 +575,10 @@ class TestMultiGraphDebugger:
    @pytest.mark.platform_x86_gpu_training
    @pytest.mark.platform_x86_ascend_training
    @pytest.mark.parametrize("filter_condition, expect_id", [
        ({'condition': {'id': 'inf'},
        ({'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
          'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'],
          'graph_name': 'graph_0'}, 1),
        ({'condition': {'id': 'inf'},
        ({'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
          'watch_nodes': ['graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1'],
          'graph_name': None}, 1)
    ])
@@ -665,7 +661,8 @@ def create_watchpoint(app_client, condition, expect_id):
 def create_watchpoint_and_wait(app_client):
    """Preparation for recheck."""
    check_state(app_client)
    create_watchpoint(app_client, condition={'id': 'inf', 'params': []}, expect_id=1)
    create_watchpoint(app_client, condition={'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
                      expect_id=1)
    # send run command to get watchpoint hit
    url = 'control'
    body_data = {'mode': 'continue',
--- a/tests/st/func/debugger/utils.py
+++ b/tests/st/func/debugger/utils.py
@@ -74,7 +74,7 @@ def send_and_save_result(app_client, url, body_data, file_path, method='post'):

 def delete_random_items(res):
    """delete the random items in metadata."""
    if res.get('metadata'):
    if isinstance(res, dict) and res.get('metadata'):
        if res['metadata'].get('ip'):
            res['metadata'].pop('ip')
        if res['metadata'].get('pos'):
--- a/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_0.json
+++ b/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_0.json
@@ -1,25 +1,5 @@
 [
  {
    "watchCondition": {
      "condition": "inf"
    },
    "id": 1,
    "watch_nodes_num": 0
  },
  {
    "watchCondition": {
      "condition": "inf"
    },
    "id": 2,
    "watch_nodes_num": 172
  },
  {
    "watchCondition": {
      "condition": "max_gt",
      "params": [{"name": "param", "value": 1}],
      "value": 1
    },
    "id": 3,
    "watch_nodes_num": 1
  }
  {"watchCondition": {"condition": "tensor_too_small", "value": 1.0, "params": [{"name": "abs_mean_lt", "disabled": true}, {"name": "max_lt", "value": 1.0}, {"name": "min_lt", "disabled": true}, {"name": "mean_lt", "disabled": true}]}, "id": 1, "watch_nodes_num": 0},
  {"watchCondition": {"condition": "tensor_too_small", "value": 1.0, "params": [{"name": "abs_mean_lt", "disabled": true}, {"name": "max_lt", "disabled": true}, {"name": "min_lt", "value": 1.0}, {"name": "mean_lt", "disabled": true}]}, "id": 2, "watch_nodes_num": 172},
  {"watchCondition": {"condition": "tensor_too_large", "value": 1.0, "params": [{"name": "abs_mean_gt", "disabled": true}, {"name": "max_gt", "value": 1.0}, {"name": "min_gt", "disabled": true}, {"name": "mean_gt", "disabled": true}]}, "id": 3, "watch_nodes_num": 1}
 ]
--- a/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_1.json
+++ b/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_1.json
@@ -1 +1 @@
 [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 2, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 3, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1}], "abbr": "MAX>"}}]
 [{"id": 1, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "max_lt", "value": 1.0}], "abbr": "TS"}}, {"id": 2, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "min_lt", "value": 1.0}], "abbr": "TS"}}, {"id": 3, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0}], "abbr": "TL"}}]
--- a/tests/ut/debugger/stream_handler/test_watchpoint_handler.py
+++ b/tests/ut/debugger/stream_handler/test_watchpoint_handler.py
@@ -61,9 +61,9 @@ class TestWatchpointHandler:
    def _create_watchpoint(self):
        """Test create_watchpoint."""
        watchpoints = [
            ({'id': 'inf', 'params': []}, None, None, 1),
            ({'id': 'inf', 'params': []}, ["Default"], None, 2),
            ({'id': 'max_gt', 'params': [{'name': 'param', 'value': 1}]},
            ({'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': 1.0}]}, None, None, 1),
            ({'id': 'tensor_too_small', 'params': [{'name': 'min_lt', 'value': 1.0}]}, ["Default"], None, 2),
            ({'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
             ["Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92"],
             None, 3)
        ]
@@ -160,7 +160,8 @@ class TestWatchpointHandler:
                                        expect_deleted_ids):
        """Test delete_watchpoint."""
        for _ in range(watch_point_id):
            self.handler.create_watchpoint(self.conditionmgr, {'id': 'inf', 'param': []})
            self.handler.create_watchpoint(self.conditionmgr,
                                           {'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': 1.0}]})
        with TestCase().assertLogs(logger=log, level='DEBUG') as log_content:
            self.handler.delete_watchpoint(watch_point_id)
        TestCase().assertIn(
@@ -233,13 +234,13 @@ def test_validate_watch_condition_type_error():

 def test_validate_watch_condition_params_except():
    """Test validate_watch_condition_params."""
    watch_condition = {'id': 'inf', 'params': [{'name': 'param', 'value': 0}]}
    watch_condition = {'id': 'weight_overflow', 'params': [{'name': 'param', 'value': 0}]}
    conditionmgr = ConditionMgr()
    with pytest.raises(DebuggerParamValueError) as err:
        validate_watch_condition_params(conditionmgr, watch_condition)
    assert err.value.error_code == '5054B081'

    watch_condition = {'id': 'max_gt', 'params': [{'name': 'param', 'value': '0'}]}
    watch_condition = {'id': 'tensor_overflow', 'params': [{'name': 'param', 'value': '0'}]}
    with pytest.raises(DebuggerParamValueError) as err:
        validate_watch_condition_params(conditionmgr, watch_condition)
    assert err.value.error_code == '5054B081'
--- a/tests/ut/debugger/test_debugger_server.py
+++ b/tests/ut/debugger/test_debugger_server.py
@@ -199,8 +199,9 @@ class TestDebuggerServer:
    def test_create_watchpoint(self, *args):
        """Test create watchpoint."""
        args[0].return_value = 1
        res = self._server.create_watchpoint({'watch_condition': {'id': 'inf'},
                                              'watch_nodes': ['watch_node_name']})
        res = self._server.create_watchpoint(
            {'watch_condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
             'watch_nodes': ['watch_node_name']})
        assert res == {'id': 1, 'metadata': {'enable_recheck': False, 'state': 'waiting'}}

    @mock.patch.object(MetadataHandler, 'state', 'waiting')