From 254690e254e135dcf8cd6bd8ed43fb5010f38aed Mon Sep 17 00:00:00 2001 From: jiangshuqiang <962978787@qq.com> Date: Sat, 28 Nov 2020 16:28:25 +0800 Subject: [PATCH] update the version of debugger from 1.0 to 1.1 delete old version conditions and old condition api fix condition collection orders fix tests --- .../backend/conditionmgr/conditionmgr_api.py | 7 - mindinsight/conf/constants.py | 2 +- .../debugger/conditionmgr/condition.py | 17 -- .../debugger/conditionmgr/condition_list.py | 169 +----------------- .../debugger/conditionmgr/conditionmgr.py | 28 +-- .../debugger/conditionmgr/recommender.py | 10 +- mindinsight/debugger/debugger_server.py | 11 +- mindinsight/debugger/proto/debug_grpc.proto | 10 -- .../debugger/stream_cache/watchpoint.py | 11 -- .../stream_operator/watchpoint_operator.py | 6 +- .../create_and_delete_watchpoint.json | 2 +- .../get_conditions_for_ascend.json | 2 +- .../get_conditions_for_gpu.json | 2 +- .../retrieve_tensor_graph-0.json | 2 +- .../retrieve_watchpoint_hit.json | 48 +---- tests/st/func/debugger/test_restful_api.py | 37 ++-- tests/st/func/debugger/utils.py | 2 +- .../watchpoint/watchpoint_handler_get_0.json | 26 +-- .../watchpoint/watchpoint_handler_get_1.json | 2 +- .../stream_handler/test_watchpoint_handler.py | 13 +- tests/ut/debugger/test_debugger_server.py | 5 +- 21 files changed, 53 insertions(+), 359 deletions(-) diff --git a/mindinsight/backend/conditionmgr/conditionmgr_api.py b/mindinsight/backend/conditionmgr/conditionmgr_api.py index 788586fe..936cd78d 100644 --- a/mindinsight/backend/conditionmgr/conditionmgr_api.py +++ b/mindinsight/backend/conditionmgr/conditionmgr_api.py @@ -26,13 +26,6 @@ BLUEPRINT = Blueprint("conditionmgr", __name__, url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX) -@BLUEPRINT.route("/conditionmgr/train-jobs//conditions", methods=["GET"]) -def get_conditions(train_id): - """get conditions""" - reply = _wrap_reply(BACKEND_SERVER.get_conditions, train_id) - return reply - - @BLUEPRINT.route("/conditionmgr/train-jobs//condition-collections", methods=["GET"]) def get_condition_collections(train_id): """get condition collections""" diff --git a/mindinsight/conf/constants.py b/mindinsight/conf/constants.py index aebeef8f..db0bb3db 100644 --- a/mindinsight/conf/constants.py +++ b/mindinsight/conf/constants.py @@ -64,4 +64,4 @@ MAX_HISTOGRAM_STEP_SIZE_PER_TAG = 50 MAX_TENSOR_STEP_SIZE_PER_TAG = 20 MAX_TENSOR_RESPONSE_DATA_SIZE = 100000 -ENABLE_RECOMMENDED_WATCHPOINTS = False +ENABLE_RECOMMENDED_WATCHPOINTS = True diff --git a/mindinsight/debugger/conditionmgr/condition.py b/mindinsight/debugger/conditionmgr/condition.py index ae6f1564..50686b0d 100644 --- a/mindinsight/debugger/conditionmgr/condition.py +++ b/mindinsight/debugger/conditionmgr/condition.py @@ -18,8 +18,6 @@ Management of all conditions. This module is used to register all conditions, as well as their parameters. This module also provide the available conditions to condition_collections api. """ -import math - from enum import Enum from mindinsight.debugger.conditionmgr.log import logger @@ -35,17 +33,6 @@ class ConditionIdEnum(Enum): GRADIENT_EXPLODING = "gradient_exploding" TENSOR_OVERFLOW = "tensor_overflow" OPERATOR_OVERFLOW = "operator_overflow" - NAN = "nan" - OVERFLOW_ASCEND_CHIP = "overflow" - INF = "inf" - MAX_GT = "max_gt" - MAX_LT = "max_lt" - MIN_GT = "min_gt" - MIN_LT = "min_lt" - MAX_MIN_GT = "max_min_gt" - MAX_MIN_LT = "max_min_lt" - MEAN_GT = "mean_gt" - MEAN_LT = "mean_lt" TENSOR_INITIALIZATION = "tensor_initialization" TENSOR_TOO_LARGE = "tensor_too_large" TENSOR_TOO_SMALL = "tensor_too_small" @@ -287,7 +274,3 @@ def check_abs_param_range(value): if 0 <= value < float("inf"): return True return False - - -def check_not_nan(value): - return not math.isnan(value) diff --git a/mindinsight/debugger/conditionmgr/condition_list.py b/mindinsight/debugger/conditionmgr/condition_list.py index 5821b891..d64e3d7e 100644 --- a/mindinsight/debugger/conditionmgr/condition_list.py +++ b/mindinsight/debugger/conditionmgr/condition_list.py @@ -29,7 +29,6 @@ from mindinsight.debugger.conditionmgr.condition import check_initialization_ava from mindinsight.debugger.conditionmgr.condition import check_normal_param_range from mindinsight.debugger.conditionmgr.condition import check_percentage_param_range from mindinsight.debugger.conditionmgr.condition import check_abs_param_range -from mindinsight.debugger.conditionmgr.condition import check_not_nan CONDITION_LIST = [ @@ -67,7 +66,7 @@ CONDITION_LIST = [ # Send this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, parameters=[], - supported_target_type=TargetTypeEnum.TENSOR, + supported_target_type=TargetTypeEnum.WEIGHT, supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), minimum_debugger_capability=(1, 1) ), @@ -225,164 +224,6 @@ CONDITION_LIST = [ supported_platforms=(PlatformEnum.ASCEND,), minimum_debugger_capability=(1, 1) ), - Condition( - condition_id=ConditionIdEnum.NAN, - abbr="NAN", - # Send this condition to MindSpore will use WatchCondition.Condition.nan - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.GPU,), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.OVERFLOW_ASCEND_CHIP, - abbr="OVERFLOW", - # Send this condition to MindSpore will use WatchCondition.Condition.overflow - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND,), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.INF, - abbr="INF", - # Send this condition to MindSpore will use WatchCondition.Condition.inf - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MAX_GT, - abbr="MAX>", - # Send this condition to MindSpore will use WatchCondition.Condition.max_gt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MAX_LT, - abbr="MAX<", - # Send this condition to MindSpore will use WatchCondition.Condition.max_lt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MIN_GT, - abbr="MIN>", - # Send this condition to MindSpore will use WatchCondition.Condition.min_gt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MIN_LT, - abbr="MIN<", - # Send this condition to MindSpore will use WatchCondition.Condition.min_lt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MAX_MIN_GT, - abbr="MAX-MIN>", - # Send this condition to MindSpore will use WatchCondition.Condition.max_min_gt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MAX_MIN_LT, - abbr="MAX-Min<", - # Send this condition to MindSpore will use WatchCondition.Condition.max_min_lt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MEAN_GT, - abbr="MEAN>", - # Send this condition to MindSpore will use WatchCondition.Condition.mean_gt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), - Condition( - condition_id=ConditionIdEnum.MEAN_LT, - abbr="MEAN<", - # Send this condition to MindSpore will use WatchCondition.Condition.mean_lt - optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, - parameters=[ - ConditionParameter( - name="param", - value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_normal_param_range - ) - ], - supported_target_type=TargetTypeEnum.TENSOR, - supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), - minimum_debugger_capability=(1, 0) - ), Condition( condition_id=ConditionIdEnum.TENSOR_INITIALIZATION, abbr="TI", @@ -578,13 +419,13 @@ CONDITION_LIST = [ ConditionParameter( name="range_start_inclusive", value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_not_nan, + valid_test_func=check_normal_param_range, param_type=ParamTypeEnum.SUPPORT_PARAM ), ConditionParameter( name="range_end_inclusive", value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_not_nan, + valid_test_func=check_normal_param_range, param_type=ParamTypeEnum.SUPPORT_PARAM ), ConditionParameter( @@ -623,13 +464,13 @@ CONDITION_LIST = [ ConditionParameter( name="range_start_inclusive", value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_not_nan, + valid_test_func=check_normal_param_range, param_type=ParamTypeEnum.SUPPORT_PARAM ), ConditionParameter( name="range_end_inclusive", value_type=ValueTypeEnum.FLOAT64, - valid_test_func=check_not_nan, + valid_test_func=check_normal_param_range, param_type=ParamTypeEnum.SUPPORT_PARAM ), ConditionParameter( diff --git a/mindinsight/debugger/conditionmgr/conditionmgr.py b/mindinsight/debugger/conditionmgr/conditionmgr.py index cab74c72..6044441a 100644 --- a/mindinsight/debugger/conditionmgr/conditionmgr.py +++ b/mindinsight/debugger/conditionmgr/conditionmgr.py @@ -46,30 +46,6 @@ class ConditionMgr: for condition in conditions: self.register_condition(condition) - def get_all(self, condition_context): - """Get all register conditions.""" - conditions = [] - for condition in self.conditions.values(): - parameters = [] - if not condition.is_available(condition_context): - continue - for param in condition.parameters: - if not param.visible_on_ui: - continue - parameters.append({ - "name": param.name, - "type": param.type.name, - "support_disable": param.support_disable, - "default_value": param.default_value - }) - conditions.append({ - "id": condition.id, - "parameters": parameters, - "supported_target_type": condition.supported_target_type.name - }) - conditions = sorted(conditions, key=lambda x: x.get('id')) - return {"conditions": conditions} - def get_condition(self, condition_id) -> Condition: """Get condition by condition id""" return self.conditions[condition_id] @@ -126,9 +102,9 @@ class ConditionMgr: }) reply = [] - self.check_and_sort(collections, TargetTypeEnum.ACTIVATION.value, reply) - self.check_and_sort(collections, TargetTypeEnum.GRADIENT.value, reply) self.check_and_sort(collections, TargetTypeEnum.TENSOR.value, reply) self.check_and_sort(collections, TargetTypeEnum.WEIGHT.value, reply) + self.check_and_sort(collections, TargetTypeEnum.ACTIVATION.value, reply) + self.check_and_sort(collections, TargetTypeEnum.GRADIENT.value, reply) return reply diff --git a/mindinsight/debugger/conditionmgr/recommender.py b/mindinsight/debugger/conditionmgr/recommender.py index 3bdacd58..8b367928 100644 --- a/mindinsight/debugger/conditionmgr/recommender.py +++ b/mindinsight/debugger/conditionmgr/recommender.py @@ -106,7 +106,7 @@ def recommend_watchpoints(condition_mgr: ConditionMgr, graph_stream, condition_c # add tensor watch points merged_info = get_basic_node_info(TargetTypeEnum.TENSOR.value, graph_stream) - _recommend_overflow_ascend_chip(merged_info, condition_mgr, watch_points, condition_context) + _recommend_operator_overflow(merged_info, condition_mgr, watch_points, condition_context) _recommend_tensor_overflow(merged_info, condition_mgr, watch_points, condition_context) _recommend_tensor_all_zero(merged_info, condition_mgr, watch_points, condition_context) @@ -165,21 +165,21 @@ def _recommend_tensor_overflow(basic_info_nodes, condition_mgr, watch_points, co watch_points.append(overflow_watchpoint) -def _recommend_overflow_ascend_chip(basic_info_nodes, condition_mgr, watch_points, condition_context): +def _recommend_operator_overflow(basic_info_nodes, condition_mgr, watch_points, condition_context): """Recommend tensor overflow watchpoint.""" if not basic_info_nodes: return - if not condition_mgr.has_condition(ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, condition_context): + if not condition_mgr.has_condition(ConditionIdEnum.OPERATOR_OVERFLOW.value, condition_context): return - condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value) + condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.OPERATOR_OVERFLOW.value) overflow_d_watchpoint = _WatchPointData( watch_condition={ "condition": condition.id, "params": [] }, watch_nodes=basic_info_nodes.copy(), - name='recommend_overflow_ascend_chip_watchpoint' + name='recommend_operator_overflow_watchpoint' ) watch_points.append(overflow_d_watchpoint) diff --git a/mindinsight/debugger/debugger_server.py b/mindinsight/debugger/debugger_server.py index 208421a5..bd79aefa 100644 --- a/mindinsight/debugger/debugger_server.py +++ b/mindinsight/debugger/debugger_server.py @@ -68,17 +68,10 @@ class DebuggerServer: self.grpc_server_manager = None self.back_server = None - def get_conditions(self, train_id): - """Get all default conditions""" - metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) - condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) - log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) - return self.condition_mgr.get_all(condition_context) - def get_condition_collections(self, train_id): """Get default condition_collections""" metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) - condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) + condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 1)) log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) return self.condition_mgr.get_all_collections(condition_context) @@ -88,7 +81,7 @@ class DebuggerServer: log.error("Bool param should be given for set_recommended") raise DebuggerParamValueError("Bool param should be given.") metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) - condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) + condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 1)) log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) res = metadata_stream.get(['state', 'enable_recheck']) if set_recommended and not metadata_stream.recommendation_confirmed: diff --git a/mindinsight/debugger/proto/debug_grpc.proto b/mindinsight/debugger/proto/debug_grpc.proto index 5c83413f..d3283e5e 100644 --- a/mindinsight/debugger/proto/debug_grpc.proto +++ b/mindinsight/debugger/proto/debug_grpc.proto @@ -91,17 +91,7 @@ message ViewCMD { message WatchCondition { enum Condition { - nan = 0; - inf = 1; overflow = 2; - max_gt = 3; - max_lt = 4; - min_gt = 5; - min_lt = 6; - max_min_gt = 7; - max_min_lt = 8; - mean_gt = 9; - mean_lt = 10; sd_gt = 11; sd_lt = 12; tensor_general_overflow = 13; diff --git a/mindinsight/debugger/stream_cache/watchpoint.py b/mindinsight/debugger/stream_cache/watchpoint.py index fe2e857d..c4c02a92 100644 --- a/mindinsight/debugger/stream_cache/watchpoint.py +++ b/mindinsight/debugger/stream_cache/watchpoint.py @@ -29,18 +29,7 @@ WATCHPOINT_CONDITION_MAPPING = { ConditionIdEnum.GRADIENT_EXPLODING.value: WatchCondition.Condition.tensor_general_overflow, ConditionIdEnum.GRADIENT_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, ConditionIdEnum.GRADIENT_VANISHING.value: WatchCondition.Condition.tensor_too_small, - ConditionIdEnum.INF.value: WatchCondition.Condition.inf, - ConditionIdEnum.MAX_GT.value: WatchCondition.Condition.max_gt, - ConditionIdEnum.MAX_LT.value: WatchCondition.Condition.max_lt, - ConditionIdEnum.MAX_MIN_GT.value: WatchCondition.Condition.max_min_gt, - ConditionIdEnum.MAX_MIN_LT.value: WatchCondition.Condition.max_min_lt, - ConditionIdEnum.MEAN_GT.value: WatchCondition.Condition.mean_gt, - ConditionIdEnum.MEAN_LT.value: WatchCondition.Condition.mean_lt, - ConditionIdEnum.MIN_GT.value: WatchCondition.Condition.min_gt, - ConditionIdEnum.MIN_LT.value: WatchCondition.Condition.min_lt, - ConditionIdEnum.NAN.value: WatchCondition.Condition.nan, ConditionIdEnum.OPERATOR_OVERFLOW.value: WatchCondition.Condition.overflow, - ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value: WatchCondition.Condition.overflow, ConditionIdEnum.TENSOR_ALL_ZERO.value: WatchCondition.Condition.tensor_all_zero, ConditionIdEnum.TENSOR_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization, ConditionIdEnum.TENSOR_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow, diff --git a/mindinsight/debugger/stream_operator/watchpoint_operator.py b/mindinsight/debugger/stream_operator/watchpoint_operator.py index 5d81a0b4..3e6030e6 100644 --- a/mindinsight/debugger/stream_operator/watchpoint_operator.py +++ b/mindinsight/debugger/stream_operator/watchpoint_operator.py @@ -95,13 +95,9 @@ class WatchpointOperator: def _validate_watch_condition(self, watch_condition): """Validate watch condition.""" metadata_stream = self._metadata_stream - if metadata_stream.backend == 'GPU' and watch_condition.get('id') in ( - ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, ConditionIdEnum.OPERATOR_OVERFLOW.value): + if metadata_stream.backend == 'GPU' and watch_condition.get('id') == ConditionIdEnum.OPERATOR_OVERFLOW.value: log.error("GPU doesn't support overflow watch condition.") raise DebuggerParamValueError("GPU doesn't support overflow watch condition.") - if metadata_stream.backend == 'Ascend' and watch_condition.get('id') == ConditionIdEnum.NAN.value: - log.error("Ascend doesn't support nan watch condition.") - raise DebuggerParamValueError("Ascend doesn't support nan watch condition.") def update_watchpoint(self, params): """ diff --git a/tests/st/func/debugger/expect_results/restful_results/create_and_delete_watchpoint.json b/tests/st/func/debugger/expect_results/restful_results/create_and_delete_watchpoint.json index 3ce167e0..04b242ae 100644 --- a/tests/st/func/debugger/expect_results/restful_results/create_and_delete_watchpoint.json +++ b/tests/st/func/debugger/expect_results/restful_results/create_and_delete_watchpoint.json @@ -1 +1 @@ -{"watch_points": [{"id": 1, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1.0}], "abbr": "MAX>"}}, {"id": 2, "watch_condition": {"id": "max_lt", "params": [{"name": "param", "value": -1.0}], "abbr": "MAX<"}}, {"id": 3, "watch_condition": {"id": "min_gt", "params": [{"name": "param", "value": 1e+32}], "abbr": "MIN>"}}, {"id": 5, "watch_condition": {"id": "max_min_gt", "params": [{"name": "param", "value": 0}], "abbr": "MAX-MIN>"}}, {"id": 6, "watch_condition": {"id": "max_min_lt", "params": [{"name": "param", "value": 0}], "abbr": "MAX-Min<"}}, {"id": 7, "watch_condition": {"id": "mean_gt", "params": [{"name": "param", "value": 0}], "abbr": "MEAN>"}}, {"id": 8, "watch_condition": {"id": "mean_lt", "params": [{"name": "param", "value": 0}], "abbr": "MEAN<"}}, {"id": 9, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 10, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} \ No newline at end of file +{"watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0}], "abbr": "TL"}}, {"id": 2, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "max_lt", "value": -1.0}], "abbr": "TS"}}, {"id": 3, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "min_gt", "value": 1e+32}], "abbr": "TL"}}, {"id": 5, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "mean_gt", "value": 0}], "abbr": "TL"}}, {"id": 6, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "mean_lt", "value": 0}], "abbr": "TS"}}]} \ No newline at end of file diff --git a/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_ascend.json b/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_ascend.json index 722612ed..af60b0b0 100644 --- a/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_ascend.json +++ b/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_ascend.json @@ -1 +1 @@ -{"conditions": [{"id": "inf", "parameters": [], "supported_target_type": "TENSOR"}, {"id": "max_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "overflow", "parameters": [], "supported_target_type": "TENSOR"}]} \ No newline at end of file +[{"id": "tensor_condition_collection", "conditions": [{"id": "operator_overflow", "parameters": [], "supported_target_type": "TENSOR", "abbr": "OO"}, {"id": "tensor_all_zero", "parameters": [{"name": "zero_percentage_ge", "type": "FLOAT64", "support_disable": true, "default_value": 100, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TZ"}, {"id": "tensor_overflow", "parameters": [], "supported_target_type": "TENSOR", "abbr": "TO"}, {"id": "tensor_range", "parameters": [{"name": "range_start_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_end_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_percentage_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "range_percentage_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "max_min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TR"}, {"id": "tensor_too_large", "parameters": [{"name": "abs_mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TL"}, {"id": "tensor_too_small", "parameters": [{"name": "abs_mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TS"}]}, {"id": "weight_condition_collection", "conditions": [{"id": "weight_change_too_large", "parameters": [{"name": "abs_mean_update_ratio_gt", "type": "FLOAT64", "support_disable": true, "default_value": 0.1, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WCL"}, {"id": "weight_change_too_small", "parameters": [{"name": "abs_mean_update_ratio_lt", "type": "FLOAT64", "support_disable": true, "default_value": 0.0001, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WCS"}, {"id": "weight_not_changed", "parameters": [{"name": "rtol", "type": "FLOAT64", "support_disable": true, "default_value": 1e-05, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WNC"}, {"id": "weight_overflow", "parameters": [], "supported_target_type": "WEIGHT", "abbr": "WO"}, {"id": "weight_too_large", "parameters": [{"name": "abs_mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WL"}, {"id": "weight_too_small", "parameters": [{"name": "abs_mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WS"}]}, {"id": "activation_condition_collection", "conditions": [{"id": "activation_range", "parameters": [{"name": "range_start_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_end_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_percentage_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "range_percentage_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "max_min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "ACTIVATION", "abbr": "AR"}]}, {"id": "gradient_condition_collection", "conditions": [{"id": "gradient_exploding", "parameters": [], "supported_target_type": "GRADIENT", "abbr": "GE"}, {"id": "gradient_too_large", "parameters": [{"name": "abs_mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "GRADIENT", "abbr": "GL"}, {"id": "gradient_vanishing", "parameters": [{"name": "abs_mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "GRADIENT", "abbr": "GV"}]}] \ No newline at end of file diff --git a/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_gpu.json b/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_gpu.json index 55cdec3a..5254d4cf 100644 --- a/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_gpu.json +++ b/tests/st/func/debugger/expect_results/restful_results/get_conditions_for_gpu.json @@ -1 +1 @@ -{"conditions": [{"id": "inf", "parameters": [], "supported_target_type": "TENSOR"}, {"id": "max_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "nan", "parameters": [], "supported_target_type": "TENSOR"}]} \ No newline at end of file +[{"id": "tensor_condition_collection", "conditions": [{"id": "tensor_all_zero", "parameters": [{"name": "zero_percentage_ge", "type": "FLOAT64", "support_disable": true, "default_value": 100, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TZ"}, {"id": "tensor_overflow", "parameters": [], "supported_target_type": "TENSOR", "abbr": "TO"}, {"id": "tensor_range", "parameters": [{"name": "range_start_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_end_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_percentage_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "range_percentage_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "max_min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TR"}, {"id": "tensor_too_large", "parameters": [{"name": "abs_mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TL"}, {"id": "tensor_too_small", "parameters": [{"name": "abs_mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "TENSOR", "abbr": "TS"}]}, {"id": "weight_condition_collection", "conditions": [{"id": "weight_change_too_large", "parameters": [{"name": "abs_mean_update_ratio_gt", "type": "FLOAT64", "support_disable": true, "default_value": 0.1, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WCL"}, {"id": "weight_change_too_small", "parameters": [{"name": "abs_mean_update_ratio_lt", "type": "FLOAT64", "support_disable": true, "default_value": 0.0001, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WCS"}, {"id": "weight_not_changed", "parameters": [{"name": "rtol", "type": "FLOAT64", "support_disable": true, "default_value": 1e-05, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WNC"}, {"id": "weight_overflow", "parameters": [], "supported_target_type": "WEIGHT", "abbr": "WO"}, {"id": "weight_too_large", "parameters": [{"name": "abs_mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WL"}, {"id": "weight_too_small", "parameters": [{"name": "abs_mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "WEIGHT", "abbr": "WS"}]}, {"id": "activation_condition_collection", "conditions": [{"id": "activation_range", "parameters": [{"name": "range_start_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_end_inclusive", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "SUPPORT_PARAM", "required_params": null}, {"name": "range_percentage_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "range_percentage_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": ["range_start_inclusive", "range_end_inclusive"]}, {"name": "max_min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "ACTIVATION", "abbr": "AR"}]}, {"id": "gradient_condition_collection", "conditions": [{"id": "gradient_exploding", "parameters": [], "supported_target_type": "GRADIENT", "abbr": "GE"}, {"id": "gradient_too_large", "parameters": [{"name": "abs_mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_gt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "GRADIENT", "abbr": "GL"}, {"id": "gradient_vanishing", "parameters": [{"name": "abs_mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "max_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "min_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}, {"name": "mean_lt", "type": "FLOAT64", "support_disable": true, "default_value": null, "param_type": "CHECK_PARAM", "required_params": null}], "supported_target_type": "GRADIENT", "abbr": "GV"}]}] \ No newline at end of file diff --git a/tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json b/tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json index e01ea4c4..59c5fa63 100644 --- a/tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json +++ b/tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json @@ -1 +1 @@ -{"graph": {"nodes": [{"name": "Default/args0", "full_name": "Default/args0", "type": "Parameter", "input": {}, "output": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3], "has_prev_step": true}], "graph_name": "graph_0"}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "type": "Cast", "input": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}, {"name": "Default/TransData-op99", "full_name": "Default/TransData-op99", "type": "TransData", "input": {"Default/args0": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {"Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}, "error_code": 0}], "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}]}} \ No newline at end of file +{"graph": {"nodes": [{"name": "Default/args0", "full_name": "Default/args0", "type": "Parameter", "input": {}, "output": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3], "has_prev_step": true}], "graph_name": "graph_0"}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "type": "Cast", "input": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}, {"name": "Default/TransData-op99", "full_name": "Default/TransData-op99", "type": "TransData", "input": {"Default/args0": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {"Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}], "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}]}} \ No newline at end of file diff --git a/tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json b/tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json index 508abddf..dd3e0c34 100644 --- a/tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json +++ b/tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json @@ -1,47 +1 @@ -{ - "watch_point_hits": [ - { - "node_name": "Default/TransData-op99", - "tensors": [ - { - "slot": "0", - "summarized_error_code": 0, - "watch_points": [ - { - "id": 1, - "watch_condition": { - "id": "inf", - "params": [], - "abbr": "INF" - }, - "error_code": 0 - } - ] - } - ], - "graph_name": "graph_0" - }, - { - "node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", - "tensors": [ - { - "slot": "0", - "summarized_error_code": 0, - "watch_points": [ - { - "id": 1, - "watch_condition": { - "id": "inf", - "params": [], - "abbr": "INF" - }, - "error_code": 0 - } - ] - } - ], - "graph_name": "graph_0" - } - ], - "outdated": false -} \ No newline at end of file +{"watch_point_hits": [{"node_name": "Default/TransData-op99", "tensors": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}]}], "graph_name": "graph_0"}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "tensors": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}]}], "graph_name": "graph_0"}], "outdated": false} \ No newline at end of file diff --git a/tests/st/func/debugger/test_restful_api.py b/tests/st/func/debugger/test_restful_api.py index 07fa103f..1e5e8fdf 100644 --- a/tests/st/func/debugger/test_restful_api.py +++ b/tests/st/func/debugger/test_restful_api.py @@ -84,7 +84,7 @@ class TestAscendDebugger: def test_get_conditions(self, app_client): """Test get conditions for ascend.""" - url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions' + url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/condition-collections' body_data = {} expect_file = 'get_conditions_for_ascend.json' with self._debugger_client.get_thread_instance(): @@ -131,16 +131,12 @@ class TestAscendDebugger: with self._debugger_client.get_thread_instance(): check_state(app_client) conditions = [ - {'id': 'max_gt', 'params': [{'name': 'param', 'value': 1.0}]}, - {'id': 'max_lt', 'params': [{'name': 'param', 'value': -1.0}]}, - {'id': 'min_gt', 'params': [{'name': 'param', 'value': 1e+32}]}, - {'id': 'min_lt', 'params': [{'name': 'param', 'value': -1e+32}]}, - {'id': 'max_min_gt', 'params': [{'name': 'param', 'value': 0}]}, - {'id': 'max_min_lt', 'params': [{'name': 'param', 'value': 0}]}, - {'id': 'mean_gt', 'params': [{'name': 'param', 'value': 0}]}, - {'id': 'mean_lt', 'params': [{'name': 'param', 'value': 0}]}, - {'id': 'inf', 'params': []}, - {'id': 'overflow', 'params': []}, + {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, + {'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': -1.0}]}, + {'id': 'tensor_too_large', 'params': [{'name': 'min_gt', 'value': 1e+32}]}, + {'id': 'tensor_too_small', 'params': [{'name': 'min_lt', 'value': -1e+32}]}, + {'id': 'tensor_too_large', 'params': [{'name': 'mean_gt', 'value': 0}]}, + {'id': 'tensor_too_small', 'params': [{'name': 'mean_lt', 'value': 0}]} ] for idx, condition in enumerate(conditions): create_watchpoint(app_client, condition, idx + 1) @@ -167,7 +163,7 @@ class TestAscendDebugger: leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias' with self._debugger_client.get_thread_instance(): check_state(app_client) - condition = {'id': 'inf', 'params': []} + condition = {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]} create_watchpoint(app_client, condition, watch_point_id) # update watchpoint watchpoint list url = 'update_watchpoint' @@ -327,7 +323,7 @@ class TestAscendDebugger: @pytest.mark.platform_x86_ascend_training @pytest.mark.parametrize("url, body_data, enable_recheck", [ ('create_watchpoint', - {'condition': {'id': 'inf', 'params': []}, + {'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 'watch_nodes': ['Default']}, True), ('update_watchpoint', {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], @@ -434,10 +430,10 @@ class TestGPUDebugger: @pytest.mark.platform_x86_ascend_training @pytest.mark.parametrize("url, body_data, enable_recheck", [ ('create_watchpoint', - {'condition': {'id': 'inf', 'params': []}, + {'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 'watch_nodes': ['Default']}, True), ('create_watchpoint', - {'condition': {'id': 'inf', 'params': []}, + {'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 'watch_nodes': ['Default/TransData-op99']}, True), ('update_watchpoint', {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], @@ -472,7 +468,7 @@ class TestGPUDebugger: def test_get_conditions(self, app_client): """Test get conditions for gpu.""" - url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions' + url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/condition-collections' body_data = {} expect_file = 'get_conditions_for_gpu.json' with self._debugger_client.get_thread_instance(): @@ -493,7 +489,7 @@ class TestGPUDebugger: # send recheck when disable to do recheck get_request_result(app_client, 'recheck', {}, method='post', expect_code=400) # send recheck when enable to do recheck - create_watchpoint(app_client, {'id': 'inf', 'params': []}, 2) + create_watchpoint(app_client, {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 2) res = get_request_result(app_client, 'recheck', {}, method='post') assert res['metadata']['enable_recheck'] is False @@ -579,10 +575,10 @@ class TestMultiGraphDebugger: @pytest.mark.platform_x86_gpu_training @pytest.mark.platform_x86_ascend_training @pytest.mark.parametrize("filter_condition, expect_id", [ - ({'condition': {'id': 'inf'}, + ({'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], 'graph_name': 'graph_0'}, 1), - ({'condition': {'id': 'inf'}, + ({'condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, 'watch_nodes': ['graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1'], 'graph_name': None}, 1) ]) @@ -665,7 +661,8 @@ def create_watchpoint(app_client, condition, expect_id): def create_watchpoint_and_wait(app_client): """Preparation for recheck.""" check_state(app_client) - create_watchpoint(app_client, condition={'id': 'inf', 'params': []}, expect_id=1) + create_watchpoint(app_client, condition={'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, + expect_id=1) # send run command to get watchpoint hit url = 'control' body_data = {'mode': 'continue', diff --git a/tests/st/func/debugger/utils.py b/tests/st/func/debugger/utils.py index 3fbab72c..275e6a31 100644 --- a/tests/st/func/debugger/utils.py +++ b/tests/st/func/debugger/utils.py @@ -74,7 +74,7 @@ def send_and_save_result(app_client, url, body_data, file_path, method='post'): def delete_random_items(res): """delete the random items in metadata.""" - if res.get('metadata'): + if isinstance(res, dict) and res.get('metadata'): if res['metadata'].get('ip'): res['metadata'].pop('ip') if res['metadata'].get('pos'): diff --git a/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_0.json b/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_0.json index 2dc082f8..bd64a13f 100644 --- a/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_0.json +++ b/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_0.json @@ -1,25 +1,5 @@ [ - { - "watchCondition": { - "condition": "inf" - }, - "id": 1, - "watch_nodes_num": 0 - }, - { - "watchCondition": { - "condition": "inf" - }, - "id": 2, - "watch_nodes_num": 172 - }, - { - "watchCondition": { - "condition": "max_gt", - "params": [{"name": "param", "value": 1}], - "value": 1 - }, - "id": 3, - "watch_nodes_num": 1 - } + {"watchCondition": {"condition": "tensor_too_small", "value": 1.0, "params": [{"name": "abs_mean_lt", "disabled": true}, {"name": "max_lt", "value": 1.0}, {"name": "min_lt", "disabled": true}, {"name": "mean_lt", "disabled": true}]}, "id": 1, "watch_nodes_num": 0}, + {"watchCondition": {"condition": "tensor_too_small", "value": 1.0, "params": [{"name": "abs_mean_lt", "disabled": true}, {"name": "max_lt", "disabled": true}, {"name": "min_lt", "value": 1.0}, {"name": "mean_lt", "disabled": true}]}, "id": 2, "watch_nodes_num": 172}, + {"watchCondition": {"condition": "tensor_too_large", "value": 1.0, "params": [{"name": "abs_mean_gt", "disabled": true}, {"name": "max_gt", "value": 1.0}, {"name": "min_gt", "disabled": true}, {"name": "mean_gt", "disabled": true}]}, "id": 3, "watch_nodes_num": 1} ] \ No newline at end of file diff --git a/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_1.json b/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_1.json index 515b7180..43e21dd3 100644 --- a/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_1.json +++ b/tests/ut/debugger/expected_results/watchpoint/watchpoint_handler_get_1.json @@ -1 +1 @@ -[{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 2, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 3, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1}], "abbr": "MAX>"}}] \ No newline at end of file +[{"id": 1, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "max_lt", "value": 1.0}], "abbr": "TS"}}, {"id": 2, "watch_condition": {"id": "tensor_too_small", "params": [{"name": "min_lt", "value": 1.0}], "abbr": "TS"}}, {"id": 3, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0}], "abbr": "TL"}}] \ No newline at end of file diff --git a/tests/ut/debugger/stream_handler/test_watchpoint_handler.py b/tests/ut/debugger/stream_handler/test_watchpoint_handler.py index 01f9cfc2..08e37be6 100644 --- a/tests/ut/debugger/stream_handler/test_watchpoint_handler.py +++ b/tests/ut/debugger/stream_handler/test_watchpoint_handler.py @@ -61,9 +61,9 @@ class TestWatchpointHandler: def _create_watchpoint(self): """Test create_watchpoint.""" watchpoints = [ - ({'id': 'inf', 'params': []}, None, None, 1), - ({'id': 'inf', 'params': []}, ["Default"], None, 2), - ({'id': 'max_gt', 'params': [{'name': 'param', 'value': 1}]}, + ({'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': 1.0}]}, None, None, 1), + ({'id': 'tensor_too_small', 'params': [{'name': 'min_lt', 'value': 1.0}]}, ["Default"], None, 2), + ({'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, ["Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92"], None, 3) ] @@ -160,7 +160,8 @@ class TestWatchpointHandler: expect_deleted_ids): """Test delete_watchpoint.""" for _ in range(watch_point_id): - self.handler.create_watchpoint(self.conditionmgr, {'id': 'inf', 'param': []}) + self.handler.create_watchpoint(self.conditionmgr, + {'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': 1.0}]}) with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: self.handler.delete_watchpoint(watch_point_id) TestCase().assertIn( @@ -233,13 +234,13 @@ def test_validate_watch_condition_type_error(): def test_validate_watch_condition_params_except(): """Test validate_watch_condition_params.""" - watch_condition = {'id': 'inf', 'params': [{'name': 'param', 'value': 0}]} + watch_condition = {'id': 'weight_overflow', 'params': [{'name': 'param', 'value': 0}]} conditionmgr = ConditionMgr() with pytest.raises(DebuggerParamValueError) as err: validate_watch_condition_params(conditionmgr, watch_condition) assert err.value.error_code == '5054B081' - watch_condition = {'id': 'max_gt', 'params': [{'name': 'param', 'value': '0'}]} + watch_condition = {'id': 'tensor_overflow', 'params': [{'name': 'param', 'value': '0'}]} with pytest.raises(DebuggerParamValueError) as err: validate_watch_condition_params(conditionmgr, watch_condition) assert err.value.error_code == '5054B081' diff --git a/tests/ut/debugger/test_debugger_server.py b/tests/ut/debugger/test_debugger_server.py index 210df69f..e1963cf3 100644 --- a/tests/ut/debugger/test_debugger_server.py +++ b/tests/ut/debugger/test_debugger_server.py @@ -199,8 +199,9 @@ class TestDebuggerServer: def test_create_watchpoint(self, *args): """Test create watchpoint.""" args[0].return_value = 1 - res = self._server.create_watchpoint({'watch_condition': {'id': 'inf'}, - 'watch_nodes': ['watch_node_name']}) + res = self._server.create_watchpoint( + {'watch_condition': {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]}, + 'watch_nodes': ['watch_node_name']}) assert res == {'id': 1, 'metadata': {'enable_recheck': False, 'state': 'waiting'}} @mock.patch.object(MetadataHandler, 'state', 'waiting')