
!947 Add error_list when hitting watchpoint and fix log by adding image_path information

From: @jiang-shuqiang
Reviewed-by: 
Signed-off-by:
tags/v1.1.0
mindspore-ci-bot committed 5 years ago
commit 29fa9bd365
13 changed files with 74 additions and 108 deletions
  1. mindinsight/datavisual/data_transform/summary_parser/event_parser.py (+15, -6)
  2. mindinsight/debugger/conditionmgr/condition.py (+1, -2)
  3. mindinsight/debugger/conditionmgr/condition_list.py (+0, -28)
  4. mindinsight/debugger/debugger_server.py (+2, -2)
  5. mindinsight/debugger/proto/debug_grpc.proto (+2, -0)
  6. mindinsight/debugger/proto/debug_grpc_pb2.py (+21, -57)
  7. mindinsight/debugger/stream_cache/watchpoint.py (+0, -1)
  8. mindinsight/debugger/stream_handler/watchpoint_handler.py (+22, -8)
  9. mindinsight/debugger/stream_operator/watchpoint_operator.py (+7, -1)
  10. tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json (+1, -1)
  11. tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json (+1, -1)
  12. tests/ut/debugger/expected_results/watchpoint/watchpoint_hit_handler_get_0.json (+1, -1)
  13. tests/ut/debugger/test_debugger_server.py (+1, -0)

mindinsight/datavisual/data_transform/summary_parser/event_parser.py (+15, -6)

@@ -61,16 +61,25 @@ class EventParser:
parse_summary_logger.info("Loading %s.", self.summary_file)
result = self._load(summary_file_handler)

warning = ''
if not self._scalar_check:
warning = warning + " the summary file contains no scalar value."
if not self._image_check:
warning = warning + " the summary file contains no image."
if result:
parse_summary_logger.info("Writing parsed data into scalar.csv")
warning = ''
scalar_path = FileHandler.join(self._output, "scalar.csv")
image_path = FileHandler.join(self._output, IMAGE)

if not self._image_check:
warning = warning + " The summary file contains no image."
else:
parse_summary_logger.info("Images are written in %s.", image_path)

if not self._scalar_check:
warning = warning + " The summary file contains no scalar value."
else:
parse_summary_logger.info("Writing scalar data into %s.", scalar_path)

self._scalar_writer.write()
if warning:
parse_summary_logger.warning(warning)

parse_summary_logger.info("Finished loading %s.", self.summary_file)

def _load(self, file_handler):
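For readers skimming the hunk above, here is a minimal standalone sketch of the reworked flow: real output paths are logged only for data that actually exists, and the warning text is accumulated otherwise. The output directory, the name passed to getLogger, and the assumption that the IMAGE constant resolves to an "image" subdirectory are illustrative; the branching and messages mirror the diff.

import logging
import os

parse_summary_logger = logging.getLogger("parse_summary")  # logger name assumed

def report_outputs(output_dir, scalar_check, image_check):
    """Sketch of the new behavior: log where images/scalars were written, warn otherwise."""
    warning = ''
    scalar_path = os.path.join(output_dir, "scalar.csv")
    image_path = os.path.join(output_dir, "image")  # assumes IMAGE == "image"

    if not image_check:
        warning += " The summary file contains no image."
    else:
        parse_summary_logger.info("Images are written in %s.", image_path)

    if not scalar_check:
        warning += " The summary file contains no scalar value."
    else:
        parse_summary_logger.info("Writing scalar data into %s.", scalar_path)

    if warning:
        parse_summary_logger.warning(warning)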


mindinsight/debugger/conditionmgr/condition.py (+1, -2)

@@ -33,7 +33,6 @@ class ConditionIdEnum(Enum):
GRADIENT_EXPLODING = "gradient_exploding"
TENSOR_OVERFLOW = "tensor_overflow"
OPERATOR_OVERFLOW = "operator_overflow"
TENSOR_INITIALIZATION = "tensor_initialization"
TENSOR_TOO_LARGE = "tensor_too_large"
TENSOR_TOO_SMALL = "tensor_too_small"
TENSOR_ALL_ZERO = "tensor_all_zero"
@@ -96,7 +95,7 @@ class ConditionContext:
step (int): The current training step.
debugger_capability (tuple): The debugger capability version, e.g. (1, 1).
"""
def __init__(self, backend, step=0, debugger_capability=(1, 0)):
def __init__(self, backend, step=0, debugger_capability=(1, 1)):
self._backend = backend
self._step = step
self._debugger_capability = debugger_capability
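With the default bumped from (1, 0) to (1, 1), callers that want the current capability no longer need to pass it explicitly. A minimal sketch (the import path and class come from this file; the backend string is illustrative):

from mindinsight.debugger.conditionmgr.condition import ConditionContext

# After this change, both contexts carry debugger_capability == (1, 1).
ctx_default = ConditionContext(backend="Ascend", step=0)
ctx_explicit = ConditionContext(backend="Ascend", step=0, debugger_capability=(1, 1))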


mindinsight/debugger/conditionmgr/condition_list.py (+0, -28)

@@ -224,34 +224,6 @@ CONDITION_LIST = [
supported_platforms=(PlatformEnum.ASCEND,),
minimum_debugger_capability=(1, 1)
),
Condition(
condition_id=ConditionIdEnum.TENSOR_INITIALIZATION,
abbr="TI",
# Send this condition to MindSpore will use WatchCondition.Condition.tensor_initialization
optimize_phase=OptimizePhaseEnum.TENSOR_CHECK,
parameters=[
ConditionParameter(
name="zero_percentage_ge",
value_type=ValueTypeEnum.FLOAT64,
valid_test_func=check_percentage_param_range,
default_value=100
),
ConditionParameter(
name="max_gt",
value_type=ValueTypeEnum.FLOAT64,
valid_test_func=check_normal_param_range
),
ConditionParameter(
name="min_lt",
value_type=ValueTypeEnum.FLOAT64,
valid_test_func=check_normal_param_range
)
],
supported_target_type=TargetTypeEnum.TENSOR,
supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU),
minimum_debugger_capability=(1, 1),
availability_test_func=check_initialization_available
),
Condition(
condition_id=ConditionIdEnum.TENSOR_TOO_LARGE,
abbr="TL",


mindinsight/debugger/debugger_server.py (+2, -2)

@@ -71,7 +71,7 @@ class DebuggerServer:
def get_condition_collections(self, train_id):
"""Get default condition_collections"""
metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA)
condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 1))
condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step)
log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend)
return self.condition_mgr.get_all_collections(condition_context)

@@ -81,7 +81,7 @@ class DebuggerServer:
log.error("Bool param should be given for set_recommended")
raise DebuggerParamValueError("Bool param should be given.")
metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA)
condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 1))
condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step)
log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend)
res = metadata_stream.get(['state', 'enable_recheck'])
if set_recommended and not metadata_stream.recommendation_confirmed:


mindinsight/debugger/proto/debug_grpc.proto (+2, -0)

@@ -91,6 +91,8 @@ message ViewCMD {

message WatchCondition {
enum Condition {
// nan is not used anymore, but the first enum value must be zero in proto3, so we keep this enum member.
nan = 0;
overflow = 2;
sd_gt = 11;
sd_lt = 12;


mindinsight/debugger/proto/debug_grpc_pb2.py (+21, -57)

@@ -21,7 +21,7 @@ DESCRIPTOR = _descriptor.FileDescriptor(
package='debugger',
syntax='proto3',
serialized_options=None,
serialized_pb=_b('\n+mindinsight/debugger/proto/debug_grpc.proto\x12\x08\x64\x65\x62ugger\x1a)mindinsight/debugger/proto/ms_graph.proto\"\x92\x01\n\x08Metadata\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x10\n\x08\x63ur_step\x18\x02 \x01(\x05\x12\x0f\n\x07\x62\x61\x63kend\x18\x03 \x01(\t\x12\x10\n\x08\x63ur_node\x18\x04 \x01(\t\x12\x15\n\rtraining_done\x18\x05 \x01(\x08\x12\x11\n\tgraph_num\x18\x06 \x01(\x05\x12\x12\n\nms_version\x18\x07 \x01(\t\")\n\x05\x43hunk\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\x10\n\x08\x66inished\x18\x02 \x01(\x08\"\x87\x02\n\nEventReply\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.debugger.EventReply.Status\x12\x0e\n\x04\x65xit\x18\x02 \x01(\x08H\x00\x12#\n\x07run_cmd\x18\x03 \x01(\x0b\x32\x10.debugger.RunCMDH\x00\x12#\n\x07set_cmd\x18\x04 \x01(\x0b\x32\x10.debugger.SetCMDH\x00\x12%\n\x08view_cmd\x18\x05 \x01(\x0b\x32\x11.debugger.ViewCMDH\x00\x12\x19\n\x0fversion_matched\x18\x06 \x01(\x08H\x00\")\n\x06Status\x12\x06\n\x02OK\x10\x00\x12\n\n\x06\x46\x41ILED\x10\x01\x12\x0b\n\x07PENDING\x10\x02\x42\x05\n\x03\x63md\"L\n\x06RunCMD\x12\x11\n\trun_level\x18\x01 \x01(\t\x12\x13\n\trun_steps\x18\x02 \x01(\x05H\x00\x12\x13\n\tnode_name\x18\x03 \x01(\tH\x00\x42\x05\n\x03\x63md\"\x81\x01\n\x06SetCMD\x12(\n\x0bwatch_nodes\x18\x01 \x03(\x0b\x32\x13.debugger.WatchNode\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\x0e\n\x06\x64\x65lete\x18\x03 \x01(\x08\x12\n\n\x02id\x18\x04 \x01(\x05\"1\n\x07ViewCMD\x12&\n\x07tensors\x18\x01 \x03(\x0b\x32\x15.debugger.TensorProto\"\xf4\x04\n\x0eWatchCondition\x12\x35\n\tcondition\x18\x01 \x01(\x0e\x32\".debugger.WatchCondition.Condition\x12\r\n\x05value\x18\x02 \x01(\x02\x12\x32\n\x06params\x18\x04 \x03(\x0b\x32\".debugger.WatchCondition.Parameter\x1a]\n\tParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64isabled\x18\x02 \x01(\x08\x12\r\n\x05value\x18\x03 \x01(\x01\x12\x0b\n\x03hit\x18\x04 \x01(\x08\x12\x14\n\x0c\x61\x63tual_value\x18\x05 \x01(\x01\"\x88\x03\n\tCondition\x12\x07\n\x03nan\x10\x00\x12\x07\n\x03inf\x10\x01\x12\x0c\n\x08overflow\x10\x02\x12\n\n\x06max_gt\x10\x03\x12\n\n\x06max_lt\x10\x04\x12\n\n\x06min_gt\x10\x05\x12\n\n\x06min_lt\x10\x06\x12\x0e\n\nmax_min_gt\x10\x07\x12\x0e\n\nmax_min_lt\x10\x08\x12\x0b\n\x07mean_gt\x10\t\x12\x0b\n\x07mean_lt\x10\n\x12\t\n\x05sd_gt\x10\x0b\x12\t\n\x05sd_lt\x10\x0c\x12\x1b\n\x17tensor_general_overflow\x10\r\x12\x19\n\x15tensor_initialization\x10\x0e\x12\x14\n\x10tensor_too_large\x10\x0f\x12\x14\n\x10tensor_too_small\x10\x10\x12\x13\n\x0ftensor_all_zero\x10\x11\x12\x1b\n\x17tensor_change_too_large\x10\x12\x12\x1b\n\x17tensor_change_too_small\x10\x13\x12\x16\n\x12tensor_not_changed\x10\x14\x12\x10\n\x0ctensor_range\x10\x15\"1\n\tWatchNode\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x11\n\tnode_type\x18\x02 \x01(\t\"\x89\x01\n\rWatchpointHit\x12%\n\x06tensor\x18\x01 \x01(\x0b\x32\x15.debugger.TensorProto\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\n\n\x02id\x18\x03 \x01(\x05\x12\x12\n\nerror_code\x18\x04 
\x01(\x05\x32\x81\x03\n\rEventListener\x12\x35\n\x07WaitCMD\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12:\n\x0cSendMetadata\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12\x36\n\tSendGraph\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x12>\n\x0bSendTensors\x12\x15.debugger.TensorProto\x1a\x14.debugger.EventReply\"\x00(\x01\x12G\n\x12SendWatchpointHits\x12\x17.debugger.WatchpointHit\x1a\x14.debugger.EventReply\"\x00(\x01\x12<\n\x0fSendMultiGraphs\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x62\x06proto3')
serialized_pb=_b('\n+mindinsight/debugger/proto/debug_grpc.proto\x12\x08\x64\x65\x62ugger\x1a)mindinsight/debugger/proto/ms_graph.proto\"\x92\x01\n\x08Metadata\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x10\n\x08\x63ur_step\x18\x02 \x01(\x05\x12\x0f\n\x07\x62\x61\x63kend\x18\x03 \x01(\t\x12\x10\n\x08\x63ur_node\x18\x04 \x01(\t\x12\x15\n\rtraining_done\x18\x05 \x01(\x08\x12\x11\n\tgraph_num\x18\x06 \x01(\x05\x12\x12\n\nms_version\x18\x07 \x01(\t\")\n\x05\x43hunk\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\x10\n\x08\x66inished\x18\x02 \x01(\x08\"\x87\x02\n\nEventReply\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.debugger.EventReply.Status\x12\x0e\n\x04\x65xit\x18\x02 \x01(\x08H\x00\x12#\n\x07run_cmd\x18\x03 \x01(\x0b\x32\x10.debugger.RunCMDH\x00\x12#\n\x07set_cmd\x18\x04 \x01(\x0b\x32\x10.debugger.SetCMDH\x00\x12%\n\x08view_cmd\x18\x05 \x01(\x0b\x32\x11.debugger.ViewCMDH\x00\x12\x19\n\x0fversion_matched\x18\x06 \x01(\x08H\x00\")\n\x06Status\x12\x06\n\x02OK\x10\x00\x12\n\n\x06\x46\x41ILED\x10\x01\x12\x0b\n\x07PENDING\x10\x02\x42\x05\n\x03\x63md\"L\n\x06RunCMD\x12\x11\n\trun_level\x18\x01 \x01(\t\x12\x13\n\trun_steps\x18\x02 \x01(\x05H\x00\x12\x13\n\tnode_name\x18\x03 \x01(\tH\x00\x42\x05\n\x03\x63md\"\x81\x01\n\x06SetCMD\x12(\n\x0bwatch_nodes\x18\x01 \x03(\x0b\x32\x13.debugger.WatchNode\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\x0e\n\x06\x64\x65lete\x18\x03 \x01(\x08\x12\n\n\x02id\x18\x04 \x01(\x05\"1\n\x07ViewCMD\x12&\n\x07tensors\x18\x01 \x03(\x0b\x32\x15.debugger.TensorProto\"\x81\x04\n\x0eWatchCondition\x12\x35\n\tcondition\x18\x01 \x01(\x0e\x32\".debugger.WatchCondition.Condition\x12\r\n\x05value\x18\x02 \x01(\x02\x12\x32\n\x06params\x18\x04 \x03(\x0b\x32\".debugger.WatchCondition.Parameter\x1a]\n\tParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64isabled\x18\x02 \x01(\x08\x12\r\n\x05value\x18\x03 \x01(\x01\x12\x0b\n\x03hit\x18\x04 \x01(\x08\x12\x14\n\x0c\x61\x63tual_value\x18\x05 \x01(\x01\"\x95\x02\n\tCondition\x12\x07\n\x03nan\x10\x00\x12\x0c\n\x08overflow\x10\x02\x12\t\n\x05sd_gt\x10\x0b\x12\t\n\x05sd_lt\x10\x0c\x12\x1b\n\x17tensor_general_overflow\x10\r\x12\x19\n\x15tensor_initialization\x10\x0e\x12\x14\n\x10tensor_too_large\x10\x0f\x12\x14\n\x10tensor_too_small\x10\x10\x12\x13\n\x0ftensor_all_zero\x10\x11\x12\x1b\n\x17tensor_change_too_large\x10\x12\x12\x1b\n\x17tensor_change_too_small\x10\x13\x12\x16\n\x12tensor_not_changed\x10\x14\x12\x10\n\x0ctensor_range\x10\x15\"1\n\tWatchNode\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x11\n\tnode_type\x18\x02 \x01(\t\"\x89\x01\n\rWatchpointHit\x12%\n\x06tensor\x18\x01 \x01(\x0b\x32\x15.debugger.TensorProto\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\n\n\x02id\x18\x03 \x01(\x05\x12\x12\n\nerror_code\x18\x04 \x01(\x05\x32\x81\x03\n\rEventListener\x12\x35\n\x07WaitCMD\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12:\n\x0cSendMetadata\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12\x36\n\tSendGraph\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x12>\n\x0bSendTensors\x12\x15.debugger.TensorProto\x1a\x14.debugger.EventReply\"\x00(\x01\x12G\n\x12SendWatchpointHits\x12\x17.debugger.WatchpointHit\x1a\x14.debugger.EventReply\"\x00(\x01\x12<\n\x0fSendMultiGraphs\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x62\x06proto3')
,
dependencies=[mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.DESCRIPTOR,])

@@ -64,94 +64,58 @@ _WATCHCONDITION_CONDITION = _descriptor.EnumDescriptor(
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='inf', index=1, number=1,
name='overflow', index=1, number=2,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='overflow', index=2, number=2,
name='sd_gt', index=2, number=11,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='max_gt', index=3, number=3,
name='sd_lt', index=3, number=12,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='max_lt', index=4, number=4,
name='tensor_general_overflow', index=4, number=13,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='min_gt', index=5, number=5,
name='tensor_initialization', index=5, number=14,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='min_lt', index=6, number=6,
name='tensor_too_large', index=6, number=15,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='max_min_gt', index=7, number=7,
name='tensor_too_small', index=7, number=16,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='max_min_lt', index=8, number=8,
name='tensor_all_zero', index=8, number=17,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='mean_gt', index=9, number=9,
name='tensor_change_too_large', index=9, number=18,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='mean_lt', index=10, number=10,
name='tensor_change_too_small', index=10, number=19,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='sd_gt', index=11, number=11,
name='tensor_not_changed', index=11, number=20,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='sd_lt', index=12, number=12,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_general_overflow', index=13, number=13,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_initialization', index=14, number=14,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_too_large', index=15, number=15,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_too_small', index=16, number=16,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_all_zero', index=17, number=17,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_change_too_large', index=18, number=18,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_change_too_small', index=19, number=19,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_not_changed', index=20, number=20,
serialized_options=None,
type=None),
_descriptor.EnumValueDescriptor(
name='tensor_range', index=21, number=21,
name='tensor_range', index=12, number=21,
serialized_options=None,
type=None),
],
containing_type=None,
serialized_options=None,
serialized_start=1056,
serialized_end=1448,
serialized_end=1333,
)
_sym_db.RegisterEnumDescriptor(_WATCHCONDITION_CONDITION)

@@ -568,7 +532,7 @@ _WATCHCONDITION = _descriptor.Descriptor(
oneofs=[
],
serialized_start=820,
serialized_end=1448,
serialized_end=1333,
)


@@ -605,8 +569,8 @@ _WATCHNODE = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=1450,
serialized_end=1499,
serialized_start=1335,
serialized_end=1384,
)


@@ -657,8 +621,8 @@ _WATCHPOINTHIT = _descriptor.Descriptor(
extension_ranges=[],
oneofs=[
],
serialized_start=1502,
serialized_end=1639,
serialized_start=1387,
serialized_end=1524,
)

_EVENTREPLY.fields_by_name['status'].enum_type = _EVENTREPLY_STATUS
@@ -786,8 +750,8 @@ _EVENTLISTENER = _descriptor.ServiceDescriptor(
file=DESCRIPTOR,
index=0,
serialized_options=None,
serialized_start=1642,
serialized_end=2027,
serialized_start=1527,
serialized_end=1912,
methods=[
_descriptor.MethodDescriptor(
name='WaitCMD',


mindinsight/debugger/stream_cache/watchpoint.py (+0, -1)

@@ -31,7 +31,6 @@ WATCHPOINT_CONDITION_MAPPING = {
ConditionIdEnum.GRADIENT_VANISHING.value: WatchCondition.Condition.tensor_too_small,
ConditionIdEnum.OPERATOR_OVERFLOW.value: WatchCondition.Condition.overflow,
ConditionIdEnum.TENSOR_ALL_ZERO.value: WatchCondition.Condition.tensor_all_zero,
ConditionIdEnum.TENSOR_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization,
ConditionIdEnum.TENSOR_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow,
ConditionIdEnum.TENSOR_RANGE.value: WatchCondition.Condition.tensor_range,
ConditionIdEnum.TENSOR_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large,


mindinsight/debugger/stream_handler/watchpoint_handler.py (+22, -8)

@@ -436,23 +436,18 @@ class WatchpointHitHandler(StreamHandlerBase):
"""
res = {}
watch_points = []
error_codes = set()
for tensor_hit in tensor_hits:
error_code = tensor_hit.error_code
error_list = _get_error_list(error_code)
watchpoint = tensor_hit.watchpoint
watchpoint['error_code'] = error_code
watchpoint['error_list'] = error_list
watch_points.append(watchpoint)
error_codes.add(error_code)

summarized_error_code = error_codes.pop()
while error_codes:
temp = error_codes.pop()
summarized_error_code = summarized_error_code | temp

if watch_points:
res = {
'slot': slot,
'summarized_error_code': summarized_error_code,
'watch_points': watch_points
}
return res
@@ -617,3 +612,22 @@ def set_default_param(condition_mgr, watch_condition):
})
watch_condition["abbr"] = condition.abbr
return watch_condition


def _get_error_list(error_code):
"""
Get error list.

Args:
error_code (int): The error code.

Returns:
list, the error list.
"""
all_error_list = ["nan", "inf", "no_prev_tensor"]
error_list = []
for i, error_str in enumerate(all_error_list):
error = (error_code >> i) & 1
if error == 1:
error_list.append(error_str)

return error_list
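A quick usage check of the helper above, assuming _get_error_list is in scope; the sample error codes are illustrative, while the bit order follows all_error_list:

# error_code is a bitmask: bit 0 -> "nan", bit 1 -> "inf", bit 2 -> "no_prev_tensor".
assert _get_error_list(0) == []
assert _get_error_list(0b001) == ["nan"]
assert _get_error_list(0b101) == ["nan", "no_prev_tensor"]
assert _get_error_list(0b110) == ["inf", "no_prev_tensor"]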

mindinsight/debugger/stream_operator/watchpoint_operator.py (+7, -1)

@@ -21,7 +21,7 @@ from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValue
from mindinsight.debugger.common.log import LOGGER as log
from mindinsight.debugger.common.utils import ServerStatus, \
Streams, is_cst_type
from mindinsight.debugger.conditionmgr.condition import ConditionIdEnum, TargetTypeEnum
from mindinsight.debugger.conditionmgr.condition import ConditionIdEnum, TargetTypeEnum, ConditionContext
from mindinsight.debugger.conditionmgr.recommender import get_basic_node_info
from mindinsight.debugger.stream_handler.watchpoint_handler import validate_watch_condition

@@ -78,6 +78,12 @@ class WatchpointOperator:
validate_watch_condition(self._condition_mgr, watch_condition)
condition_id = watch_condition.get('id')
condition = self._condition_mgr.get_condition(condition_id)
condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step)
if not condition.is_available(condition_context):
log.error("Failed to create watchpoint as the condition is not available.")
raise DebuggerCreateWatchPointError(
"Failed to create watchpoint as the condition is not available.")

if condition.supported_target_type in [TargetTypeEnum.ACTIVATION, TargetTypeEnum.GRADIENT,
TargetTypeEnum.WEIGHT]:
watch_nodes = get_basic_node_info(condition.supported_target_type.value, self._graph_stream).copy()


tests/st/func/debugger/expect_results/restful_results/retrieve_tensor_graph-0.json (+1, -1)

@@ -1 +1 @@
{"graph": {"nodes": [{"name": "Default/args0", "full_name": "Default/args0", "type": "Parameter", "input": {}, "output": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3], "has_prev_step": true}], "graph_name": "graph_0"}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "type": "Cast", "input": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}, {"name": "Default/TransData-op99", "full_name": "Default/TransData-op99", "type": "TransData", "input": {"Default/args0": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {"Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}], "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}]}}
{"graph": {"nodes": [{"name": "Default/args0", "full_name": "Default/args0", "type": "Parameter", "input": {}, "output": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3], "has_prev_step": true}], "graph_name": "graph_0"}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", "type": "Cast", "input": {"Default/TransData-op99": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {}, "slots": [{"slot": "0", "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}, {"name": "Default/TransData-op99", "full_name": "Default/TransData-op99", "type": "TransData", "input": {"Default/args0": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "output": {"Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": {"shape": [[32, 1, 32, 32]], "edge_type": "data", "independent_layout": false, "data_type": "DT_TENSOR[DT_FLOAT32]", "slot_mapping": [["0", ""]]}}, "slots": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0, "error_list": []}], "statistics": {"overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "shape": [2, 3]}], "graph_name": "graph_0"}]}}

tests/st/func/debugger/expect_results/restful_results/retrieve_watchpoint_hit.json (+1, -1)

@@ -1 +1 @@
{"watch_point_hits": [{"node_name": "Default/TransData-op99", "tensors": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}]}], "graph_name": "graph_0"}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "tensors": [{"slot": "0", "summarized_error_code": 0, "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0}]}], "graph_name": "graph_0"}], "outdated": false}
{"watch_point_hits": [{"node_name": "Default/TransData-op99", "tensors": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0, "error_list": []}]}], "graph_name": "graph_0"}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "tensors": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "tensor_too_large", "params": [{"name": "max_gt", "value": 1.0, "actual_value": null}], "abbr": "TL"}, "error_code": 0, "error_list": []}]}], "graph_name": "graph_0"}], "outdated": false}

tests/ut/debugger/expected_results/watchpoint/watchpoint_hit_handler_get_0.json (+1, -1)

@@ -5,11 +5,11 @@
"tensors": [
{
"slot": "0",
"summarized_error_code": 0,
"watch_points": [
{
"id": 1,
"error_code": 0,
"error_list": [],
"watch_condition": {
"condition": "MAX_GT",
"param": 1


tests/ut/debugger/test_debugger_server.py (+1, -0)

@@ -193,6 +193,7 @@ class TestDebuggerServer:
self._server.create_watchpoint({'watch_condition': {'id': 'inf'}})

@mock.patch.object(MetadataHandler, 'state', 'waiting')
@mock.patch.object(MetadataHandler, 'backend', 'GPU')
@mock.patch.object(GraphHandler, 'get_node_basic_info', return_value=MagicMock())
@mock.patch.object(GraphHandler, 'get_node_type', return_value='aggregation_scope')
@mock.patch.object(WatchpointHandler, 'create_watchpoint')

