You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

debug_grpc.proto 3.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. /**
  2. * Copyright 2020 Huawei Technologies Co., Ltd
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. syntax = "proto3";
  17. package debugger;
  18. import "mindinsight/debugger/proto/ms_graph.proto";
  // RPC surface of the debugger. Every method answers with one EventReply.
  service EventListener {
    // Unary call: one Metadata in, one EventReply out.
    rpc WaitCMD(Metadata) returns (EventReply);

    // Unary call: one Metadata in, one EventReply out.
    rpc SendMetadata(Metadata) returns (EventReply);

    // Client-streaming call: a stream of Chunk messages in, one EventReply out.
    rpc SendGraph(stream Chunk) returns (EventReply);

    // Client-streaming call: a stream of TensorProto messages in, one
    // EventReply out. TensorProto is not declared in this file (presumably it
    // comes from the imported ms_graph.proto -- confirm).
    rpc SendTensors(stream TensorProto) returns (EventReply);

    // Client-streaming call: a stream of WatchpointHit messages in, one
    // EventReply out.
    rpc SendWatchpointHits(stream WatchpointHit) returns (EventReply);

    // Client-streaming call: a stream of Chunk messages in, one EventReply out.
    rpc SendMultiGraphs(stream Chunk) returns (EventReply);
  }
  // Request payload of the WaitCMD and SendMetadata RPCs.
  message Metadata {
    // Name of the device.
    string device_name = 1;

    // Current step number.
    int32 cur_step = 2;

    // Backend in use: 'GPU' or 'Ascend'.
    string backend = 3;

    // Full name of the current node.
    string cur_node = 4;

    // Set when training is done.
    bool training_done = 5;

    // Total number of graphs.
    int32 graph_num = 6;

    // MindSpore version number.
    string ms_version = 7;
  }
  // One element of the streams accepted by SendGraph and SendMultiGraphs.
  message Chunk {
    // Raw bytes carried by this chunk.
    bytes buffer = 1;

    // Completion flag. NOTE(review): presumably marks the last chunk of a
    // serialized graph -- confirm against the sender.
    bool finished = 2;
  }
  // Reply returned by every EventListener RPC: a status plus at most one
  // command.
  message EventReply {
    // Outcome reported in the reply. OK is the zero (default) value.
    enum Status {
      OK = 0;
      FAILED = 1;
      PENDING = 2;
    }

    // Status of this reply.
    Status status = 1;

    // Command carried by the reply. As a oneof, at most one member is set;
    // setting one clears the others.
    oneof cmd {
      // Exit flag.
      bool exit = 2;

      // Run command (see RunCMD).
      RunCMD run_cmd = 3;

      // Set command (see SetCMD).
      SetCMD set_cmd = 4;

      // View command (see ViewCMD).
      ViewCMD view_cmd = 5;

      // Version-match flag. NOTE(review): presumably the result of comparing
      // Metadata.ms_version -- confirm.
      bool version_matched = 6;
    }
  }
  // Run command carried in EventReply.run_cmd.
  message RunCMD {
    // Granularity of the run: step level or node level.
    // One of "step", "node" or "recheck".
    string run_level = 1;

    // Argument of the run command; at most one member is set.
    oneof cmd {
      // Number of steps to run.
      int32 run_steps = 2;

      // Full name of the next node.
      string node_name = 3;
    }
  }
  // Set command carried in EventReply.set_cmd. It groups watch nodes, a watch
  // condition, a delete flag and an id.
  message SetCMD {
    // Nodes this command applies to.
    repeated WatchNode watch_nodes = 1;

    // Condition attached to this command.
    WatchCondition watch_condition = 2;

    // Delete flag.
    // NOTE(review): `delete` is a keyword in some target languages (e.g. C++),
    // so generated accessors may be renamed there. The field name is part of
    // the published schema and is kept unchanged.
    bool delete = 3;

    // Identifier. NOTE(review): presumably the watchpoint id -- confirm.
    int32 id = 4;
  }
  // View command carried in EventReply.view_cmd.
  message ViewCMD {
    // Tensors this command refers to. TensorProto is not declared in this
    // file (presumably it comes from the imported ms_graph.proto -- confirm).
    repeated TensorProto tensors = 1;
  }
  // A watch condition: the kind of check, a legacy scalar value, and a list
  // of named parameters.
  message WatchCondition {
    // Field number 3 is used on the mindspore side as
    // `repeated bool include = 3`. Reserving the number and the name makes
    // protoc reject any accidental reuse here, which keeps the two sides
    // wire-compatible.
    reserved 3;
    reserved "include";

    // Kind of check to perform.
    // NOTE(review): numbers 1 and 3-10 are unassigned in this enum. If earlier
    // versions or the mindspore side used them, consider reserving them too --
    // confirm before doing so.
    enum Condition {
      // nan is no longer used, but the first enum value must be zero in
      // proto3, so this member is kept.
      nan = 0;
      overflow = 2;
      sd_gt = 11;
      sd_lt = 12;
      tensor_general_overflow = 13;
      tensor_initialization = 14;
      tensor_too_large = 15;
      tensor_too_small = 16;
      tensor_all_zero = 17;
      tensor_change_too_large = 18;
      tensor_change_too_small = 19;
      tensor_not_changed = 20;
      tensor_range = 21;
    }

    // The selected condition.
    Condition condition = 1;

    // Scalar value attached to the condition.
    float value = 2;

    // One named parameter of a watch condition.
    message Parameter {
      // Parameter name.
      string name = 1;

      // Disabled flag for this parameter.
      bool disabled = 2;

      // Configured value of the parameter.
      double value = 3;

      // Whether this parameter is hit when checking tensor.
      bool hit = 4;

      // Actual value recorded for this parameter.
      double actual_value = 5;
    }

    // Parameters of the condition. Number 4 is used because 3 is reserved.
    repeated Parameter params = 4;
  }
  // A node referenced by SetCMD.watch_nodes.
  message WatchNode {
    // Name of the node.
    string node_name = 1;

    // Type of the node.
    string node_type = 2;

    // Name of the graph. NOTE(review): presumably the graph that contains the
    // node -- confirm.
    string graph_name = 3;

    // Rank id associated with the node.
    int32 rank_id = 4;

    // Device id associated with the node.
    int32 device_id = 5;
  }
  // One element of the stream accepted by SendWatchpointHits.
  message WatchpointHit {
    // Tensor associated with the hit. TensorProto is not declared in this
    // file (presumably it comes from the imported ms_graph.proto -- confirm).
    TensorProto tensor = 1;

    // Watch condition associated with the hit.
    WatchCondition watch_condition = 2;

    // Identifier. NOTE(review): presumably the watchpoint id, matching
    // SetCMD.id -- confirm.
    int32 id = 3;

    // Error code reported with the hit; its value space is not defined in
    // this file.
    int32 error_code = 4;
  }