| @@ -0,0 +1,26 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Module init file.""" | |||
| from mindinsight.backend.conditionmgr.conditionmgr_api import init_module as init_query_module | |||
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
| app (Flask): A Flask instance. | |||
| """ | |||
| init_query_module(app) | |||
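| # Usage sketch (hypothetical host application; mirrors how MindInsight wires its backends): | |||
| # >>> from flask import Flask | |||
| # >>> app = Flask(__name__) | |||
| # >>> init_module(app) | |||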
| @@ -0,0 +1,46 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Conditionmgr restful api.""" | |||
| from flask import Blueprint | |||
| from mindinsight.conf import settings | |||
| from mindinsight.backend.debugger.debugger_api import BACKEND_SERVER, _wrap_reply | |||
| BLUEPRINT = Blueprint("conditionmgr", __name__, | |||
| url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX) | |||
| @BLUEPRINT.route("/conditionmgr/train-jobs/<train_id>/conditions", methods=["GET"]) | |||
| def get_conditions(train_id): | |||
| """get conditions""" | |||
| reply = _wrap_reply(BACKEND_SERVER.get_conditions, train_id) | |||
| return reply | |||
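| # Example (assuming the standard URL prefix, as in the debugger api examples): | |||
| # >>> GET http://xxxx/v1/mindinsight/conditionmgr/train-jobs/<train_id>/conditions | |||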
| @BLUEPRINT.route("/conditionmgr/train-jobs/<train_id>/condition-collections", methods=["GET"]) | |||
| def get_condition_collections(train_id): | |||
| """get condition collections""" | |||
| reply = _wrap_reply(BACKEND_SERVER.get_condition_collections, train_id) | |||
| return reply | |||
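| # Example (same assumed prefix): | |||
| # >>> GET http://xxxx/v1/mindinsight/conditionmgr/train-jobs/<train_id>/condition-collections | |||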
| def init_module(app): | |||
| """ | |||
| Init module entry. | |||
| Args: | |||
| app (Flask): A Flask instance. | |||
| """ | |||
| app.register_blueprint(BLUEPRINT) | |||
| @@ -88,11 +88,16 @@ def search(): | |||
| str, the required data. | |||
| Examples: | |||
| >>> Get http://xxxx/v1/mindinsight/debugger/retrive?mode=all | |||
| >>> Get http://xxxx/v1/mindinsight/debugger/search?name=mock_name&watch_point_id=1 | |||
| """ | |||
| name = request.args.get('name') | |||
| graph_name = request.args.get('graph_name') | |||
| watch_point_id = int(request.args.get('watch_point_id', 0)) | |||
| reply = _wrap_reply(BACKEND_SERVER.search, name, watch_point_id) | |||
| node_category = request.args.get('node_category') | |||
| reply = _wrap_reply(BACKEND_SERVER.search, {'name': name, | |||
| 'graph_name': graph_name, | |||
| 'watch_point_id': watch_point_id, | |||
| 'node_category': node_category}) | |||
| return reply | |||
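| # Example with the new filter fields (the graph name and category values are illustrative): | |||
| # >>> GET http://xxxx/v1/mindinsight/debugger/search?name=mock_name&graph_name=graph_0&watch_point_id=1&node_category=weight | |||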
| @@ -109,9 +114,10 @@ def retrieve_node_by_bfs(): | |||
| >>> Get http://xxxx/v1/mindinsight/debugger/retrieve_node_by_bfs?name=node_name&ascend=true | |||
| """ | |||
| name = request.args.get('name') | |||
| graph_name = request.args.get('graph_name') | |||
| ascend = request.args.get('ascend', 'false') | |||
| ascend = ascend == 'true' | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_node_by_bfs, name, ascend) | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_node_by_bfs, name, graph_name, ascend) | |||
| return reply | |||
| @@ -167,7 +173,8 @@ def retrieve_tensor_history(): | |||
| """ | |||
| body = _read_post_request(request) | |||
| name = body.get('name') | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_history, name) | |||
| graph_name = body.get('graph_name') | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_history, name, graph_name) | |||
| return reply | |||
| @@ -180,12 +187,15 @@ def retrieve_tensor_value(): | |||
| str, the required data. | |||
| Examples: | |||
| >>> GET http://xxxx/v1/mindinsight/debugger/tensors?name=node_name&detail=data&shape=[1,1,:,:] | |||
| >>> GET http://xxxx/v1/mindinsight/debugger/tensors?name=tensor_name&detail=data&shape=[1,1,:,:] | |||
| """ | |||
| name = request.args.get('name') | |||
| detail = request.args.get('detail') | |||
| shape = request.args.get('shape') | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_value, name, detail, shape) | |||
| graph_name = request.args.get('graph_name') | |||
| prev = bool(request.args.get('prev') == 'true') | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_value, name, detail, shape, graph_name, prev) | |||
| return reply | |||
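| # Example with the new query fields (the graph name is illustrative): | |||
| # >>> GET http://xxxx/v1/mindinsight/debugger/tensors?name=tensor_name&detail=data&shape=[1,1,:,:]&graph_name=graph_0&prev=true | |||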
| @@ -199,7 +209,6 @@ def create_watchpoint(): | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If parsing json data search_condition fails. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/mindinsight/debugger/create_watchpoint | |||
| @@ -207,9 +216,12 @@ def create_watchpoint(): | |||
| body = _read_post_request(request) | |||
| condition = body.get('condition') | |||
| graph_name = body.get('graph_name') | |||
| watch_nodes = body.get('watch_nodes') | |||
| watch_point_id = body.get('watch_point_id') | |||
| reply = _wrap_reply(BACKEND_SERVER.create_watchpoint, condition, watch_nodes, watch_point_id) | |||
| search_pattern = body.get('search_pattern') | |||
| reply = _wrap_reply(BACKEND_SERVER.create_watchpoint, | |||
| condition, watch_nodes, watch_point_id, search_pattern, graph_name) | |||
| return reply | |||
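| # Example request body (keys as read above; the values are illustrative assumptions): | |||
| # >>> POST http://xxxx/v1/mindinsight/debugger/create_watchpoint | |||
| # >>> {"condition": {"id": "tensor_all_zero", "params": [...]}, "graph_name": "graph_0", | |||
| # >>>  "watch_nodes": ["Default/conv1"], "watch_point_id": 0, "search_pattern": {"name": "conv"}} | |||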
| @@ -223,7 +235,6 @@ def update_watchpoint(): | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If parsing json data search_condition fails. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/mindinsight/debugger/update_watchpoint | |||
| @@ -232,10 +243,10 @@ def update_watchpoint(): | |||
| watch_point_id = body.get('watch_point_id') | |||
| watch_nodes = body.get('watch_nodes') | |||
| graph_name = body.get('graph_name') | |||
| mode = body.get('mode') | |||
| name = body.get('name') | |||
| reply = _wrap_reply(BACKEND_SERVER.update_watchpoint, watch_point_id, watch_nodes, mode, name) | |||
| pattern = body.get('search_pattern') | |||
| reply = _wrap_reply(BACKEND_SERVER.update_watchpoint, watch_point_id, watch_nodes, mode, pattern, graph_name) | |||
| return reply | |||
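| # Example request body (keys as read above; the values are illustrative assumptions): | |||
| # >>> POST http://xxxx/v1/mindinsight/debugger/update_watchpoint | |||
| # >>> {"watch_point_id": 1, "watch_nodes": ["Default/conv1"], "graph_name": "graph_0", | |||
| # >>>  "mode": 0, "search_pattern": {"name": "conv"}} | |||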
| @@ -249,7 +260,6 @@ def delete_watchpoint(): | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If parsing json data search_condition fails. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/mindinsight/debugger/delete_watchpoint | |||
| @@ -273,7 +283,6 @@ def control(): | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| ParamValueError: If parsing json data search_condition fails. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/mindinsight/debugger/control | |||
| @@ -284,6 +293,59 @@ def control(): | |||
| return reply | |||
| @BLUEPRINT.route("/debugger/recheck", methods=["POST"]) | |||
| def recheck(): | |||
| """ | |||
| Recheck request. | |||
| Returns: | |||
| str, reply message. | |||
| Raises: | |||
| MindInsightException: If method fails to be called. | |||
| Examples: | |||
| >>> POST http://xxxx/v1/mindinsight/debugger/recheck | |||
| """ | |||
| reply = _wrap_reply(BACKEND_SERVER.recheck) | |||
| return reply | |||
| @BLUEPRINT.route("/debugger/tensor_graphs", methods=["GET"]) | |||
| def retrieve_tensor_graph(): | |||
| """ | |||
| Retrieve tensor graph according to tensor name and graph name. | |||
| Returns: | |||
| str, the required data. | |||
| Examples: | |||
| >>> GET http://xxxx/v1/mindinsight/debugger/tensor_graphs?tensor_name=tensor_name&graph_name=graph_name | |||
| """ | |||
| tensor_name = request.args.get('tensor_name') | |||
| graph_name = request.args.get('graph_name') | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_graph, tensor_name, graph_name) | |||
| return reply | |||
| @BLUEPRINT.route("/debugger/tensor_hits", methods=["GET"]) | |||
| def retrieve_tensor_hits(): | |||
| """ | |||
| Retrieve tensor hits according to tensor name and graph name. | |||
| Returns: | |||
| str, the required data. | |||
| Examples: | |||
| >>> GET http://xxxx/v1/mindinsight/debugger/tensor_hits?tensor_name=tensor_name&graph_name=graph_name | |||
| """ | |||
| tensor_name = request.args.get('tensor_name') | |||
| graph_name = request.args.get('graph_name') | |||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_hits, tensor_name, graph_name) | |||
| return reply | |||
| BACKEND_SERVER = _initialize_debugger_server() | |||
| @@ -0,0 +1,15 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Provide condition manager function.""" | |||
| @@ -0,0 +1,15 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Common module.""" | |||
| @@ -0,0 +1,18 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Utils module.""" | |||
| from collections import namedtuple | |||
| NodeBasicInfo = namedtuple('node_basic_info', ['name', 'full_name', 'type']) | |||
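| # Example (field values are hypothetical): | |||
| # >>> NodeBasicInfo(name="Default/conv1.weight", full_name="Default/conv1.weight", type="Parameter") | |||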
| @@ -0,0 +1,232 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Management of all conditions. | |||
| This module is used to register all conditions, as well as their parameters. | |||
| This module also provides the available conditions to the condition_collections API. | |||
| """ | |||
| from enum import Enum | |||
| from mindinsight.conditionmgr.log import logger | |||
| class ConditionIdEnum(Enum): | |||
| """Condition ids.""" | |||
| WEIGHT_INITIALIZATION = "weight_initialization" | |||
| WEIGHT_OVERFLOW = "weight_overflow" | |||
| WEIGHT_TOO_LARGE = "weight_too_large" | |||
| WEIGHT_TOO_SMALL = "weight_too_small" | |||
| GRADIENT_VANISHING = "gradient_vanishing" | |||
| GRADIENT_TOO_LARGE = "gradient_too_large" | |||
| GRADIENT_EXPLODING = "gradient_exploding" | |||
| TENSOR_OVERFLOW = "tensor_overflow" | |||
| OPERATOR_OVERFLOW = "operator_overflow" | |||
| NAN = "nan" | |||
| OVERFLOW_ASCEND_CHIP = "overflow" | |||
| INF = "inf" | |||
| MAX_GT = "max_gt" | |||
| MAX_LT = "max_lt" | |||
| MIN_GT = "min_gt" | |||
| MIN_LT = "min_lt" | |||
| MAX_MIN_GT = "max_min_gt" | |||
| MAX_MIN_LT = "max_min_lt" | |||
| MEAN_GT = "mean_gt" | |||
| MEAN_LT = "mean_lt" | |||
| TENSOR_INITIALIZATION = "tensor_initialization" | |||
| TENSOR_TOO_LARGE = "tensor_too_large" | |||
| TENSOR_TOO_SMALL = "tensor_too_small" | |||
| TENSOR_ALL_ZERO = "tensor_all_zero" | |||
| WEIGHT_NOT_CHANGED = "weight_not_changed" | |||
| WEIGHT_CHANGE_TOO_LARGE = "weight_change_too_large" | |||
| WEIGHT_CHANGE_TOO_SMALL = "weight_change_too_small" | |||
| TENSOR_CHANGE_TOO_LARGE = "tensor_change_too_large" | |||
| TENSOR_CHANGE_TOO_SMALL = "tensor_change_too_small" | |||
| TENSOR_NOT_CHANGED = "tensor_not_changed" | |||
| class OptimizePhaseEnum(Enum): | |||
| """Optimize phases.""" | |||
| TENSOR_CHECK = 400 | |||
| OPERATOR_CHECK = 100 | |||
| LOSS_CHECK = 300 | |||
| INPUT_DATA_CHECK = 200 | |||
| class ValueTypeEnum(Enum): | |||
| """Value types.""" | |||
| FLOAT64 = 1 | |||
| INT64 = 2 | |||
| BOOL = 3 | |||
| class PlatformEnum(Enum): | |||
| """Platform types.""" | |||
| GPU = "GPU" | |||
| ASCEND = "Ascend" | |||
| class TargetTypeEnum(Enum): | |||
| """Target types.""" | |||
| TENSOR = 'tensor' | |||
| WEIGHT = 'weight' | |||
| ACTIVATION = 'activation' | |||
| GRADIENT = 'gradient' | |||
| class ConditionContext: | |||
| """ | |||
| The class for condition context. | |||
| Args: | |||
| backend (str): the backend on which the training job runs, e.g. "GPU" or "Ascend". | |||
| step (int): the current training step. | |||
| debugger_capability (tuple): the debugger capability version, e.g. (1, 1). | |||
| """ | |||
| def __init__(self, backend, step=0, debugger_capability=(1, 0)): | |||
| self._backend = backend | |||
| self._step = step | |||
| self._debugger_capability = debugger_capability | |||
| @property | |||
| def backend(self): | |||
| """Get backend.""" | |||
| return self._backend | |||
| @property | |||
| def step(self): | |||
| """Get _step.""" | |||
| return self._step | |||
| @property | |||
| def debugger_capability(self): | |||
| """Get debugger_capability.""" | |||
| return self._debugger_capability | |||
| class ConditionParameter: | |||
| """ | |||
| The class for parameters of conditions. | |||
| Args: | |||
| name (str): parameter name. | |||
| value_type (ValueTypeEnum): the type of the value. | |||
| support_disable (bool): whether the parameter supports being disabled (left unassigned). | |||
| default_value (float): the default value. | |||
| visible_on_ui (bool): whether the parameter is visible on the UI. | |||
| """ | |||
| def __init__(self, name, value_type: ValueTypeEnum, support_disable=True, default_value=None, visible_on_ui=True): | |||
| self._name = name | |||
| self._type = value_type | |||
| self._support_disable = support_disable | |||
| self._default_value = default_value | |||
| self._visible_on_ui = visible_on_ui | |||
| @property | |||
| def name(self): | |||
| """Get name of parameter.""" | |||
| return self._name | |||
| @property | |||
| def type(self): | |||
| """Get type of parameter.""" | |||
| return self._type | |||
| @property | |||
| def support_disable(self): | |||
| """Get support_disable of parameter.""" | |||
| return self._support_disable | |||
| @property | |||
| def default_value(self): | |||
| """Get default_value of parameter.""" | |||
| return self._default_value | |||
| @property | |||
| def visible_on_ui(self): | |||
| """Get visible_on_ui of parameter.""" | |||
| return self._visible_on_ui | |||
| class Condition: | |||
| """ | |||
| The class for conditions. | |||
| Args: | |||
| condition_id (str): condition id. | |||
| abbr (str): the abbreviation of condition id. | |||
| optimize_phase (OptimizePhaseEnum): optimize phase. | |||
| parameters (List[ConditionParameter]): parameters. | |||
| supported_target_type (TargetTypeEnum): the supported target type. | |||
| supported_platforms (tuple[PlatformEnum, PlatformEnum]): the supported platforms. | |||
| minimum_debugger_capability (tuple): the minimum debugger capability required. | |||
| available_test_func (func): the function used to test whether the condition is available. | |||
| """ | |||
| def __init__(self, condition_id, abbr, optimize_phase, parameters, supported_target_type, supported_platforms, | |||
| minimum_debugger_capability, available_test_func=None): | |||
| self.id = condition_id | |||
| self._abbr = abbr | |||
| self.optimize_phase = optimize_phase | |||
| self._parameters = { | |||
| parameter.name: parameter for parameter in parameters | |||
| } | |||
| self._supported_target_type = supported_target_type | |||
| self.supported_platforms = supported_platforms | |||
| self.minimum_debugger_capability = minimum_debugger_capability | |||
| self.available_test_func = available_test_func | |||
| def get_parameter_definition(self, name): | |||
| """Return parameter definition by the name""" | |||
| return self._parameters[name] | |||
| def is_available(self, condition_context): | |||
| """Check is the condition available.""" | |||
| backend = condition_context.backend | |||
| debugger_capability = condition_context.debugger_capability | |||
| if debugger_capability < self.minimum_debugger_capability: | |||
| logger.debug("The debugger capability is lower than the minimum debugger capability.") | |||
| return False | |||
| if backend not in [platform.value for platform in self.supported_platforms]: | |||
| logger.debug("The condition %s is not supported on the platform.", self.id) | |||
| return False | |||
| if self.available_test_func is None: | |||
| return True | |||
| return self.available_test_func(condition_context) | |||
| @property | |||
| def abbr(self): | |||
| """The abbreviation of condition""" | |||
| return self._abbr | |||
| @property | |||
| def names(self): | |||
| """The name of condition""" | |||
| return self._parameters.keys() | |||
| @property | |||
| def parameters(self): | |||
| """The parameters of condition""" | |||
| return self._parameters.values() | |||
| @property | |||
| def supported_target_type(self): | |||
| """The supported target type of condition""" | |||
| return self._supported_target_type | |||
| def check_initialization_available(condition_context): | |||
| """Check if initialization is available at this step""" | |||
| if condition_context.step == 0: | |||
| return True | |||
| return False | |||
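| if __name__ == "__main__": | |||
|     # Minimal self-check sketch for the classes above. The condition values are | |||
|     # illustrative assumptions that mirror an entry in condition_list.py. | |||
|     _context = ConditionContext(backend="GPU", step=0, debugger_capability=(1, 1)) | |||
|     _demo = Condition( | |||
|         condition_id="tensor_all_zero", | |||
|         abbr="TZ", | |||
|         optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
|         parameters=[ConditionParameter(name="zero_percentage_ge", | |||
|                                        value_type=ValueTypeEnum.FLOAT64, | |||
|                                        default_value=100)], | |||
|         supported_target_type=TargetTypeEnum.TENSOR, | |||
|         supported_platforms=(PlatformEnum.GPU,), | |||
|         minimum_debugger_capability=(1, 1), | |||
|         available_test_func=check_initialization_available) | |||
|     # Prints True: capability (1, 1) meets the minimum, "GPU" is supported, and step == 0. | |||
|     print(_demo.is_available(_context)) | |||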
| @@ -0,0 +1,599 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Condition list. | |||
| This module provides the detailed condition list. | |||
| """ | |||
| from mindinsight.conditionmgr.condition import Condition | |||
| from mindinsight.conditionmgr.condition import OptimizePhaseEnum | |||
| from mindinsight.conditionmgr.condition import ConditionParameter | |||
| from mindinsight.conditionmgr.condition import ValueTypeEnum | |||
| from mindinsight.conditionmgr.condition import TargetTypeEnum | |||
| from mindinsight.conditionmgr.condition import PlatformEnum | |||
| from mindinsight.conditionmgr.condition import check_initialization_available | |||
| CONDITION_LIST = [ | |||
| Condition( | |||
| condition_id="weight_initialization", | |||
| abbr="WI", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_initialization | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="zero_percentage_ge", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=100 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1), | |||
| available_test_func=check_initialization_available | |||
| ), | |||
| Condition( | |||
| condition_id="weight_overflow", | |||
| abbr="WO", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="weight_too_large", | |||
| abbr="WL", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_too_large | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="weight_too_small", | |||
| abbr="WS", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_too_small | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="gradient_vanishing", | |||
| abbr="GV", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_too_small | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.GRADIENT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="gradient_too_large", | |||
| abbr="GL", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_too_large | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.GRADIENT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="gradient_exploding", | |||
| abbr="GE", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.GRADIENT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_overflow", | |||
| abbr="TO", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="operator_overflow", | |||
| abbr="OO", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.overflow | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND,), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="nan", | |||
| abbr="NAN", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.nan | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.GPU,), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="overflow", | |||
| abbr="OVERFLOW", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.overflow | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND,), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="inf", | |||
| abbr="INF", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.inf | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="max_gt", | |||
| abbr="MAX>", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.max_gt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="max_lt", | |||
| abbr="MAX<", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.max_lt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="min_gt", | |||
| abbr="MIN>", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.min_gt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="min_lt", | |||
| abbr="MIN<", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.min_lt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="max_min_gt", | |||
| abbr="MAX-MIN>", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.max_min_gt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="max_min_lt", | |||
| abbr="MAX-Min<", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.max_min_lt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="mean_gt", | |||
| abbr="MEAN>", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.mean_gt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="mean_lt", | |||
| abbr="MEAN<", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.mean_lt | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="param", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 0) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_initialization", | |||
| abbr="TI", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_initialization | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="zero_percentage_ge", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=100 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1), | |||
| available_test_func=check_initialization_available | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_too_large", | |||
| abbr="TL", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_too_large | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_too_small", | |||
| abbr="TS", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_too_small | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="max_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="min_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ), | |||
| ConditionParameter( | |||
| name="mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_all_zero", | |||
| abbr="TZ", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_all_zero | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="zero_percentage_ge", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=100 | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="weight_not_changed", | |||
| abbr="WNC", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_not_changed | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="rtol", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=1e-5 | |||
| ), | |||
| ConditionParameter( | |||
| name="atol", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| support_disable=False, | |||
| default_value=1e-8, | |||
| visible_on_ui=False | |||
| ), | |||
| ConditionParameter( | |||
| name="equal_nan", | |||
| value_type=ValueTypeEnum.BOOL, | |||
| support_disable=False, | |||
| default_value=False, | |||
| visible_on_ui=False | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="weight_change_too_large", | |||
| abbr="WCL", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_large | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_update_ratio_mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=1e-1 | |||
| ), | |||
| ConditionParameter( | |||
| name="epsilon", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| support_disable=False, | |||
| default_value=1e-9, | |||
| visible_on_ui=False | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="weight_change_too_small", | |||
| abbr="WCS", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_small | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_update_ratio_mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=1e-4 | |||
| ), | |||
| ConditionParameter( | |||
| name="epsilon", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| support_disable=False, | |||
| default_value=1e-9, | |||
| visible_on_ui=False | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_change_too_large", | |||
| abbr="TCL", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_large | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_update_ratio_mean_gt", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=1e-1 | |||
| ), | |||
| ConditionParameter( | |||
| name="epsilon", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| support_disable=False, | |||
| default_value=1e-9, | |||
| visible_on_ui=False | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_change_too_small", | |||
| abbr="TCS", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_small | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="abs_update_ratio_mean_lt", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=1e-4 | |||
| ), | |||
| ConditionParameter( | |||
| name="epsilon", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| support_disable=False, | |||
| default_value=1e-9, | |||
| visible_on_ui=False | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ), | |||
| Condition( | |||
| condition_id="tensor_not_changed", | |||
| abbr="TNC", | |||
| # Sending this condition to MindSpore will use WatchCondition.Condition.tensor_not_changed | |||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||
| parameters=[ | |||
| ConditionParameter( | |||
| name="rtol", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| default_value=1e-5 | |||
| ), | |||
| ConditionParameter( | |||
| name="atol", | |||
| value_type=ValueTypeEnum.FLOAT64, | |||
| support_disable=False, | |||
| default_value=1e-8, | |||
| visible_on_ui=False | |||
| ), | |||
| ConditionParameter( | |||
| name="equal_nan", | |||
| value_type=ValueTypeEnum.BOOL, | |||
| support_disable=False, | |||
| default_value=False, | |||
| visible_on_ui=False | |||
| ) | |||
| ], | |||
| supported_target_type=TargetTypeEnum.TENSOR, | |||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||
| minimum_debugger_capability=(1, 1) | |||
| ) | |||
| ] | |||
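| # The list above is registered by ConditionMgr at construction time; for example, | |||
| # ConditionMgr().get_condition("tensor_all_zero").abbr evaluates to "TZ". | |||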
| @@ -0,0 +1,132 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Condition manager. | |||
| This module provides the condition manager function. | |||
| """ | |||
| from mindinsight.conditionmgr.condition import Condition | |||
| from mindinsight.conditionmgr.condition import TargetTypeEnum | |||
| from mindinsight.conditionmgr.condition_list import CONDITION_LIST | |||
| from mindinsight.conditionmgr.log import logger | |||
| class ConditionMgr: | |||
| """Condition manager.""" | |||
| def __init__(self): | |||
| self.conditions = {} | |||
| self.no_parameter_conditions = [] | |||
| self._register_default_conditions() | |||
| def _register_default_conditions(self): | |||
| """Register default condition definitions""" | |||
| self.register_conditions(CONDITION_LIST) | |||
| def register_condition(self, condition): | |||
| """Register conditions into dict""" | |||
| if not condition.parameters: | |||
| self.no_parameter_conditions.append(condition.id) | |||
| self.conditions[condition.id] = condition | |||
| def register_conditions(self, conditions): | |||
| """Register conditions""" | |||
| for condition in conditions: | |||
| self.register_condition(condition) | |||
| def get_all(self, condition_context): | |||
| """Get all register conditions.""" | |||
| conditions = [] | |||
| for condition in self.conditions.values(): | |||
| parameters = [] | |||
| if not condition.is_available(condition_context): | |||
| continue | |||
| for param in condition.parameters: | |||
| if not param.visible_on_ui: | |||
| continue | |||
| parameters.append({ | |||
| "name": param.name, | |||
| "type": param.type.name, | |||
| "support_disable": param.support_disable, | |||
| "default_value": param.default_value | |||
| }) | |||
| conditions.append({ | |||
| "id": condition.id, | |||
| "parameters": parameters, | |||
| "supported_target_type": condition.supported_target_type.name | |||
| }) | |||
| conditions = sorted(conditions, key=lambda x: x.get('id')) | |||
| return {"conditions": conditions} | |||
| def get_condition(self, condition_id) -> Condition: | |||
| """Get condition by condition id""" | |||
| return self.conditions[condition_id] | |||
| def has_condition(self, condition_id, condition_context) -> bool: | |||
| """Return if the condition exist and avilible""" | |||
| if condition_id in self.conditions: | |||
| condition = self.get_condition(condition_id) | |||
| return condition.is_available(condition_context) | |||
| logger.warning("Condition id %s not found.", condition_id) | |||
| return False | |||
| def get_no_param_condition(self) -> list: | |||
| """Return the list of condition without parameters""" | |||
| return self.no_parameter_conditions | |||
| @staticmethod | |||
| def check_and_sort(collections, target_type, reply): | |||
| """Check the collection and sort conditions""" | |||
| collection = collections.get(target_type) | |||
| if collection: | |||
| collection = sorted(collection, key=lambda x: x.get('id')) | |||
| reply.append({"id": target_type + "_condition_collection", "conditions": collection}) | |||
| else: | |||
| logger.warning("Condition collection for %s is None.", target_type) | |||
| def get_all_collections(self, condition_context): | |||
| """Get all register conditions.""" | |||
| collections = { | |||
| TargetTypeEnum.WEIGHT.value: [], TargetTypeEnum.TENSOR.value: [], TargetTypeEnum.GRADIENT.value: [], | |||
| TargetTypeEnum.ACTIVATION.value: [] | |||
| } | |||
| for condition in self.conditions.values(): | |||
| parameters = [] | |||
| if not condition.is_available(condition_context): | |||
| continue | |||
| for param in condition.parameters: | |||
| if not param.visible_on_ui: | |||
| continue | |||
| parameters.append({ | |||
| "name": param.name, | |||
| "type": param.type.name, | |||
| "support_disable": param.support_disable, | |||
| "default_value": param.default_value | |||
| }) | |||
| collections[condition.supported_target_type.value].append({ | |||
| "id": condition.id, | |||
| "parameters": parameters, | |||
| "supported_target_type": condition.supported_target_type.name, | |||
| "abbr": condition.abbr | |||
| }) | |||
| reply = [] | |||
| self.check_and_sort(collections, TargetTypeEnum.ACTIVATION.value, reply) | |||
| self.check_and_sort(collections, TargetTypeEnum.GRADIENT.value, reply) | |||
| self.check_and_sort(collections, TargetTypeEnum.TENSOR.value, reply) | |||
| self.check_and_sort(collections, TargetTypeEnum.WEIGHT.value, reply) | |||
| return reply | |||
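| if __name__ == "__main__": | |||
|     # Minimal usage sketch: list the condition collections available on a GPU | |||
|     # backend. ConditionContext comes from the condition module shown above. | |||
|     from mindinsight.conditionmgr.condition import ConditionContext | |||
|     _mgr = ConditionMgr() | |||
|     _context = ConditionContext(backend="GPU", step=0, debugger_capability=(1, 1)) | |||
|     for _collection in _mgr.get_all_collections(_context): | |||
|         print(_collection["id"], len(_collection["conditions"])) | |||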
| @@ -0,0 +1,19 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Log module""" | |||
| from mindinsight.utils.log import setup_logger | |||
| logger = setup_logger(sub_module="conditionmgr", log_name="conditionmgr") | |||
| @@ -0,0 +1,365 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Predefined watchpoints. | |||
| This module predefines recommended watchpoints. | |||
| """ | |||
| import queue as Queue | |||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||
| from mindinsight.conditionmgr.condition import TargetTypeEnum | |||
| from mindinsight.conditionmgr.condition import ConditionIdEnum | |||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||
| from mindinsight.conditionmgr.log import logger | |||
| from mindinsight.conf import settings | |||
| UNSELECTED_STATUS = 0 | |||
| HALF_SELECTED_STATUS = 1 | |||
| SELECTED_STATUS = 2 | |||
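| # Selection markers used when merging watch nodes (see _merge_nodes below): leaf | |||
| # nodes are fully chosen (SELECTED_STATUS) and name scopes start unchosen | |||
| # (UNSELECTED_STATUS); HALF_SELECTED_STATUS presumably marks a partly chosen scope. | |||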
| class _WatchPointData: | |||
| """WatchPoint data container""" | |||
| def __init__(self, watch_condition, watch_nodes): | |||
| self.watch_condition = watch_condition | |||
| self.watch_nodes = watch_nodes | |||
| def get_watch_condition_dict(self): | |||
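| """Return the watch condition as a dict with "id" and "params" entries.""" | |||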
| return { | |||
| "id": self.watch_condition.get("condition"), | |||
| "params": [{ | |||
| "name": param.get_parameter_name(), | |||
| "disable": False, | |||
| "value": param.value | |||
| } for param in self.watch_condition.get("params")] | |||
| } | |||
| class _ConditionParameterValue: | |||
| """Condition parameter data container""" | |||
| def __init__(self, parameter, value): | |||
| self.parameter = parameter | |||
| self.value = value | |||
| def get_parameter_name(self): | |||
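| """Return the name of the wrapped parameter.""" | |||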
| return self.parameter.name | |||
| def recommend_watchpoints(condition_mgr: ConditionMgr, graph_stream, condition_context): | |||
| """ | |||
| Recommend watchpoints. | |||
| Args: | |||
| condition_mgr (ConditionMgr): Condition manager instance. | |||
| graph_stream (GraphHandler): Graph handler instance. | |||
| condition_context (ConditionContext): Context for condition. | |||
| Returns: | |||
| list[WatchPointData], watch points to be created. | |||
| """ | |||
| watch_points = [] | |||
| if not graph_stream.graph: | |||
| logger.warning("Given graph is None.") | |||
| return watch_points | |||
| if not settings.ENABLE_RECOMMENDED_WATCHPOINTS: | |||
| return watch_points | |||
| # add weight watch points | |||
| merged_info = _get_basic_node_info(TargetTypeEnum.WEIGHT.value, graph_stream) | |||
| _recommend_weight_initialization(merged_info, condition_mgr, watch_points, condition_context) | |||
| _recommend_weight_change_too_large(merged_info, condition_mgr, watch_points, condition_context) | |||
| # Because we cannot identify trainable weights currently, weight_no_change and weight_change_too_small will not be | |||
| # recommended. | |||
| trainable_weight_nodes = [] | |||
| _recommend_weight_not_changed(condition_mgr, trainable_weight_nodes, watch_points, condition_context) | |||
| _recommend_weight_change_too_small(condition_mgr, trainable_weight_nodes, watch_points, condition_context) | |||
| # add gradient watch points | |||
| merged_info = _get_basic_node_info(TargetTypeEnum.GRADIENT.value, graph_stream) | |||
| _recommend_gradient_vanishing(merged_info, condition_mgr, watch_points, condition_context) | |||
| # add tensor watch points | |||
| merged_info = _get_basic_node_info(TargetTypeEnum.TENSOR.value, graph_stream) | |||
| _recommend_overflow_ascend_chip(merged_info, condition_mgr, watch_points, condition_context) | |||
| _recommend_tensor_overflow(merged_info, condition_mgr, watch_points, condition_context) | |||
| _recommend_tensor_all_zero(merged_info, condition_mgr, watch_points, condition_context) | |||
| return watch_points | |||
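| # Usage sketch (graph_stream is a debugger GraphHandler instance; the names here | |||
| # are assumptions): | |||
| # >>> mgr = ConditionMgr() | |||
| # >>> context = ConditionContext(backend="Ascend", step=0, debugger_capability=(1, 1)) | |||
| # >>> for point in recommend_watchpoints(mgr, graph_stream, context): | |||
| # ...     print(point.get_watch_condition_dict()) | |||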
| def _recommend_tensor_all_zero(basic_info_nodes, condition_mgr, watch_points, condition_context): | |||
| """Recommend tensor all zero watchpoint.""" | |||
| if not basic_info_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.TENSOR_ALL_ZERO.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.TENSOR_ALL_ZERO.value) | |||
| tensor_all_zero_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [_ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("zero_percentage_ge"), | |||
| value=100 # set default value to 100 | |||
| )] | |||
| }, | |||
| watch_nodes=basic_info_nodes.copy(), | |||
| ) | |||
| watch_points.append(tensor_all_zero_watchpoint) | |||
| def _recommend_tensor_overflow(basic_info_nodes, condition_mgr, watch_points, condition_context): | |||
| """Recommend tensor general overflow watchpoint.""" | |||
| if not basic_info_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.TENSOR_OVERFLOW.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.TENSOR_OVERFLOW.value) | |||
| overflow_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [] | |||
| }, | |||
| watch_nodes=basic_info_nodes.copy(), | |||
| ) | |||
| watch_points.append(overflow_watchpoint) | |||
| def _recommend_overflow_ascend_chip(basic_info_nodes, condition_mgr, watch_points, condition_context): | |||
| """Recommend tensor overflow watchpoint.""" | |||
| if not basic_info_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value) | |||
| overflow_d_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [] | |||
| }, | |||
| watch_nodes=basic_info_nodes.copy(), | |||
| ) | |||
| watch_points.append(overflow_d_watchpoint) | |||
| def _recommend_gradient_vanishing(basic_info_nodes, condition_mgr, watch_points, condition_context): | |||
| """Recommend gradient vanishing watchpoint.""" | |||
| if not basic_info_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.GRADIENT_VANISHING.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.GRADIENT_VANISHING.value) | |||
| gradient_vanishing_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [_ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("abs_mean_lt"), | |||
| value=1e-9 # set default value to 1e-9 | |||
| )] | |||
| }, | |||
| watch_nodes=basic_info_nodes.copy(), | |||
| ) | |||
| watch_points.append(gradient_vanishing_watchpoint) | |||
| def _recommend_weight_change_too_small(condition_mgr, trainable_weight_nodes, watch_points, condition_context): | |||
| """Recommend weight change too small watchpoint.""" | |||
| if not trainable_weight_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.WEIGHT_CHANGE_TOO_SMALL.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.WEIGHT_CHANGE_TOO_SMALL.value) | |||
| weight_change_too_small_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [ | |||
| _ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("abs_update_ratio_mean_lt"), | |||
| value=1.0e-4 # set default value to 1.0e-4 | |||
| ), | |||
| ] | |||
| }, | |||
| watch_nodes=trainable_weight_nodes, | |||
| ) | |||
| watch_points.append(weight_change_too_small_watchpoint) | |||
| def _recommend_weight_not_changed(condition_mgr, trainable_weight_nodes, watch_points, condition_context): | |||
| """Recommend weight not changed watchpoint.""" | |||
| if not trainable_weight_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.WEIGHT_NOT_CHANGED.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.WEIGHT_NOT_CHANGED.value) | |||
| weight_no_change_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [ | |||
| _ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("rtol"), | |||
| value=1.0e-5 # set default value to 1.0e-5 | |||
| ), | |||
| _ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("atol"), | |||
| value=1.0e-8 # set default value to 1.0e-8 | |||
| ), | |||
| ] | |||
| }, | |||
| watch_nodes=trainable_weight_nodes, | |||
| ) | |||
| watch_points.append(weight_no_change_watchpoint) | |||
| def _recommend_weight_change_too_large(basic_info_nodes, condition_mgr, watch_points, condition_context): | |||
| """Recommend weight change too large watchpoint.""" | |||
| if not basic_info_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.WEIGHT_CHANGE_TOO_LARGE.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.WEIGHT_CHANGE_TOO_LARGE.value) | |||
weight_change_too_large_watchpoint = _WatchPointData(
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [_ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("abs_update_ratio_mean_gt"), | |||
| value=0.1 # set default value to 0.1 | |||
| )] | |||
| }, | |||
| watch_nodes=basic_info_nodes.copy(), | |||
| ) | |||
watch_points.append(weight_change_too_large_watchpoint)
| def _recommend_weight_initialization(basic_info_nodes, condition_mgr, watch_points, condition_context): | |||
| """Recommend weight initialization watchpoint.""" | |||
| if not basic_info_nodes: | |||
| return | |||
| if not condition_mgr.has_condition(ConditionIdEnum.WEIGHT_INITIALIZATION.value, condition_context): | |||
| return | |||
| condition = condition_mgr.get_condition(condition_id=ConditionIdEnum.WEIGHT_INITIALIZATION.value) | |||
| weight_initialization_watchpoint = _WatchPointData( | |||
| watch_condition={ | |||
| "condition": condition.id, | |||
| "params": [_ConditionParameterValue( | |||
| parameter=condition.get_parameter_definition("zero_percentage_ge"), | |||
| value=100 # set default value to 100 | |||
| )] | |||
| }, | |||
| watch_nodes=basic_info_nodes.copy(), | |||
| ) | |||
| watch_points.append(weight_initialization_watchpoint) | |||
| def _get_basic_node_info(node_category, graph_stream): | |||
| """Get node merged info.""" | |||
| basic_info_nodes = _get_basic_node_info_by_node_category(node_category, graph_stream) | |||
| merged_info = _merge_nodes(basic_info_nodes, graph_stream.whole_graph) | |||
| merged_info = _add_graph_name(merged_info, graph_stream) | |||
| return merged_info | |||
| def _get_basic_node_info_by_node_category(node_category, graph_stream): | |||
| """Get node basic info by node category.""" | |||
| all_graph_nodes = graph_stream.get_searched_nodes(pattern={'node_category': node_category}) | |||
| basic_info_nodes = [] | |||
| for graph_name, nodes in all_graph_nodes.items(): | |||
| if len(all_graph_nodes) == 1: | |||
| logger.debug("This is a single graph") | |||
| graph_name = "" | |||
| for node in nodes: | |||
| if graph_name == "": | |||
| basic_node_info = NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||
| else: | |||
| basic_node_info = graph_stream.construct_node_basic_info( | |||
| full_name=node.full_name, graph_name=graph_name, node_name=node.name, node_type=node.type) | |||
| basic_info_nodes.append(basic_node_info) | |||
| return basic_info_nodes | |||
| def _merge_nodes(leaf_nodes, graph): | |||
| """merge nodes in one graph""" | |||
| unmerged_tree = graph.get_nodes(leaf_nodes) | |||
| tmp_node_queue = Queue.Queue() | |||
| # watch node list in layer order | |||
| watch_nodes = [] | |||
| for node in unmerged_tree: | |||
| if node["type"] != "name_scope": | |||
| # if node is leaf_node, it is totally chosen | |||
| node["status"] = SELECTED_STATUS | |||
| else: | |||
| # if node is not leaf_node, it is not chosen initially | |||
| node["status"] = UNSELECTED_STATUS | |||
| tmp_node_queue.put(node) | |||
| while not tmp_node_queue.empty(): | |||
| cur_node = tmp_node_queue.get() | |||
| watch_nodes.append(cur_node) | |||
| for sub_node in cur_node["nodes"]: | |||
| if sub_node["type"] != "name_scope": | |||
| # if node is leaf_node, it is totally chosen | |||
| sub_node["status"] = SELECTED_STATUS | |||
| else: | |||
| # if node is not leaf_node, it is not chosen initially | |||
| sub_node["status"] = UNSELECTED_STATUS | |||
| tmp_node_queue.put(sub_node) | |||
| merged_watch_nodes = [] | |||
| while watch_nodes: | |||
| cur_node = watch_nodes.pop() | |||
| node_name = cur_node["name"] | |||
| sub_count = graph.normal_node_map.get(node_name).subnode_count | |||
| if len(cur_node["nodes"]) < sub_count or not cur_node["nodes"]: | |||
| continue | |||
| is_all_chosen = True | |||
| for sub_node in cur_node["nodes"]: | |||
| if sub_node["status"] != SELECTED_STATUS: | |||
| is_all_chosen = False | |||
| break | |||
| if is_all_chosen: | |||
| cur_node["status"] = SELECTED_STATUS | |||
| merged_watch_nodes.append(cur_node) | |||
| else: | |||
| cur_node["status"] = HALF_SELECTED_STATUS | |||
| logger.debug("merged_watch_nodes: %s", merged_watch_nodes) | |||
| out_nodes = [] | |||
| for node_info in merged_watch_nodes: | |||
| node_basic_info = NodeBasicInfo(name=node_info["name"], full_name=node_info["name"], type=node_info["type"]) | |||
| out_nodes.append(node_basic_info) | |||
| logger.debug("out_nodes: %s", out_nodes) | |||
| return out_nodes | |||
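# Illustrative sketch (not part of the diff) of the merge rule above: a scope
# collapses into a single watched node only when the watch tree contains all
# of the scope's children (per subnode_count) and every child is selected.
# The dicts below are hypothetical stand-ins for the graph node structures,
# with the literal "selected" standing in for SELECTED_STATUS.
def _toy_can_merge(scope):
    """Return True if every child of `scope` is selected, i.e. it can merge."""
    return all(child["status"] == "selected" for child in scope["nodes"])

toy_scope = {"name": "Default/conv1", "type": "name_scope",
             "nodes": [{"status": "selected"}, {"status": "selected"}]}
assert _toy_can_merge(toy_scope)  # conv1 would be watched as one merged node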
| def _add_graph_name(nodes, graph_stream): | |||
| """add graph_name in node.name""" | |||
| if len(graph_stream.graph) > 1: | |||
| return nodes | |||
| graph_name = graph_stream.graph_names[0] | |||
| output_nodes = [] | |||
| for node in nodes: | |||
| node_basic_info = graph_stream.construct_node_basic_info( | |||
| full_name=node.name, graph_name=graph_name, node_name=node.name, node_type=node.type) | |||
| output_nodes.append(node_basic_info) | |||
| return output_nodes | |||
| @@ -63,3 +63,5 @@ MAX_GRAPH_STEP_SIZE_PER_TAG = 1 | |||
| MAX_HISTOGRAM_STEP_SIZE_PER_TAG = 50 | |||
| MAX_TENSOR_STEP_SIZE_PER_TAG = 20 | |||
| MAX_TENSOR_RESPONSE_DATA_SIZE = 100000 | |||
| ENABLE_RECOMMENDED_WATCHPOINTS = False | |||
| @@ -54,7 +54,7 @@ class MSGraph(Graph): | |||
| node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto. | |||
| """ | |||
| logger.debug("Start to parse op nodes from proto.") | |||
| for node_proto in node_protos: | |||
| for topological_index, node_proto in enumerate(node_protos): | |||
| if not node_proto.name: | |||
| logger.warning("Finding a node with an empty name will not save it.") | |||
| continue | |||
| @@ -69,7 +69,7 @@ class MSGraph(Graph): | |||
# The Graphviz plug-in that the UI uses can't handle these special characters.
| check_invalid_character(node_name) | |||
| node = Node(name=node_name, node_id=node_proto.name) | |||
| node = Node(name=node_name, node_id=node_proto.name, topological_index=topological_index) | |||
| node.full_name = node_proto.full_name | |||
| node.type = node_proto.op_type | |||
| @@ -35,7 +35,7 @@ class Node: | |||
| node_id (str): The id of this node, and node id is unique in graph. | |||
| """ | |||
| def __init__(self, name, node_id): | |||
| def __init__(self, name, node_id, topological_index=-1): | |||
| self._node_id = node_id | |||
| self.name = name | |||
| self.type = "" | |||
| @@ -53,6 +53,8 @@ class Node: | |||
| self.output_nums = 0 | |||
| self.elem_types = [] | |||
| self.full_name = "" | |||
| # This value will be used as the priority field. | |||
| self.topological_index = topological_index | |||
| def to_dict(self): | |||
| """Converts the node object to dictionary format.""" | |||
| @@ -16,7 +16,7 @@ | |||
| This file is used to define the node of graph and associated base types. | |||
| """ | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| class NodeTree: | |||
| @@ -37,6 +37,7 @@ class DebuggerErrors(DebuggerErrorCodes): | |||
| CONTINUE_ERROR = 3 | _DEBUGGER_RUNNING_ERROR | |||
| PAUSE_ERROR = 4 | _DEBUGGER_RUNNING_ERROR | |||
| COMPARE_TENSOR_ERROR = 5 | _DEBUGGER_RUNNING_ERROR | |||
| RECHECK_ERROR = 6 | _DEBUGGER_RUNNING_ERROR | |||
| @unique | |||
| @@ -52,3 +53,4 @@ class DebuggerErrorMsg(Enum): | |||
| DELETE_WATCHPOINT_ERROR = "Delete watchpoint failed. {}" | |||
| CONTINUE_ERROR = "Continue debugging failed. {}" | |||
| PAUSE_ERROR = "Pause debugging failed. {}" | |||
| RECHECK_ERROR = "Recheck failed. {}" | |||
| @@ -72,6 +72,17 @@ class DebuggerDeleteWatchPointError(MindInsightException): | |||
| ) | |||
| class DebuggerRecheckError(MindInsightException): | |||
| """The error about deleting watch point.""" | |||
| def __init__(self, msg): | |||
| super(DebuggerRecheckError, self).__init__( | |||
| error=DebuggerErrors.RECHECK_ERROR, | |||
| message=DebuggerErrorMsg.RECHECK_ERROR.value.format(msg), | |||
| http_code=400 | |||
| ) | |||
| class DebuggerCompareTensorError(MindInsightException): | |||
| """The error about comparing tensors.""" | |||
| @@ -17,4 +17,4 @@ from mindinsight.utils.log import setup_logger | |||
| LOG_NAME = "debugger" | |||
| LOG_MODULE = "debugger" | |||
| logger = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME) | |||
| LOGGER = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME) | |||
| @@ -14,7 +14,6 @@ | |||
| # ============================================================================ | |||
| """Define the utils.""" | |||
| import enum | |||
| from collections import namedtuple | |||
| import numpy as np | |||
| @@ -72,7 +71,12 @@ class Streams(enum.Enum): | |||
| WATCHPOINT_HIT = 'watchpoint_hit' | |||
| NodeBasicInfo = namedtuple('node_basic_info', ['name', 'full_name', 'type']) | |||
| class RunLevel(enum.Enum): | |||
| """Run Level enum, it depends on whether the program is executed node by node, | |||
| step by step, or in recheck phase""" | |||
| NODE = "node" | |||
| STEP = "step" | |||
| RECHECK = "recheck" | |||
| def get_ack_reply(state=0): | |||
| @@ -140,5 +144,4 @@ def create_view_event_from_tensor_history(tensor_history): | |||
| def is_scope_type(node_type): | |||
| """Judge whether the type is scope type.""" | |||
| scope_types = [NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.AGGREGATION_SCOPE.value] | |||
| return node_type in scope_types | |||
| return node_type.endswith('scope') | |||
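# Quick check of the suffix rule above: both existing scope types still match.
assert is_scope_type('name_scope') and is_scope_type('aggregation_scope')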
| @@ -15,7 +15,7 @@ | |||
| """Implement the debugger data cache manager.""" | |||
| import sys | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import Streams | |||
| from mindinsight.debugger.stream_handler import EventHandler, MetadataHandler, GraphHandler, \ | |||
| TensorHandler, WatchpointHandler, WatchpointHitHandler | |||
| @@ -15,11 +15,13 @@ | |||
| """Implement the debugger grpc server.""" | |||
| from functools import wraps | |||
| from mindinsight.debugger.common.log import logger as log | |||
| import mindinsight.conditionmgr.recommender | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus, \ | |||
| Streams | |||
| Streams, RunLevel | |||
| from mindinsight.debugger.proto import debug_grpc_pb2_grpc as grpc_server_base | |||
| from mindinsight.debugger.proto.ms_graph_pb2 import GraphProto | |||
| from mindinsight.conditionmgr.condition import ConditionContext | |||
| def debugger_wrap(func): | |||
| @@ -39,7 +41,7 @@ def debugger_wrap(func): | |||
| class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| """The grpc server used to interactive with grpc client.""" | |||
| def __init__(self, cache_store): | |||
| def __init__(self, cache_store, condition_mgr): | |||
| """ | |||
| Initialize. | |||
| @@ -48,6 +50,7 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| """ | |||
| cache_store.initialize() | |||
| self._cache_store = cache_store | |||
| self._condition_mgr = condition_mgr | |||
| # the next position of command queue to be queried | |||
| self._pos = None | |||
| # the status of grpc server, the value is in ServerStatus | |||
| @@ -66,7 +69,7 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| self._status = ServerStatus.PENDING | |||
| self._old_run_cmd = {} | |||
| self._received_view_cmd = {} | |||
| self._received_hit = False | |||
| self._received_hit = [] | |||
| self._cache_store.clean() | |||
| @debugger_wrap | |||
| @@ -90,25 +93,46 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| reply = get_ack_reply(1) | |||
| log.warning("Failed to get command event.") | |||
| else: | |||
| log.info("Reply to WaitCMD: %s", reply) | |||
| log.debug("Reply to WaitCMD: %s", reply) | |||
| return reply | |||
| def _add_predefined_watchpoints(self, condition_context): | |||
| """Add predefined watchpoints.""" | |||
| log.debug("Add predefined watchpoints.") | |||
| graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | |||
| watchpoints = mindinsight.conditionmgr.recommender.recommend_watchpoints(self._condition_mgr, graph_stream, | |||
| condition_context) | |||
| watch_point_stream_handler = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| for watchpoint in watchpoints: | |||
| watch_point_stream_handler.create_watchpoint( | |||
| watch_condition=watchpoint.get_watch_condition_dict(), | |||
| watch_nodes=watchpoint.watch_nodes, | |||
| condition_mgr=self._condition_mgr | |||
| ) | |||
| def _pre_process(self, request): | |||
| """Pre-process before dealing with command.""" | |||
| metadata_stream = self._cache_store.get_stream_handler(Streams.METADATA) | |||
| watchpoint_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| is_new_step = metadata_stream.step < request.cur_step | |||
| is_new_node = metadata_stream.full_name != request.cur_node | |||
| # clean cache data at the beginning of new step | |||
# clean cached data at the beginning of a new step or when the current node has changed.
| if is_new_step or is_new_node: | |||
| self._cache_store.clean_data() | |||
| if is_new_step: | |||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||
| self._cache_store.get_stream_handler(Streams.TENSOR).clean_tensors(request.cur_step) | |||
| watchpoint_stream.clean_temp_cached_names() | |||
| # receive graph at the beginning of the training | |||
| if self._status == ServerStatus.RECEIVE_GRAPH: | |||
| condition_context = ConditionContext(backend=request.backend, debugger_capability=(1, 0)) | |||
| self._add_predefined_watchpoints(condition_context) | |||
| self._send_graph_flag(metadata_stream) | |||
| # receive new metadata | |||
| if is_new_step or is_new_node: | |||
| self._update_metadata(metadata_stream, request) | |||
# save the full name of the node whose tensor MindSpore has stored.
| watchpoint_stream.add_temp_cached_name(request.cur_node) | |||
| self._send_received_tensor_tag() | |||
| self._send_watchpoint_hit_flag() | |||
| @@ -139,9 +163,14 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| """ | |||
| # put new metadata into cache | |||
| metadata_stream.put(metadata_proto) | |||
| cur_node = self._cache_store.get_stream_handler(Streams.GRAPH).get_node_name_by_full_name( | |||
| metadata_proto.cur_node) if metadata_proto.cur_node else '' | |||
| # update current node name and graph name | |||
| graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | |||
| full_name = metadata_proto.cur_node | |||
| graph_name = graph_stream.get_graph_id_by_full_name( | |||
| full_name) if full_name else metadata_stream.graph_name | |||
| cur_node = graph_stream.get_node_name_by_full_name(full_name, graph_name) | |||
| metadata_stream.node_name = cur_node | |||
| metadata_stream.graph_name = graph_name | |||
| metadata = metadata_stream.get() | |||
| self._cache_store.put_data(metadata) | |||
| log.debug("Put new metadata into data queue.") | |||
| @@ -151,7 +180,7 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| node_name = self._received_view_cmd.get('node_name') | |||
| if not node_name or self._received_view_cmd.get('wait_for_tensor'): | |||
| return | |||
| metadata = self._cache_store.get_stream_handler(Streams.METADATA).get() | |||
| metadata = self._cache_store.get_stream_handler(Streams.METADATA).get(['step', 'state']) | |||
| ret = {'receive_tensor': {'node_name': node_name}} | |||
| ret.update(metadata) | |||
| self._cache_store.put_data(ret) | |||
| @@ -161,9 +190,12 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| def _send_watchpoint_hit_flag(self): | |||
| """Send Watchpoint hit flag.""" | |||
| watchpoint_hit_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | |||
| if watchpoint_hit_stream.empty or not self._received_hit: | |||
| if not self._received_hit: | |||
| return | |||
| self._received_hit = False | |||
| watchpoint_hits = self._received_hit | |||
| self._received_hit = [] | |||
| for watchpoint_hit in watchpoint_hits: | |||
| watchpoint_hit_stream.put(watchpoint_hit) | |||
| watchpoint_hits_info = watchpoint_hit_stream.get() | |||
| self._cache_store.put_data(watchpoint_hits_info) | |||
| log.debug("Send the watchpoint hits to DataQueue.\nSend the reply.") | |||
| @@ -187,7 +219,6 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| event = self._deal_with_left_continue_step(left_step_count) | |||
| else: | |||
| event = self._deal_with_left_continue_node(node_name) | |||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||
| log.debug("Send old RunCMD. Clean watchpoint hit.") | |||
| return event | |||
| @@ -260,7 +291,10 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| event = self._deal_with_run_cmd(event) | |||
| elif event.HasField('exit'): | |||
| self._cache_store.clean() | |||
| log.info("Clean cache for exit cmd.") | |||
| log.debug("Clean cache for exit cmd.") | |||
| else: | |||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT).clean_cache_set_cmd(event.set_cmd) | |||
| log.debug("get set cmd.") | |||
| return event | |||
| @@ -294,7 +328,9 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| elif run_cmd.node_name: | |||
| self._old_run_cmd['node_name'] = run_cmd.node_name | |||
| run_cmd.node_name = '' | |||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||
| # clean watchpoint hit cache | |||
| if run_cmd.run_level == RunLevel.RECHECK.value: | |||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||
| log.debug("Receive RunCMD. Clean watchpoint hit cache.") | |||
| return event | |||
| @@ -330,9 +366,34 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| for chunk in request_iterator: | |||
| serial_graph += chunk.buffer | |||
| graph = GraphProto.FromString(serial_graph) | |||
| log.debug("Deserialize the graph. Receive %s nodes", len(graph.node)) | |||
| self._cache_store.get_stream_handler(Streams.GRAPH).put(graph) | |||
| log.debug("Deserialize the graph %s. Receive %s nodes", graph.name, len(graph.node)) | |||
| graph_dict = {graph.name: graph} | |||
| self._cache_store.get_stream_handler(Streams.GRAPH).put(graph_dict) | |||
| self._cache_store.get_stream_handler(Streams.TENSOR).put_const_vals(graph.const_vals) | |||
| self._cache_store.get_stream_handler(Streams.METADATA).graph_name = graph.name | |||
| self._status = ServerStatus.RECEIVE_GRAPH | |||
| reply = get_ack_reply() | |||
| log.debug("Send the reply for graph.") | |||
| return reply | |||
| @debugger_wrap | |||
| def SendMultiGraphs(self, request_iterator, context): | |||
| """Send graph into DebuggerCache.""" | |||
| log.info("Received graph.") | |||
| serial_graph = b"" | |||
| graph_dict = {} | |||
| for chunk in request_iterator: | |||
| serial_graph += chunk.buffer | |||
| if chunk.finished: | |||
| sub_graph = GraphProto.FromString(serial_graph) | |||
| graph_dict[sub_graph.name] = sub_graph | |||
| log.debug("Deserialize the graph %s. Receive %s nodes", sub_graph.name, | |||
| len(sub_graph.node)) | |||
| serial_graph = b"" | |||
| self._cache_store.get_stream_handler(Streams.TENSOR).put_const_vals( | |||
| sub_graph.const_vals) | |||
| self._cache_store.get_stream_handler(Streams.GRAPH).put(graph_dict) | |||
| self._status = ServerStatus.RECEIVE_GRAPH | |||
| reply = get_ack_reply() | |||
| log.debug("Send the reply for graph.") | |||
| @@ -365,22 +426,30 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||
| """Send watchpoint hits info DebuggerCache.""" | |||
| log.info("Received WatchpointHits. Left run cmd %s change to emtpy.", self._old_run_cmd) | |||
| self._old_run_cmd.clear() | |||
| self._received_hit = True | |||
| watchpoint_hit_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | |||
| if self._cache_store.get_stream_handler(Streams.METADATA).state == ServerStatus.RUNNING.value: | |||
| # if the client session is running a script, all the cached command should be cleared | |||
| # when received watchpoint_hits. | |||
| self._cache_store.clean_command() | |||
| # save the watchpoint_hits data | |||
| watchpoint_hits = [] | |||
| watchpoint_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | |||
| for watchpoint_hit_proto in request_iterator: | |||
| ui_node_name = graph_stream.get_node_name_by_full_name( | |||
| watchpoint_hit_proto.tensor.node_name) | |||
| node_full_name = watchpoint_hit_proto.tensor.node_name | |||
| graph_name = graph_stream.get_graph_id_by_full_name(node_full_name) | |||
| ui_node_name = graph_stream.get_node_name_by_full_name(node_full_name, graph_name) | |||
| log.debug("Receive watch point hit: %s", watchpoint_hit_proto) | |||
| if not ui_node_name: | |||
| log.info("Not support to show %s on graph.", watchpoint_hit_proto.tensor.node_name) | |||
| log.info("Not support to show %s on graph.", node_full_name) | |||
| continue | |||
| watchpoint_hit = { | |||
| 'tensor_proto': watchpoint_hit_proto.tensor, | |||
| 'watchpoint': watchpoint_stream.get_watchpoint_by_id(watchpoint_hit_proto.id), | |||
| 'node_name': ui_node_name | |||
| 'node_name': ui_node_name, | |||
| 'graph_name': graph_name | |||
| } | |||
| watchpoint_hit_stream.put(watchpoint_hit) | |||
| watchpoint_hits.append(watchpoint_hit) | |||
| self._received_hit = watchpoint_hits | |||
| reply = get_ack_reply() | |||
| return reply | |||
| @@ -16,23 +16,26 @@ | |||
| import signal | |||
| from concurrent import futures | |||
| from threading import Thread | |||
| import grpc | |||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||
| from mindinsight.conditionmgr.condition import ConditionContext, ConditionIdEnum | |||
| from mindinsight.conf import settings | |||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | |||
| from mindinsight.datavisual.utils.tools import to_float | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | |||
| DebuggerParamTypeError, DebuggerCreateWatchPointError, DebuggerUpdateWatchPointError, \ | |||
| DebuggerDeleteWatchPointError, DebuggerContinueError, DebuggerPauseError, \ | |||
| DebuggerCompareTensorError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| DebuggerCompareTensorError, DebuggerRecheckError | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus, \ | |||
| create_view_event_from_tensor_history, Streams, is_scope_type, NodeBasicInfo | |||
| create_view_event_from_tensor_history, Streams, is_scope_type, RunLevel | |||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||
| from mindinsight.debugger.debugger_cache import DebuggerCache | |||
| from mindinsight.debugger.debugger_grpc_server import DebuggerGrpcServer | |||
| from mindinsight.debugger.proto import debug_grpc_pb2_grpc as grpc_server_base | |||
| from mindinsight.debugger.proto.debug_grpc_pb2 import RunCMD | |||
| from mindinsight.debugger.stream_operator.tensor_detail_info import TensorDetailInfo | |||
| from mindinsight.utils.exceptions import MindInsightException | |||
| from mindinsight.utils.tensor import TensorUtils, MAX_DIMENSIONS_FOR_TENSOR | |||
| @@ -42,11 +45,26 @@ class DebuggerServer: | |||
| def __init__(self, grpc_port=None): | |||
| self.grpc_port = grpc_port | |||
| self.condition_mgr = ConditionMgr() | |||
| self.cache_store = DebuggerCache() | |||
| self.grpc_server = DebuggerGrpcServer(self.cache_store) | |||
| self.grpc_server = DebuggerGrpcServer(self.cache_store, self.condition_mgr) | |||
| self.grpc_server_manager = None | |||
| self.back_server = None | |||
| def get_conditions(self, train_id): | |||
| """Get all default conditions""" | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) | |||
| log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) | |||
| return self.condition_mgr.get_all(condition_context) | |||
| def get_condition_collections(self, train_id): | |||
| """Get default condition_collections""" | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) | |||
| log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) | |||
| return self.condition_mgr.get_all_collections(condition_context) | |||
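# Hedged usage sketch: both getters above resolve the available conditions
# against the backend/step currently recorded in the metadata stream. The
# server instance and train id below are hypothetical placeholders.
collections = debugger_server.get_condition_collections('train_0')
conditions = debugger_server.get_conditions('train_0')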
| def start(self): | |||
| """Start server.""" | |||
| grpc_port = self.grpc_port if self.grpc_port else "50051" | |||
| @@ -97,24 +115,34 @@ class DebuggerServer: | |||
| return reply | |||
| def search(self, name, watch_point_id=0): | |||
| def search(self, filter_condition): | |||
| """ | |||
| Search for single node in graph. | |||
| Args: | |||
| name (str): The name pattern. | |||
| watch_point_id (int): The id of watchpoint. Default: 0. | |||
| filter_condition (dict): Filter condition. | |||
| - name (str): The name pattern. | |||
| - graph_name (str): The graph name. | |||
| - watch_point_id (int): The id of watchpoint. Default: 0. | |||
| - node_category (str): The node_category. Default: None | |||
| Returns: | |||
| dict, the searched nodes. | |||
| """ | |||
| log.info("receive search request for node:%s, in watchpoint:%d", name, watch_point_id) | |||
| log.info("receive search request with filter_condition: %s", filter_condition) | |||
| # validate watchpoint id | |||
| watch_point_id = filter_condition.pop('watch_point_id', 0) | |||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| watchpoint_stream.validate_watchpoint_id(watch_point_id) | |||
| # validate and update graph name | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| graph = graph_stream.search_nodes(name) | |||
| graph_name = graph_stream.validate_graph_name(filter_condition.get('graph_name')) | |||
| filter_condition['graph_name'] = graph_name | |||
| # get searched graph | |||
| graph = graph_stream.search_nodes(filter_condition) | |||
| # add watched label to graph | |||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id) | |||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id, graph_name) | |||
| return graph | |||
| def tensor_comparisons(self, name, shape, detail='data', tolerance='0'): | |||
| @@ -150,7 +178,8 @@ class DebuggerServer: | |||
| if node_type == NodeTypeEnum.PARAMETER.value: | |||
| reply = tensor_stream.get_tensors_diff(tensor_name, parsed_shape, tolerance) | |||
| else: | |||
| raise DebuggerParamValueError("The node type must be parameter, but got {}.".format(node_type)) | |||
| raise DebuggerParamValueError( | |||
| "The node type must be parameter, but got {}.".format(node_type)) | |||
| return reply | |||
| def retrieve(self, mode, filter_condition=None): | |||
| @@ -196,10 +225,13 @@ class DebuggerServer: | |||
| self.cache_store.clean_data() | |||
| log.info("Clean data queue cache when retrieve all request.") | |||
| result = {} | |||
| for stream in [Streams.METADATA, Streams.GRAPH, Streams.WATCHPOINT]: | |||
| for stream in [Streams.METADATA, Streams.GRAPH]: | |||
| sub_res = self.cache_store.get_stream_handler(stream).get() | |||
| result.update(sub_res) | |||
| sub_res = self._hide_parameters_for_ui() | |||
| result.update(sub_res) | |||
| return result | |||
| def _retrieve_node(self, filter_condition): | |||
| @@ -210,10 +242,9 @@ class DebuggerServer: | |||
| filter_condition (dict): Filter condition. | |||
| - name (str): The name of single node. | |||
| - graph_name (str): The relative graph_name of the node. | |||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | |||
| the node list from root node to single node. | |||
| - watch_point_id (int): The id of watchpoint. | |||
| Returns: | |||
| @@ -222,9 +253,13 @@ class DebuggerServer: | |||
| log.debug("Retrieve node %s.", filter_condition) | |||
| # validate node name | |||
| node_name = filter_condition.get('name') | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| graph_name = graph_stream.validate_graph_name(filter_condition.get('graph_name')) | |||
| if node_name: | |||
| self.cache_store.get_stream_handler(Streams.GRAPH).get_node_type(node_name) | |||
| # validate node name | |||
| graph_stream.get_node_type(node_name, graph_name) | |||
| filter_condition['single_node'] = bool(filter_condition.get('single_node')) | |||
| filter_condition['graph_name'] = graph_name | |||
| reply = self._get_nodes_info(filter_condition) | |||
| return reply | |||
| @@ -236,10 +271,9 @@ class DebuggerServer: | |||
| filter_condition (dict): The filter condition. | |||
| - name (str): The node name. | |||
| - graph_name (str): The relative graph_name of the node. | |||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | |||
| the node list from root node to single node. | |||
| - watch_point_id (int): The id of watchpoint. | |||
| Returns: | |||
| @@ -254,15 +288,16 @@ class DebuggerServer: | |||
| reply = graph_stream.get(filter_condition) | |||
| graph = reply.get('graph') | |||
| # add watched label to graph | |||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id) | |||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id, filter_condition.get('graph_name')) | |||
| return reply | |||
| def retrieve_tensor_history(self, node_name): | |||
| def retrieve_tensor_history(self, node_name, graph_name=None): | |||
| """ | |||
| Retrieve tensor history for leaf node. | |||
| Args: | |||
| node_name (str): The name of leaf node. | |||
| graph_name (str): The graph name. Default: None. | |||
| Returns: | |||
| dict, the tensor history and metadata. | |||
| @@ -271,39 +306,31 @@ class DebuggerServer: | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| if metadata_stream.state == ServerStatus.PENDING.value: | |||
| log.info("The backend is in pending status.") | |||
| return metadata_stream.get() | |||
| self._validate_leaf_name(node_name) | |||
| res = self._get_tensor_history(node_name) | |||
| return metadata_stream.get(['state', 'step']) | |||
| res = self._get_tensor_history(node_name, graph_name) | |||
| return res | |||
| def _validate_leaf_name(self, node_name): | |||
| """Validate if the node is a leaf node.""" | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| node_type = graph_stream.get_node_type(node_name) | |||
| if is_scope_type(node_type): | |||
| log.error("Scope type node has no tensor history.") | |||
| raise DebuggerParamValueError("Invalid leaf node name.") | |||
| def _get_tensor_history(self, node_name): | |||
| def _get_tensor_history(self, node_name, graph_name=None): | |||
| """ | |||
| Get tensor history for single node. | |||
| Args: | |||
| node_name (str): The name of leaf node. | |||
| graph_name (str): The graph name. Default: None. | |||
| Returns: | |||
| dict, the tensor history and metadata. | |||
| """ | |||
| # get basic tensor history | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| tensor_history = graph_stream.get_tensor_history(node_name) | |||
| tensor_history = graph_stream.get_tensor_history(node_name, graph_name) | |||
| # add tensor value for tensor history | |||
| self._add_tensor_value_for_tensor_history(tensor_history, node_name) | |||
| # add hit label for tensor history | |||
| watchpoint_hit_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | |||
| watchpoint_hit_stream.update_tensor_history(tensor_history) | |||
| # add metadata | |||
| metadata = self.cache_store.get_stream_handler(Streams.METADATA).get() | |||
| metadata = self.cache_store.get_stream_handler(Streams.METADATA).get(['state', 'step']) | |||
| tensor_history.update(metadata) | |||
| return tensor_history | |||
| @@ -325,28 +352,30 @@ class DebuggerServer: | |||
| self.cache_store.put_command({'view_cmd': view_cmd, 'node_name': node_name}) | |||
| log.debug("Send view cmd.") | |||
| def retrieve_tensor_value(self, name, detail, shape): | |||
| def retrieve_tensor_value(self, name, detail, shape, graph_name=None, prev=False): | |||
| """Retrieve the tensor value.""" | |||
| log.info("Retrieve tensor value: name: %s, detail: %s, shape: %s", name, detail, shape) | |||
| self.validate_tensor_param(name, detail) | |||
| # Limit to query max two dimensions for tensor in table view. | |||
| parsed_shape = TensorUtils.parse_shape(shape, limit=MAX_DIMENSIONS_FOR_TENSOR) | |||
| node_type, tensor_name = self._get_tensor_name_and_type_by_ui_name(name) | |||
| node_type, tensor_name = self._get_tensor_name_and_type_by_ui_name(name, graph_name) | |||
| reply = self.cache_store.get_stream_handler(Streams.TENSOR).get( | |||
| {'name': tensor_name, | |||
| 'node_type': node_type, | |||
| 'shape': parsed_shape} | |||
| 'shape': parsed_shape, | |||
| 'prev': prev} | |||
| ) | |||
| reply['tensor_value']['name'] = name | |||
| return reply | |||
| def _get_tensor_name_and_type_by_ui_name(self, name): | |||
| def _get_tensor_name_and_type_by_ui_name(self, name, graph_name=None): | |||
| """ | |||
| Get inner tensor name and type by UI name. | |||
| Args: | |||
| name (str): Node name shown in UI. | |||
| graph_name (Union[str, None]): The graph name, default is: None. | |||
| Returns: | |||
| str, full name of tensor. | |||
| @@ -354,8 +383,9 @@ class DebuggerServer: | |||
| """ | |||
| node_name, slot = name.rsplit(':', 1) | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| node_type = graph_stream.get_node_type(node_name) | |||
| full_name = graph_stream.get_full_name(node_name) | |||
| graph_name = graph_name if graph_name else graph_stream.get_graph_id_by_name(node_name) | |||
| node_type = graph_stream.get_node_type(node_name, graph_name) | |||
| full_name = graph_stream.get_full_name(node_name, graph_name) | |||
| tensor_name = full_name + ':' + slot | |||
| return node_type, tensor_name | |||
| @@ -379,9 +409,7 @@ class DebuggerServer: | |||
| filter_condition (dict): Filter condition. | |||
| - watch_point_id (int): The id of watchpoint. If not given, return all watchpoints. | |||
| - name (str): The name of single node. | |||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | |||
| the node list from root node to single node. | |||
| @@ -390,7 +418,7 @@ class DebuggerServer: | |||
| """ | |||
| watchpoint_id = filter_condition.get('watch_point_id', 0) | |||
| if not watchpoint_id: | |||
| reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT).get() | |||
| reply = self._hide_parameters_for_ui() | |||
| log.debug("Get condition of watchpoints.") | |||
| else: | |||
| reply = self._retrieve_node(filter_condition) | |||
| @@ -406,7 +434,6 @@ class DebuggerServer: | |||
| filter_condition (dict): Filter condition. | |||
| - name (str): The name of single node. | |||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | |||
| the node list from root node to single node. | |||
| @@ -418,34 +445,48 @@ class DebuggerServer: | |||
| if node_name is None: | |||
| reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).get() | |||
| return reply | |||
| # get tensor history and graph of the hit node. | |||
| self._validate_leaf_name(node_name) | |||
| # get tensor history | |||
| reply = self._get_tensor_history(node_name) | |||
| log.debug("Get tensor history for watchpoint hit node.") | |||
| # get single graph | |||
| if filter_condition.get('single_node'): | |||
| graph_name = self.cache_store.get_stream_handler(Streams.GRAPH).validate_graph_name( | |||
| filter_condition.get('graph_name')) | |||
| filter_condition['graph_name'] = graph_name | |||
| graph = self._get_nodes_info(filter_condition) | |||
| reply.update(graph) | |||
| log.debug("Get tensor history for watchpoint hit node.") | |||
| return reply | |||
| def create_watchpoint(self, watch_condition, watch_nodes=None, watch_point_id=None): | |||
| def create_watchpoint(self, watch_condition, watch_nodes=None, watch_point_id=None, search_pattern=None, | |||
| graph_name=None): | |||
| """ | |||
| Create watchpoint. | |||
| Args: | |||
| watch_condition (dict): The watch condition. | |||
| - condition (str): Accept `INF` or `NAN`. | |||
| - param (list[float]): Not defined yet. | |||
| watch_condition (dict): The watch condition. The format is like: | |||
| { | |||
| "id": "tensor_too_large", | |||
| "params": [ | |||
| { | |||
| "name": "abs_mean_gt", | |||
| "disable": false, | |||
| "value": 1.1 | |||
| } | |||
| ] | |||
| } | |||
| - id (str): Id of condition. | |||
| - params (list[dict]): The list of param for this condition. | |||
| watch_nodes (list[str]): The list of node names. | |||
| watch_point_id (int): The id of watchpoint. | |||
| search_pattern (dict): The search pattern. Default: None. | |||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||
| Returns: | |||
| dict, the id of new watchpoint. | |||
| dict, the id of new watchpoint and metadata info. | |||
| """ | |||
| log.info("Received create watchpoint request. WatchCondition: %s", watch_condition) | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| @@ -453,17 +494,28 @@ class DebuggerServer: | |||
| log.error("Failed to create watchpoint as the MindSpore is not in waiting state.") | |||
| raise DebuggerCreateWatchPointError( | |||
| "Failed to create watchpoint as the MindSpore is not in waiting state.") | |||
| if metadata_stream.backend == 'GPU' and watch_condition.get('condition') == 'OVERFLOW': | |||
| log.error("GPU doesn't support OVERFLOW watch condition.") | |||
| raise DebuggerParamValueError("GPU doesn't support OVERFLOW watch condition.") | |||
| if metadata_stream.backend == 'GPU' and watch_condition.get('id') in ( | |||
| ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, ConditionIdEnum.OPERATOR_OVERFLOW.value): | |||
| log.error("GPU doesn't support overflow watch condition.") | |||
| raise DebuggerParamValueError("GPU doesn't support overflow watch condition.") | |||
| if metadata_stream.backend == 'Ascend' and watch_condition.get('id') == ConditionIdEnum.NAN.value: | |||
| log.error("Ascend doesn't support nan watch condition.") | |||
| raise DebuggerParamValueError("Ascend doesn't support nan watch condition.") | |||
| watch_nodes = self._get_node_basic_infos(watch_nodes) | |||
| watch_point_id = self.cache_store.get_stream_handler(Streams.WATCHPOINT).create_watchpoint( | |||
| watch_condition, watch_nodes, watch_point_id) | |||
| watch_nodes = self._get_watch_node_with_basic_info( | |||
| node_names=watch_nodes, search_pattern=search_pattern, graph_name=graph_name) | |||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| watch_point_id = watchpoint_stream.create_watchpoint( | |||
| self.condition_mgr, watch_condition, watch_nodes, watch_point_id) | |||
| log.info("Create watchpoint %d", watch_point_id) | |||
| return {'id': watch_point_id} | |||
| def update_watchpoint(self, watch_point_id, watch_nodes, mode, name=None): | |||
| metadata_stream.enable_recheck = watchpoint_stream.is_recheckable(metadata_stream.backend) | |||
| res = metadata_stream.get(['state', 'enable_recheck']) | |||
| res['id'] = watch_point_id | |||
| return res | |||
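# Hedged usage sketch for the documented watch_condition format; the server
# instance, node name and threshold below are hypothetical placeholders.
reply = debugger_server.create_watchpoint(
    watch_condition={
        'id': 'tensor_too_large',
        'params': [{'name': 'abs_mean_gt', 'disable': False, 'value': 1.1}],
    },
    watch_nodes=['Default/network/conv1.weight'],
    graph_name='graph_0',
)
# The reply carries the new watchpoint id plus the selected metadata fields,
# roughly {'id': 1, 'metadata': {'state': ..., 'enable_recheck': ...}}.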
| def update_watchpoint(self, watch_point_id, watch_nodes, mode, search_pattern=None, graph_name=None): | |||
| """ | |||
| Update watchpoint. | |||
| @@ -472,13 +524,14 @@ class DebuggerServer: | |||
| watch_nodes (list[str]): The list of node names. | |||
| mode (int): The update operator on nodes. 0 for remove nodes from watch nodes. | |||
| 1 for add nodes to watch nodes. | |||
| name (str): The search name. Default: None. | |||
| search_pattern (dict): The search pattern. Default: None. | |||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||
| Returns: | |||
| dict, empty response. | |||
| dict, the metadata info. | |||
| """ | |||
| if self.cache_store.get_stream_handler( | |||
| Streams.METADATA).state != ServerStatus.WAITING.value: | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| if metadata_stream.state != ServerStatus.WAITING.value: | |||
| log.error("Failed to update watchpoint as the MindSpore is not in waiting state.") | |||
| raise DebuggerUpdateWatchPointError( | |||
| "Failed to update watchpoint as the MindSpore is not in waiting state." | |||
| @@ -489,22 +542,40 @@ class DebuggerServer: | |||
| if not watch_nodes or not watch_point_id: | |||
| log.error("Invalid parameter for update watchpoint.") | |||
| raise DebuggerParamValueError("Invalid parameter for update watchpoint.") | |||
| # update watch node | |||
| if name is not None: | |||
| watch_nodes = self._get_watch_nodes_by_search(watch_nodes) | |||
| elif mode == 1: | |||
| watch_nodes = self._get_node_basic_infos(watch_nodes) | |||
| # get node basic info for watch nodes | |||
| watch_nodes = self._get_watch_node_with_basic_info(watch_nodes, search_pattern, graph_name) | |||
| watchpoint_stream.update_watchpoint(watch_point_id, watch_nodes, mode) | |||
| metadata_stream.enable_recheck = watchpoint_stream.is_recheckable(metadata_stream.backend) | |||
| log.info("Update watchpoint with id: %d", watch_point_id) | |||
| return {} | |||
| return metadata_stream.get(['state', 'enable_recheck']) | |||
| def _get_watch_nodes_by_search(self, watch_nodes): | |||
| def _get_watch_node_with_basic_info(self, node_names, search_pattern=None, graph_name=None): | |||
| """ | |||
| Get watch node with basic info. | |||
| Args: | |||
| node_names (list[str]): A list of node names. | |||
| search_pattern (dict): Get watch node with search pattern. Default: None | |||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||
| Returns: | |||
| list[NodeBasicInfo], a list of node basic infos. | |||
| """ | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| graph_name = graph_stream.validate_graph_name(graph_name) | |||
| if search_pattern is not None: | |||
| watch_nodes = self._get_watch_nodes_by_search(node_names, search_pattern, graph_name) | |||
| else: | |||
| watch_nodes = self._get_node_basic_infos(node_names, graph_name=graph_name) | |||
| return watch_nodes | |||
| def _get_watch_nodes_by_search(self, watch_nodes, search_pattern, graph_name): | |||
| """Get watched leaf nodes by search name.""" | |||
| watched_leaf_nodes = [] | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
new_pattern = dict(search_pattern)
new_pattern['graph_name'] = graph_name
| for search_name in watch_nodes: | |||
| search_nodes = graph_stream.get_searched_node_list() | |||
| search_nodes = graph_stream.get_searched_node_list(new_pattern) | |||
| search_node_names = [ | |||
| NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||
| for node in search_nodes | |||
| @@ -515,41 +586,48 @@ class DebuggerServer: | |||
| return watched_leaf_nodes | |||
| def delete_watchpoint(self, watch_point_id): | |||
| def delete_watchpoint(self, watch_point_id=None): | |||
| """ | |||
| Delete watchpoint. | |||
| Args: | |||
| watch_point_id (int): The id of watchpoint. | |||
| watch_point_id (Union[None, int]): The id of watchpoint. | |||
| If None, delete all watchpoints. Default: None. | |||
| Returns: | |||
| dict, empty response. | |||
| dict, the metadata info. | |||
| """ | |||
| if self.cache_store.get_stream_handler( | |||
| Streams.METADATA).state != ServerStatus.WAITING.value: | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| if metadata_stream.state != ServerStatus.WAITING.value: | |||
| log.error("Failed to delete watchpoint as the MindSpore is not in waiting state.") | |||
| raise DebuggerDeleteWatchPointError( | |||
| "Failed to delete watchpoint as the MindSpore is not in waiting state." | |||
| ) | |||
| self.cache_store.get_stream_handler(Streams.WATCHPOINT).delete_watchpoint(watch_point_id) | |||
| log.info("Delete watchpoint with id: %d", watch_point_id) | |||
| return {} | |||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| watchpoint_stream.delete_watchpoint(watch_point_id) | |||
| metadata_stream.enable_recheck = watchpoint_stream.is_recheckable() | |||
| log.info("Delete watchpoint with id: %s", watch_point_id) | |||
| return metadata_stream.get(['state', 'enable_recheck']) | |||
| def _get_node_basic_infos(self, node_names, graph_name=None): | |||
| """ | |||
| Get node info according to node names. | |||
| def _get_node_basic_infos(self, node_names): | |||
| """Get node info according to node names.""" | |||
| Args: | |||
| node_names (list[str]): A list of node names. | |||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||
| Returns: | |||
| list[NodeBasicInfo], a list of basic node infos. | |||
| """ | |||
| if not node_names: | |||
| return [] | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| node_infos = [] | |||
| for node_name in node_names: | |||
| node_type = graph_stream.get_node_type(node_name) | |||
| if node_type == NodeTypeEnum.AGGREGATION_SCOPE.value: | |||
| sub_nodes = graph_stream.get_nodes_by_scope(node_name) | |||
| sub_infos = [NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||
| for node in sub_nodes] | |||
| node_infos.extend(sub_infos) | |||
| full_name = graph_stream.get_full_name(node_name) | |||
| node_infos.append(NodeBasicInfo(name=node_name, full_name=full_name, type=node_type)) | |||
| node_info = graph_stream.get_node_basic_info(node_name, graph_name) | |||
| node_infos.append(node_info) | |||
| return node_infos | |||
| def control(self, params=None): | |||
| @@ -561,14 +639,12 @@ class DebuggerServer: | |||
| - mode (str): Acceptable control command, including `continue`, | |||
| `pause` and `terminate`. | |||
| - level (str): The control granularity, `node` level or `step` level. | |||
| Default: `step`. | |||
| - steps (int): Specify the steps that training should run. | |||
| Used when `level` is `step`. | |||
| - name (str): Specify the name of the node. Used when `level` is `node`. | |||
| - graph_name (str): The graph name. | |||
| Returns: | |||
| dict, the response. | |||
| @@ -597,6 +673,9 @@ class DebuggerServer: | |||
| Args: | |||
| metadata_stream (MetadataHandler): The metadata_handler | |||
| params (dict): The control params. | |||
| Returns: | |||
| dict, metadata info. | |||
| """ | |||
| if metadata_stream.state != ServerStatus.WAITING.value: | |||
| log.error("MindSpore is not ready to run. Current state is: %s", metadata_stream.state) | |||
| @@ -604,7 +683,6 @@ class DebuggerServer: | |||
| "MindSpore is not ready to run or is running currently." | |||
| ) | |||
| metadata_stream.state = ServerStatus.RUNNING.value | |||
| current_state = ServerStatus.RUNNING.value | |||
| try: | |||
| event = self._construct_run_event(params) | |||
| self._send_watchpoints() | |||
| @@ -612,13 +690,12 @@ class DebuggerServer: | |||
| except MindInsightException as err: | |||
| log.error("Failed to send run event.") | |||
| log.exception(err) | |||
| current_state = ServerStatus.WAITING.value | |||
| metadata_stream.state = current_state | |||
| metadata_stream.state = ServerStatus.WAITING.value | |||
| raise DebuggerContinueError("Failed to send run command.") | |||
| else: | |||
| metadata_stream.enable_recheck = False | |||
| log.debug("Send the RunCMD to command queue.") | |||
| return {'metadata': {'state': current_state}} | |||
| return metadata_stream.get(['state', 'enable_recheck']) | |||
| def _construct_run_event(self, params): | |||
| """ | |||
| @@ -627,18 +704,22 @@ class DebuggerServer: | |||
| Args: | |||
| params (dict): The control params. | |||
| - level (str): The control granularity, `node` level or `step` level. | |||
| - level (str): The control granularity, `node`, `step` or `recheck` level. | |||
| Default: `step`. | |||
| - steps (int): Specify the steps that training should run. | |||
| Used when `level` is `step`. | |||
| - name (str): Specify the name of the node. Used when `level` is `node`. | |||
| - graph_name (str): The graph name. | |||
| Returns: | |||
| EventReply, control event with run command. | |||
| """ | |||
| level = params.get('level', 'step') | |||
| # validate level | |||
| if level not in [RunLevel.NODE.value, RunLevel.STEP.value, RunLevel.RECHECK.value]: | |||
| log.error("Invalid Value. `level` should be `step`, `node` or `recheck`. Got %s", level) | |||
| raise DebuggerParamValueError("level` should be `step`, `node` or `recheck`.") | |||
| # construct run command events | |||
| event = get_ack_reply() | |||
| if level == 'step': | |||
| steps = params.get('steps') | |||
| @@ -646,31 +727,37 @@ class DebuggerServer: | |||
| steps = 1 | |||
| run_cmd = RunCMD(run_level='step', run_steps=steps) | |||
| elif level == 'node': | |||
| name = params.get('name') | |||
| name = params.get('name', '') | |||
| graph_name = params.get('graph_name') | |||
| if name: | |||
| self._validate_leaf_name(name) | |||
| name = self.cache_store.get_stream_handler(Streams.GRAPH).get_full_name(name) | |||
| else: | |||
| name = '' | |||
| self._validate_leaf_name(name, graph_name) | |||
| name = self.cache_store.get_stream_handler(Streams.GRAPH).get_full_name(name, graph_name) | |||
| run_cmd = RunCMD(run_level='node', node_name=name) | |||
| else: | |||
| log.error("Invalid Value. `level` should be `step` or `node`. Got %s", level) | |||
| raise DebuggerParamValueError("level` should be `step` or `node`") | |||
| run_cmd = RunCMD(run_level='recheck') | |||
| event.run_cmd.CopyFrom(run_cmd) | |||
| log.debug("Construct run event. %s", event) | |||
| return event | |||
| def _validate_leaf_name(self, node_name, graph_name): | |||
| """Validate if the node is a leaf node.""" | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| node_type = graph_stream.get_node_type(node_name, graph_name) | |||
| if is_scope_type(node_type): | |||
| log.error("Scope type node has no tensor history.") | |||
| raise DebuggerParamValueError("Invalid leaf node name.") | |||
| def _send_watchpoints(self): | |||
| """Set watchpoints.""" | |||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | |||
| watchpoints = watchpoint_stream.get(filter_condition=True).get('watch_points') | |||
| if watchpoints: | |||
| for watchpoint in watchpoints: | |||
| set_commands = watchpoint_stream.get_pending_commands(self.cache_store.get_stream_handler(Streams.GRAPH)) | |||
| if set_commands: | |||
| for set_cmd in set_commands: | |||
| event = get_ack_reply() | |||
| event.set_cmd.CopyFrom(watchpoint) | |||
| event.set_cmd.CopyFrom(set_cmd) | |||
| self.cache_store.put_command(event) | |||
| watchpoint_stream.sync_set_cmd() | |||
| watchpoint_stream.sync_set_cmd(set_commands) | |||
| log.debug("Send SetCMD to MindSpore. %s", event) | |||
| def _pause(self, metadata_stream): | |||
| @@ -679,6 +766,9 @@ class DebuggerServer: | |||
| Args: | |||
| metadata_stream (MetadataHandler): The metadata stream handler. | |||
| Returns: | |||
| dict, metadata info. | |||
| """ | |||
| if metadata_stream.state != ServerStatus.RUNNING.value: | |||
| log.error("The MindSpore is not running.") | |||
| @@ -687,8 +777,9 @@ class DebuggerServer: | |||
| event = get_ack_reply() | |||
| event.run_cmd.CopyFrom(RunCMD(run_level='step', run_steps=0)) | |||
| self.cache_store.put_command(event) | |||
| metadata_stream.enable_recheck = False | |||
| log.debug("Send the Pause command") | |||
| return {'metadata': {'state': 'waiting'}} | |||
| return metadata_stream.get(['state', 'enable_recheck']) | |||
| def _terminate(self, metadata_stream): | |||
| """ | |||
| @@ -696,21 +787,27 @@ class DebuggerServer: | |||
| Args: | |||
| metadata_stream (MetadataHandler): The metadata stream handler. | |||
| Returns: | |||
| dict, metadata info. | |||
| """ | |||
| metadata_stream.state = 'pending' | |||
| self.cache_store.clean_data() | |||
| self.cache_store.clean_command() | |||
| event = get_ack_reply() | |||
| event.exit = True | |||
| self.cache_store.put_command(event) | |||
| metadata_stream.enable_recheck = False | |||
| log.debug("Send the ExitCMD.") | |||
| return {'metadata': {'state': 'pending'}} | |||
| return metadata_stream.get(['state', 'enable_recheck']) | |||
| def retrieve_node_by_bfs(self, node_name, ascend=False): | |||
| def retrieve_node_by_bfs(self, node_name, graph_name=None, ascend=False): | |||
| """ | |||
| Get the graph of the next node according to node_name. | |||
| Args: | |||
| node_name (str): The name of current chosen leaf node. | |||
| graph_name (str): The graph name. | |||
| ascend (bool): If True, traverse the input nodes; | |||
| If False, traverse the output nodes. Default is True. | |||
| @@ -721,6 +818,7 @@ class DebuggerServer: | |||
| node_name, ascend) | |||
| reply = {} | |||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||
| graph_name = graph_stream.validate_graph_name(graph_name) | |||
| next_node_name = graph_stream.get_node_by_bfs_order(node_name, ascend) | |||
| # no next node | |||
| if next_node_name is None: | |||
| @@ -728,6 +826,7 @@ class DebuggerServer: | |||
| # add graph and tensor history for next node | |||
| filter_condition = { | |||
| 'name': next_node_name, | |||
| 'graph_name': graph_name, | |||
| 'single_node': True | |||
| } | |||
| search_graph = self._get_nodes_info(filter_condition) | |||
| @@ -735,3 +834,85 @@ class DebuggerServer: | |||
| reply.update(search_graph) | |||
| return reply | |||
| def recheck(self): | |||
| """ | |||
| Recheck all watchpoints. | |||
| Returns: | |||
| dict, metadata info. | |||
| """ | |||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||
| # validate backend status is able to recheck watchpoint | |||
| if not metadata_stream.enable_recheck: | |||
| log.error("Recheck is not available.") | |||
| raise DebuggerRecheckError("Recheck is not available.") | |||
| metadata_stream.state = ServerStatus.RUNNING.value | |||
| metadata_stream.enable_recheck = False | |||
| # send updated watchpoint and recheck command | |||
| try: | |||
| event = self._construct_run_event({'level': 'recheck'}) | |||
| self._send_watchpoints() | |||
| self.cache_store.put_command(event) | |||
| except MindInsightException as err: | |||
| log.error("Failed to send recheck event.") | |||
| log.exception(err) | |||
| metadata_stream.state = ServerStatus.WAITING.value | |||
| metadata_stream.enable_recheck = True | |||
| raise DebuggerContinueError("Failed to send run command.") | |||
| else: | |||
| log.debug("Send the recheck to command queue.") | |||
| return metadata_stream.get(['state', 'enable_recheck']) | |||
| def retrieve_tensor_graph(self, tensor_name, graph_name): | |||
| """ | |||
| Retrieve tensor graph. | |||
| Args: | |||
| tensor_name (str): The tensor name from UI. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| dict, tensor graph object. | |||
| """ | |||
| log.info("Retrieve tensor graph for %s from %s", tensor_name, graph_name) | |||
| tensor_graph_ops = TensorDetailInfo(self.cache_store).get_tensor_graph(tensor_name, graph_name) | |||
| return tensor_graph_ops | |||
| def retrieve_tensor_hits(self, tensor_name, graph_name): | |||
| """ | |||
| Retrieve tensor hit information. | |||
| Args: | |||
| tensor_name (str): The tensor name from UI. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| dict, tensor hit info. | |||
| """ | |||
| log.info("Retrieve tensor hits for %s from %s", tensor_name, graph_name) | |||
| watch_points = TensorDetailInfo(self.cache_store).get_tensor_watch_points(tensor_name, graph_name) | |||
| return {'watch_points': watch_points} | |||
| def _hide_parameters_for_ui(self): | |||
| """ | |||
| Hide some parameters on ui. | |||
| Returns: | |||
| dict, watch point list. | |||
| """ | |||
| reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT).get() | |||
| watch_points = reply.get('watch_points') | |||
| for i, watch_point in enumerate(watch_points): | |||
| watch_condition = watch_point.get('watch_condition') | |||
| parameters = watch_condition.get('params') | |||
| watch_condition_id = watch_condition.get('id') | |||
| mgr_condition = self.condition_mgr.get_condition(watch_condition_id) | |||
| ui_watch_condition = [] | |||
| for param in parameters: | |||
| parameter_definition = mgr_condition.get_parameter_definition(param['name']) | |||
| if not parameter_definition.visible_on_ui: | |||
| continue | |||
| ui_watch_condition.append(param) | |||
| reply['watch_points'][i]['watch_condition']['params'] = ui_watch_condition | |||
| return reply | |||
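| # Illustrative sketch of the filtering above (parameter names hypothetical): if a | |||
| # watch condition carries params named 'abs_mean_gt' and 'internal_flag', and only | |||
| # the former's parameter definition has visible_on_ui=True, the reply keeps | |||
| # 'abs_mean_gt' and drops 'internal_flag', so the UI never sees internal-only knobs. | |||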
| @@ -27,6 +27,7 @@ service EventListener { | |||
| rpc SendGraph (stream Chunk) returns (EventReply) {}; | |||
| rpc SendTensors (stream TensorProto) returns (EventReply) {}; | |||
| rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {}; | |||
| rpc SendMultiGraphs (stream Chunk) returns (EventReply) {}; | |||
| } | |||
| message Metadata { | |||
| @@ -38,11 +39,15 @@ message Metadata { | |||
| string cur_node = 4; | |||
| // check if training is done. | |||
| bool training_done = 5; | |||
| // the number of total graphs | |||
| int32 graph_num = 6; | |||
| } | |||
| message Chunk { | |||
| bytes buffer = 1; | |||
| bool finished = 2; | |||
| } | |||
| message EventReply { | |||
| enum Status { | |||
| OK = 0; | |||
| @@ -61,13 +66,11 @@ message EventReply { | |||
| } | |||
| message RunCMD { | |||
| // running level. 'step' or 'node' | |||
| // the running level: "step", "node" or "recheck". | |||
| string run_level = 1; | |||
| oneof cmd { | |||
| int32 run_steps = 2; | |||
| // the full name of next node | |||
| // the full name of the next node | |||
| string node_name = 3; | |||
| } | |||
| } | |||
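| // Illustrative text-format example (not part of the schema): a recheck command | |||
| // carries only the level, leaving the `cmd` oneof unset: | |||
| //   run_cmd { run_level: "recheck" } | |||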
| @@ -96,9 +99,27 @@ message WatchCondition { | |||
| max_min_lt = 8; | |||
| mean_gt = 9; | |||
| mean_lt = 10; | |||
| sd_gt = 11; | |||
| sd_lt = 12; | |||
| tensor_general_overflow = 13; | |||
| tensor_initialization = 14; | |||
| tensor_too_large = 15; | |||
| tensor_too_small = 16; | |||
| tensor_all_zero = 17; | |||
| tensor_change_too_large = 18; | |||
| tensor_change_too_small = 19; | |||
| tensor_not_changed = 20; | |||
| } | |||
| Condition condition = 1; | |||
| float value = 2; // for between condition, there will be two values | |||
| float value = 2; | |||
| message Parameter { | |||
| string name = 1; | |||
| bool disabled = 2; | |||
| double value = 3; | |||
| bool hit = 4; // Whether this parameter is hit when checking tensor. | |||
| } | |||
| // Field number 3 was previously used on the MindSpore side as "repeated bool include = 3", so skip 3 for backward compatibility. | |||
| repeated Parameter params = 4; | |||
| } | |||
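| // Illustrative text-format example (the parameter name is an assumption): a | |||
| // tensor_too_large condition with one enabled parameter; `value` mirrors the | |||
| // single parameter for backward compatibility with the old one-value field: | |||
| //   watch_condition { | |||
| //     condition: tensor_too_large | |||
| //     value: 1.5 | |||
| //     params { name: "abs_mean_gt" value: 1.5 disabled: false } | |||
| //   } | |||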
| message WatchNode { | |||
| @@ -19,7 +19,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( | |||
| package='debugger', | |||
| syntax='proto3', | |||
| serialized_options=None, | |||
| serialized_pb=b'\n+mindinsight/debugger/proto/debug_grpc.proto\x12\x08\x64\x65\x62ugger\x1a)mindinsight/debugger/proto/ms_graph.proto\"k\n\x08Metadata\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x10\n\x08\x63ur_step\x18\x02 \x01(\x05\x12\x0f\n\x07\x62\x61\x63kend\x18\x03 \x01(\t\x12\x10\n\x08\x63ur_node\x18\x04 \x01(\t\x12\x15\n\rtraining_done\x18\x05 \x01(\x08\"\x17\n\x05\x43hunk\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\"\xec\x01\n\nEventReply\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.debugger.EventReply.Status\x12\x0e\n\x04\x65xit\x18\x02 \x01(\x08H\x00\x12#\n\x07run_cmd\x18\x03 \x01(\x0b\x32\x10.debugger.RunCMDH\x00\x12#\n\x07set_cmd\x18\x04 \x01(\x0b\x32\x10.debugger.SetCMDH\x00\x12%\n\x08view_cmd\x18\x05 \x01(\x0b\x32\x11.debugger.ViewCMDH\x00\")\n\x06Status\x12\x06\n\x02OK\x10\x00\x12\n\n\x06\x46\x41ILED\x10\x01\x12\x0b\n\x07PENDING\x10\x02\x42\x05\n\x03\x63md\"L\n\x06RunCMD\x12\x11\n\trun_level\x18\x01 \x01(\t\x12\x13\n\trun_steps\x18\x02 \x01(\x05H\x00\x12\x13\n\tnode_name\x18\x03 \x01(\tH\x00\x42\x05\n\x03\x63md\"\x81\x01\n\x06SetCMD\x12(\n\x0bwatch_nodes\x18\x01 \x03(\x0b\x32\x13.debugger.WatchNode\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\x0e\n\x06\x64\x65lete\x18\x03 \x01(\x08\x12\n\n\x02id\x18\x04 \x01(\x05\"1\n\x07ViewCMD\x12&\n\x07tensors\x18\x01 \x03(\x0b\x32\x15.debugger.TensorProto\"\xee\x01\n\x0eWatchCondition\x12\x35\n\tcondition\x18\x01 \x01(\x0e\x32\".debugger.WatchCondition.Condition\x12\r\n\x05value\x18\x02 \x01(\x02\"\x95\x01\n\tCondition\x12\x07\n\x03nan\x10\x00\x12\x07\n\x03inf\x10\x01\x12\x0c\n\x08overflow\x10\x02\x12\n\n\x06max_gt\x10\x03\x12\n\n\x06max_lt\x10\x04\x12\n\n\x06min_gt\x10\x05\x12\n\n\x06min_lt\x10\x06\x12\x0e\n\nmax_min_gt\x10\x07\x12\x0e\n\nmax_min_lt\x10\x08\x12\x0b\n\x07mean_gt\x10\t\x12\x0b\n\x07mean_lt\x10\n\"1\n\tWatchNode\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x11\n\tnode_type\x18\x02 \x01(\t\"u\n\rWatchpointHit\x12%\n\x06tensor\x18\x01 \x01(\x0b\x32\x15.debugger.TensorProto\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\n\n\x02id\x18\x03 \x01(\x05\x32\xc3\x02\n\rEventListener\x12\x35\n\x07WaitCMD\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12:\n\x0cSendMetadata\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12\x36\n\tSendGraph\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x12>\n\x0bSendTensors\x12\x15.debugger.TensorProto\x1a\x14.debugger.EventReply\"\x00(\x01\x12G\n\x12SendWatchpointHits\x12\x17.debugger.WatchpointHit\x1a\x14.debugger.EventReply\"\x00(\x01\x62\x06proto3' | |||
| serialized_pb=b'\n+mindinsight/debugger/proto/debug_grpc.proto\x12\x08\x64\x65\x62ugger\x1a)mindinsight/debugger/proto/ms_graph.proto\"~\n\x08Metadata\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x10\n\x08\x63ur_step\x18\x02 \x01(\x05\x12\x0f\n\x07\x62\x61\x63kend\x18\x03 \x01(\t\x12\x10\n\x08\x63ur_node\x18\x04 \x01(\t\x12\x15\n\rtraining_done\x18\x05 \x01(\x08\x12\x11\n\tgraph_num\x18\x06 \x01(\x05\")\n\x05\x43hunk\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\x10\n\x08\x66inished\x18\x02 \x01(\x08\"\xec\x01\n\nEventReply\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.debugger.EventReply.Status\x12\x0e\n\x04\x65xit\x18\x02 \x01(\x08H\x00\x12#\n\x07run_cmd\x18\x03 \x01(\x0b\x32\x10.debugger.RunCMDH\x00\x12#\n\x07set_cmd\x18\x04 \x01(\x0b\x32\x10.debugger.SetCMDH\x00\x12%\n\x08view_cmd\x18\x05 \x01(\x0b\x32\x11.debugger.ViewCMDH\x00\")\n\x06Status\x12\x06\n\x02OK\x10\x00\x12\n\n\x06\x46\x41ILED\x10\x01\x12\x0b\n\x07PENDING\x10\x02\x42\x05\n\x03\x63md\"L\n\x06RunCMD\x12\x11\n\trun_level\x18\x01 \x01(\t\x12\x13\n\trun_steps\x18\x02 \x01(\x05H\x00\x12\x13\n\tnode_name\x18\x03 \x01(\tH\x00\x42\x05\n\x03\x63md\"\x81\x01\n\x06SetCMD\x12(\n\x0bwatch_nodes\x18\x01 \x03(\x0b\x32\x13.debugger.WatchNode\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\x0e\n\x06\x64\x65lete\x18\x03 \x01(\x08\x12\n\n\x02id\x18\x04 \x01(\x05\"1\n\x07ViewCMD\x12&\n\x07tensors\x18\x01 \x03(\x0b\x32\x15.debugger.TensorProto\"\xcc\x04\n\x0eWatchCondition\x12\x35\n\tcondition\x18\x01 \x01(\x0e\x32\".debugger.WatchCondition.Condition\x12\r\n\x05value\x18\x02 \x01(\x02\x12\x32\n\x06params\x18\x04 \x03(\x0b\x32\".debugger.WatchCondition.Parameter\x1aG\n\tParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64isabled\x18\x02 \x01(\x08\x12\r\n\x05value\x18\x03 \x01(\x01\x12\x0b\n\x03hit\x18\x04 \x01(\x08\"\xf6\x02\n\tCondition\x12\x07\n\x03nan\x10\x00\x12\x07\n\x03inf\x10\x01\x12\x0c\n\x08overflow\x10\x02\x12\n\n\x06max_gt\x10\x03\x12\n\n\x06max_lt\x10\x04\x12\n\n\x06min_gt\x10\x05\x12\n\n\x06min_lt\x10\x06\x12\x0e\n\nmax_min_gt\x10\x07\x12\x0e\n\nmax_min_lt\x10\x08\x12\x0b\n\x07mean_gt\x10\t\x12\x0b\n\x07mean_lt\x10\n\x12\t\n\x05sd_gt\x10\x0b\x12\t\n\x05sd_lt\x10\x0c\x12\x1b\n\x17tensor_general_overflow\x10\r\x12\x19\n\x15tensor_initialization\x10\x0e\x12\x14\n\x10tensor_too_large\x10\x0f\x12\x14\n\x10tensor_too_small\x10\x10\x12\x13\n\x0ftensor_all_zero\x10\x11\x12\x1b\n\x17tensor_change_too_large\x10\x12\x12\x1b\n\x17tensor_change_too_small\x10\x13\x12\x16\n\x12tensor_not_changed\x10\x14\"1\n\tWatchNode\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x11\n\tnode_type\x18\x02 \x01(\t\"u\n\rWatchpointHit\x12%\n\x06tensor\x18\x01 \x01(\x0b\x32\x15.debugger.TensorProto\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\n\n\x02id\x18\x03 \x01(\x05\x32\x81\x03\n\rEventListener\x12\x35\n\x07WaitCMD\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12:\n\x0cSendMetadata\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12\x36\n\tSendGraph\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x12>\n\x0bSendTensors\x12\x15.debugger.TensorProto\x1a\x14.debugger.EventReply\"\x00(\x01\x12G\n\x12SendWatchpointHits\x12\x17.debugger.WatchpointHit\x1a\x14.debugger.EventReply\"\x00(\x01\x12<\n\x0fSendMultiGraphs\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x62\x06proto3' | |||
| , | |||
| dependencies=[mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.DESCRIPTOR,]) | |||
| @@ -46,8 +46,8 @@ _EVENTREPLY_STATUS = _descriptor.EnumDescriptor( | |||
| ], | |||
| containing_type=None, | |||
| serialized_options=None, | |||
| serialized_start=423, | |||
| serialized_end=464, | |||
| serialized_start=460, | |||
| serialized_end=501, | |||
| ) | |||
| _sym_db.RegisterEnumDescriptor(_EVENTREPLY_STATUS) | |||
| @@ -101,11 +101,51 @@ _WATCHCONDITION_CONDITION = _descriptor.EnumDescriptor( | |||
| name='mean_lt', index=10, number=10, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='sd_gt', index=11, number=11, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='sd_lt', index=12, number=12, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_general_overflow', index=13, number=13, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_initialization', index=14, number=14, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_too_large', index=15, number=15, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_too_small', index=16, number=16, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_all_zero', index=17, number=17, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_change_too_large', index=18, number=18, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_change_too_small', index=19, number=19, | |||
| serialized_options=None, | |||
| type=None), | |||
| _descriptor.EnumValueDescriptor( | |||
| name='tensor_not_changed', index=20, number=20, | |||
| serialized_options=None, | |||
| type=None), | |||
| ], | |||
| containing_type=None, | |||
| serialized_options=None, | |||
| serialized_start=824, | |||
| serialized_end=973, | |||
| serialized_start=986, | |||
| serialized_end=1360, | |||
| ) | |||
| _sym_db.RegisterEnumDescriptor(_WATCHCONDITION_CONDITION) | |||
| @@ -152,6 +192,13 @@ _METADATA = _descriptor.Descriptor( | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| _descriptor.FieldDescriptor( | |||
| name='graph_num', full_name='debugger.Metadata.graph_num', index=5, | |||
| number=6, type=5, cpp_type=1, label=1, | |||
| has_default_value=False, default_value=0, | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| ], | |||
| extensions=[ | |||
| ], | |||
| @@ -165,7 +212,7 @@ _METADATA = _descriptor.Descriptor( | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=100, | |||
| serialized_end=207, | |||
| serialized_end=226, | |||
| ) | |||
| @@ -183,6 +230,13 @@ _CHUNK = _descriptor.Descriptor( | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| _descriptor.FieldDescriptor( | |||
| name='finished', full_name='debugger.Chunk.finished', index=1, | |||
| number=2, type=8, cpp_type=7, label=1, | |||
| has_default_value=False, default_value=False, | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| ], | |||
| extensions=[ | |||
| ], | |||
| @@ -195,8 +249,8 @@ _CHUNK = _descriptor.Descriptor( | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=209, | |||
| serialized_end=232, | |||
| serialized_start=228, | |||
| serialized_end=269, | |||
| ) | |||
| @@ -258,8 +312,8 @@ _EVENTREPLY = _descriptor.Descriptor( | |||
| name='cmd', full_name='debugger.EventReply.cmd', | |||
| index=0, containing_type=None, fields=[]), | |||
| ], | |||
| serialized_start=235, | |||
| serialized_end=471, | |||
| serialized_start=272, | |||
| serialized_end=508, | |||
| ) | |||
| @@ -306,8 +360,8 @@ _RUNCMD = _descriptor.Descriptor( | |||
| name='cmd', full_name='debugger.RunCMD.cmd', | |||
| index=0, containing_type=None, fields=[]), | |||
| ], | |||
| serialized_start=473, | |||
| serialized_end=549, | |||
| serialized_start=510, | |||
| serialized_end=586, | |||
| ) | |||
| @@ -358,8 +412,8 @@ _SETCMD = _descriptor.Descriptor( | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=552, | |||
| serialized_end=681, | |||
| serialized_start=589, | |||
| serialized_end=718, | |||
| ) | |||
| @@ -389,11 +443,62 @@ _VIEWCMD = _descriptor.Descriptor( | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=683, | |||
| serialized_end=732, | |||
| serialized_start=720, | |||
| serialized_end=769, | |||
| ) | |||
| _WATCHCONDITION_PARAMETER = _descriptor.Descriptor( | |||
| name='Parameter', | |||
| full_name='debugger.WatchCondition.Parameter', | |||
| filename=None, | |||
| file=DESCRIPTOR, | |||
| containing_type=None, | |||
| fields=[ | |||
| _descriptor.FieldDescriptor( | |||
| name='name', full_name='debugger.WatchCondition.Parameter.name', index=0, | |||
| number=1, type=9, cpp_type=9, label=1, | |||
| has_default_value=False, default_value=b"".decode('utf-8'), | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| _descriptor.FieldDescriptor( | |||
| name='disabled', full_name='debugger.WatchCondition.Parameter.disabled', index=1, | |||
| number=2, type=8, cpp_type=7, label=1, | |||
| has_default_value=False, default_value=False, | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| _descriptor.FieldDescriptor( | |||
| name='value', full_name='debugger.WatchCondition.Parameter.value', index=2, | |||
| number=3, type=1, cpp_type=5, label=1, | |||
| has_default_value=False, default_value=float(0), | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| _descriptor.FieldDescriptor( | |||
| name='hit', full_name='debugger.WatchCondition.Parameter.hit', index=3, | |||
| number=4, type=8, cpp_type=7, label=1, | |||
| has_default_value=False, default_value=False, | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| ], | |||
| extensions=[ | |||
| ], | |||
| nested_types=[], | |||
| enum_types=[ | |||
| ], | |||
| serialized_options=None, | |||
| is_extendable=False, | |||
| syntax='proto3', | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=912, | |||
| serialized_end=983, | |||
| ) | |||
| _WATCHCONDITION = _descriptor.Descriptor( | |||
| name='WatchCondition', | |||
| full_name='debugger.WatchCondition', | |||
| @@ -415,10 +520,17 @@ _WATCHCONDITION = _descriptor.Descriptor( | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| _descriptor.FieldDescriptor( | |||
| name='params', full_name='debugger.WatchCondition.params', index=2, | |||
| number=4, type=11, cpp_type=10, label=3, | |||
| has_default_value=False, default_value=[], | |||
| message_type=None, enum_type=None, containing_type=None, | |||
| is_extension=False, extension_scope=None, | |||
| serialized_options=None, file=DESCRIPTOR), | |||
| ], | |||
| extensions=[ | |||
| ], | |||
| nested_types=[], | |||
| nested_types=[_WATCHCONDITION_PARAMETER, ], | |||
| enum_types=[ | |||
| _WATCHCONDITION_CONDITION, | |||
| ], | |||
| @@ -428,8 +540,8 @@ _WATCHCONDITION = _descriptor.Descriptor( | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=735, | |||
| serialized_end=973, | |||
| serialized_start=772, | |||
| serialized_end=1360, | |||
| ) | |||
| @@ -466,8 +578,8 @@ _WATCHNODE = _descriptor.Descriptor( | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=975, | |||
| serialized_end=1024, | |||
| serialized_start=1362, | |||
| serialized_end=1411, | |||
| ) | |||
| @@ -511,8 +623,8 @@ _WATCHPOINTHIT = _descriptor.Descriptor( | |||
| extension_ranges=[], | |||
| oneofs=[ | |||
| ], | |||
| serialized_start=1026, | |||
| serialized_end=1143, | |||
| serialized_start=1413, | |||
| serialized_end=1530, | |||
| ) | |||
| _EVENTREPLY.fields_by_name['status'].enum_type = _EVENTREPLY_STATUS | |||
| @@ -541,7 +653,9 @@ _RUNCMD.fields_by_name['node_name'].containing_oneof = _RUNCMD.oneofs_by_name['c | |||
| _SETCMD.fields_by_name['watch_nodes'].message_type = _WATCHNODE | |||
| _SETCMD.fields_by_name['watch_condition'].message_type = _WATCHCONDITION | |||
| _VIEWCMD.fields_by_name['tensors'].message_type = mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2._TENSORPROTO | |||
| _WATCHCONDITION_PARAMETER.containing_type = _WATCHCONDITION | |||
| _WATCHCONDITION.fields_by_name['condition'].enum_type = _WATCHCONDITION_CONDITION | |||
| _WATCHCONDITION.fields_by_name['params'].message_type = _WATCHCONDITION_PARAMETER | |||
| _WATCHCONDITION_CONDITION.containing_type = _WATCHCONDITION | |||
| _WATCHPOINTHIT.fields_by_name['tensor'].message_type = mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2._TENSORPROTO | |||
| _WATCHPOINTHIT.fields_by_name['watch_condition'].message_type = _WATCHCONDITION | |||
| @@ -599,11 +713,19 @@ ViewCMD = _reflection.GeneratedProtocolMessageType('ViewCMD', (_message.Message, | |||
| _sym_db.RegisterMessage(ViewCMD) | |||
| WatchCondition = _reflection.GeneratedProtocolMessageType('WatchCondition', (_message.Message,), { | |||
| 'Parameter' : _reflection.GeneratedProtocolMessageType('Parameter', (_message.Message,), { | |||
| 'DESCRIPTOR' : _WATCHCONDITION_PARAMETER, | |||
| '__module__' : 'mindinsight.debugger.proto.debug_grpc_pb2' | |||
| # @@protoc_insertion_point(class_scope:debugger.WatchCondition.Parameter) | |||
| }) | |||
| , | |||
| 'DESCRIPTOR' : _WATCHCONDITION, | |||
| '__module__' : 'mindinsight.debugger.proto.debug_grpc_pb2' | |||
| # @@protoc_insertion_point(class_scope:debugger.WatchCondition) | |||
| }) | |||
| _sym_db.RegisterMessage(WatchCondition) | |||
| _sym_db.RegisterMessage(WatchCondition.Parameter) | |||
| WatchNode = _reflection.GeneratedProtocolMessageType('WatchNode', (_message.Message,), { | |||
| 'DESCRIPTOR' : _WATCHNODE, | |||
| @@ -627,8 +749,8 @@ _EVENTLISTENER = _descriptor.ServiceDescriptor( | |||
| file=DESCRIPTOR, | |||
| index=0, | |||
| serialized_options=None, | |||
| serialized_start=1146, | |||
| serialized_end=1469, | |||
| serialized_start=1533, | |||
| serialized_end=1918, | |||
| methods=[ | |||
| _descriptor.MethodDescriptor( | |||
| name='WaitCMD', | |||
| @@ -675,6 +797,15 @@ _EVENTLISTENER = _descriptor.ServiceDescriptor( | |||
| output_type=_EVENTREPLY, | |||
| serialized_options=None, | |||
| ), | |||
| _descriptor.MethodDescriptor( | |||
| name='SendMultiGraphs', | |||
| full_name='debugger.EventListener.SendMultiGraphs', | |||
| index=5, | |||
| containing_service=None, | |||
| input_type=_CHUNK, | |||
| output_type=_EVENTREPLY, | |||
| serialized_options=None, | |||
| ), | |||
| ]) | |||
| _sym_db.RegisterServiceDescriptor(_EVENTLISTENER) | |||
| @@ -1,4 +1,5 @@ | |||
| # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! | |||
| """Client and server classes corresponding to protobuf-defined services.""" | |||
| import grpc | |||
| from mindinsight.debugger.proto import debug_grpc_pb2 as mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2 | |||
| @@ -6,7 +7,7 @@ from mindinsight.debugger.proto import ms_graph_pb2 as mindinsight_dot_debugger_ | |||
| class EventListenerStub(object): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| def __init__(self, channel): | |||
| """Constructor. | |||
| @@ -39,37 +40,48 @@ class EventListenerStub(object): | |||
| request_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.SerializeToString, | |||
| response_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| ) | |||
| self.SendMultiGraphs = channel.stream_unary( | |||
| '/debugger.EventListener/SendMultiGraphs', | |||
| request_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | |||
| response_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| ) | |||
| class EventListenerServicer(object): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| def WaitCMD(self, request, context): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||
| context.set_details('Method not implemented!') | |||
| raise NotImplementedError('Method not implemented!') | |||
| def SendMetadata(self, request, context): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||
| context.set_details('Method not implemented!') | |||
| raise NotImplementedError('Method not implemented!') | |||
| def SendGraph(self, request_iterator, context): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||
| context.set_details('Method not implemented!') | |||
| raise NotImplementedError('Method not implemented!') | |||
| def SendTensors(self, request_iterator, context): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||
| context.set_details('Method not implemented!') | |||
| raise NotImplementedError('Method not implemented!') | |||
| def SendWatchpointHits(self, request_iterator, context): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||
| context.set_details('Method not implemented!') | |||
| raise NotImplementedError('Method not implemented!') | |||
| def SendMultiGraphs(self, request_iterator, context): | |||
| """Missing associated documentation comment in .proto file.""" | |||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||
| context.set_details('Method not implemented!') | |||
| raise NotImplementedError('Method not implemented!') | |||
| @@ -102,6 +114,11 @@ def add_EventListenerServicer_to_server(servicer, server): | |||
| request_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.FromString, | |||
| response_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.SerializeToString, | |||
| ), | |||
| 'SendMultiGraphs': grpc.stream_unary_rpc_method_handler( | |||
| servicer.SendMultiGraphs, | |||
| request_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.FromString, | |||
| response_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.SerializeToString, | |||
| ), | |||
| } | |||
| generic_handler = grpc.method_handlers_generic_handler( | |||
| 'debugger.EventListener', rpc_method_handlers) | |||
| @@ -110,7 +127,7 @@ def add_EventListenerServicer_to_server(servicer, server): | |||
| # This class is part of an EXPERIMENTAL API. | |||
| class EventListener(object): | |||
| """Missing associated documentation comment in .proto file""" | |||
| """Missing associated documentation comment in .proto file.""" | |||
| @staticmethod | |||
| def WaitCMD(request, | |||
| @@ -118,6 +135,7 @@ class EventListener(object): | |||
| options=(), | |||
| channel_credentials=None, | |||
| call_credentials=None, | |||
| insecure=False, | |||
| compression=None, | |||
| wait_for_ready=None, | |||
| timeout=None, | |||
| @@ -126,7 +144,7 @@ class EventListener(object): | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Metadata.SerializeToString, | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| options, channel_credentials, | |||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| @staticmethod | |||
| def SendMetadata(request, | |||
| @@ -134,6 +152,7 @@ class EventListener(object): | |||
| options=(), | |||
| channel_credentials=None, | |||
| call_credentials=None, | |||
| insecure=False, | |||
| compression=None, | |||
| wait_for_ready=None, | |||
| timeout=None, | |||
| @@ -142,7 +161,7 @@ class EventListener(object): | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Metadata.SerializeToString, | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| options, channel_credentials, | |||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| @staticmethod | |||
| def SendGraph(request_iterator, | |||
| @@ -150,6 +169,7 @@ class EventListener(object): | |||
| options=(), | |||
| channel_credentials=None, | |||
| call_credentials=None, | |||
| insecure=False, | |||
| compression=None, | |||
| wait_for_ready=None, | |||
| timeout=None, | |||
| @@ -158,7 +178,7 @@ class EventListener(object): | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| options, channel_credentials, | |||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| @staticmethod | |||
| def SendTensors(request_iterator, | |||
| @@ -166,6 +186,7 @@ class EventListener(object): | |||
| options=(), | |||
| channel_credentials=None, | |||
| call_credentials=None, | |||
| insecure=False, | |||
| compression=None, | |||
| wait_for_ready=None, | |||
| timeout=None, | |||
| @@ -174,7 +195,7 @@ class EventListener(object): | |||
| mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.TensorProto.SerializeToString, | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| options, channel_credentials, | |||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| @staticmethod | |||
| def SendWatchpointHits(request_iterator, | |||
| @@ -182,6 +203,7 @@ class EventListener(object): | |||
| options=(), | |||
| channel_credentials=None, | |||
| call_credentials=None, | |||
| insecure=False, | |||
| compression=None, | |||
| wait_for_ready=None, | |||
| timeout=None, | |||
| @@ -190,4 +212,21 @@ class EventListener(object): | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.SerializeToString, | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| options, channel_credentials, | |||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||
| @staticmethod | |||
| def SendMultiGraphs(request_iterator, | |||
| target, | |||
| options=(), | |||
| channel_credentials=None, | |||
| call_credentials=None, | |||
| insecure=False, | |||
| compression=None, | |||
| wait_for_ready=None, | |||
| timeout=None, | |||
| metadata=None): | |||
| return grpc.experimental.stream_unary(request_iterator, target, '/debugger.EventListener/SendMultiGraphs', | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | |||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||
| options, channel_credentials, | |||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||
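| # A minimal client-side sketch (illustrative; the endpoint and variable names are | |||
| # assumptions) of streaming one serialized graph through SendMultiGraphs, | |||
| # chunking the payload and marking the final Chunk as finished: | |||
| # | |||
| #     import grpc | |||
| #     from mindinsight.debugger.proto import debug_grpc_pb2, debug_grpc_pb2_grpc | |||
| # | |||
| #     def chunk_iter(graph_bytes, chunk_size=1024 * 1024): | |||
| #         for offset in range(0, len(graph_bytes), chunk_size): | |||
| #             yield debug_grpc_pb2.Chunk(buffer=graph_bytes[offset:offset + chunk_size]) | |||
| #         yield debug_grpc_pb2.Chunk(finished=True) | |||
| # | |||
| #     channel = grpc.insecure_channel('localhost:50051') | |||
| #     stub = debug_grpc_pb2_grpc.EventListenerStub(channel) | |||
| #     reply = stub.SendMultiGraphs(chunk_iter(graph_bytes)) | |||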
| @@ -14,16 +14,66 @@ | |||
| # ============================================================================ | |||
| """This file is used to define the basic graph.""" | |||
| from collections import deque | |||
| from copy import deepcopy | |||
| from mindinsight.datavisual.data_transform.graph.msgraph import MSGraph | |||
| from mindinsight.debugger.common.exceptions.exceptions import \ | |||
| DebuggerNodeNotInGraphError, DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from .node_type_identifier import NodeTypeIdentifier | |||
| def _is_match(identifier, node, condition): | |||
| """Check if the node is matched to the identifier. | |||
| Args: | |||
| identifier (NodeTypeIdentifier): The node type identifier. | |||
| node (Node): The node object to be checked. | |||
| condition (dict): The filter condition passed to the identify function. | |||
| Returns: | |||
| bool, if the node matches the identifier. | |||
| """ | |||
| if condition: | |||
| matched = identifier.is_match(node, condition) | |||
| else: | |||
| matched = identifier.is_match(node) | |||
| return matched | |||
| class DebuggerGraph(MSGraph): | |||
| """The `DebuggerGraph` object provides interfaces to describe a debugger graph.""" | |||
| @property | |||
| def leaf_nodes(self): | |||
| """Return the leaf nodes.""" | |||
| return self._leaf_nodes | |||
| @property | |||
| def normal_node_map(self): | |||
| """Return the normal_node_map.""" | |||
| return self._normal_node_map | |||
| @property | |||
| def node_id_map_name(self): | |||
| """Return the node_id_map_name.""" | |||
| return self._node_id_map_name | |||
| @property | |||
| def const_node_temp_cache(self): | |||
| """Return the const_node_temp_cache.""" | |||
| return self._const_node_temp_cache | |||
| @property | |||
| def parameter_node_temp_cache(self): | |||
| """Return the parameter_node_temp_cache.""" | |||
| return self._parameter_node_temp_cache | |||
| @property | |||
| def full_name_map_name(self): | |||
| """Return the full_name_map_name.""" | |||
| return self._full_name_map_name | |||
| def get_node_name_by_full_name(self, full_name): | |||
| """Get node name by full name.""" | |||
| inner_name = self._full_name_map_name.get(full_name, '') | |||
| @@ -33,12 +83,15 @@ class DebuggerGraph(MSGraph): | |||
| return inner_name | |||
| def get_full_name_by_node_name(self, node_name): | |||
| """Get full name by node name for leaf nodes.""" | |||
| """Get full name by node name.""" | |||
| if not node_name: | |||
| return '' | |||
| node = self._normal_node_map.get(node_name) | |||
| if not node: | |||
| log.warning("Node %s is not leaf node.", node_name) | |||
| log.error("Node <%s> is not in graph.", node_name) | |||
| raise DebuggerNodeNotInGraphError(node_name=node_name) | |||
| return node.full_name if node else '' | |||
| return node.full_name | |||
| def get_node_type(self, node_name): | |||
| """ | |||
| @@ -48,14 +101,48 @@ class DebuggerGraph(MSGraph): | |||
| node_name (str): The full name of the node with its scope. | |||
| Returns: | |||
| A string, leaf or name_scope. | |||
| str, node type or name_scope. | |||
| """ | |||
| if node_name and not self.exist_node(name=node_name): | |||
| if not node_name: | |||
| return 'name_scope' | |||
| node = self._normal_node_map.get(node_name) | |||
| if not node: | |||
| log.error("Node <%s> is not in graph.", node_name) | |||
| raise DebuggerNodeNotInGraphError(node_name=node_name) | |||
| node = self._normal_node_map.get(node_name) | |||
| return node.type | |||
| def search_nodes_by_category(self, node_category, condition=None): | |||
| """ | |||
| Search nodes by category. | |||
| Args: | |||
| node_category (TargetTypeEnum): The node type supported in | |||
| mindinsight.conditionmgr.condition.TargetTypeEnum. | |||
| condition (dict): Search condition. Default: None. | |||
| - activation_func (Union[str, list[str]]): The target functions. Used when node_type | |||
| is TargetTypeEnum.ACTIVATION. | |||
| - search_range (list[Node]): The list of nodes to be searched from. | |||
| Returns: | |||
| list[Node], list of nodes. | |||
| """ | |||
| identifier = NodeTypeIdentifier(node_category.value) | |||
| # get search range | |||
| condition = {} if condition is None else condition | |||
| search_range = condition.pop('search_range', None) | |||
| if not search_range: | |||
| search_range = self._leaf_nodes.values() | |||
| # search match nodes | |||
| matched_nodes = [] | |||
| for node in search_range: | |||
| matched = _is_match(identifier, node, condition) | |||
| if matched: | |||
| matched_nodes.append(node) | |||
| return matched_nodes | |||
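| # Illustrative usage sketch (assumes `graph` is a built DebuggerGraph): | |||
| # | |||
| #     from mindinsight.conditionmgr.condition import TargetTypeEnum | |||
| #     relu_nodes = graph.search_nodes_by_category( | |||
| #         TargetTypeEnum.ACTIVATION, condition={'activation_func': 'ReLU'}) | |||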
| def get_tensor_history(self, node_name, depth=0): | |||
| """ | |||
| Get the tensor history of a specified node. | |||
| @@ -188,3 +275,86 @@ class DebuggerGraph(MSGraph): | |||
| raise DebuggerParamValueError(msg) | |||
| return default_root | |||
| def get_tensor_graph(self, node_name): | |||
| """ | |||
| Get graph relative to a node. | |||
| Args: | |||
| node_name (str): Node name. | |||
| Returns: | |||
| dict, tensor graph, format is: | |||
| {'nodes': [ | |||
| {'name': <node name>, | |||
| 'full_name': <node full name>, | |||
| 'type': <node type>, | |||
| 'input': <input objects>, | |||
| 'output': <output objects>, | |||
| 'slots': [{'slot': <slot id>}] | |||
| } | |||
| ]} | |||
| """ | |||
| graph_nodes = [] | |||
| cur_node = self._leaf_nodes.get(node_name) | |||
| node_detail_info = cur_node.to_dict() | |||
| cur_node_info = self._get_node_info_for_tensor_graph(cur_node) | |||
| cur_node_info['input'] = deepcopy(node_detail_info.get('input')) | |||
| cur_node_info['output'] = deepcopy(node_detail_info.get('output')) | |||
| self._add_input_node_info(cur_node_info=cur_node_info, graph_nodes=graph_nodes) | |||
| self._add_output_node_info(cur_node=cur_node, cur_node_info=cur_node_info, graph_nodes=graph_nodes) | |||
| graph_nodes.append(cur_node_info) | |||
| return {'nodes': graph_nodes} | |||
| @staticmethod | |||
| def _get_node_info_for_tensor_graph(node): | |||
| """Get node info for the tensor graph.""" | |||
| node_info = { | |||
| 'name': node.name, | |||
| 'full_name': node.full_name, | |||
| 'type': node.type, | |||
| 'input': {}, | |||
| 'output': {}, | |||
| 'slots': [{'slot': str(slot)} for slot in range(node.output_nums)] | |||
| } | |||
| return node_info | |||
| def _add_output_node_info(self, cur_node, cur_node_info, graph_nodes): | |||
| """ | |||
| Add output node info into cur_node_info and node list. | |||
| Args: | |||
| cur_node (Node): The current node object. | |||
| cur_node_info (dict): Current node info. | |||
| graph_nodes (list[<Node info>]): The nodes in tensor graph. | |||
| """ | |||
| output_slot_mapping = self._get_slot_mapping(cur_node) | |||
| for node_name, edge_info in cur_node_info.get('output').items(): | |||
| edge_info['slot_mapping'] = output_slot_mapping | |||
| # add output node info into graph | |||
| output_node = self._leaf_nodes.get(node_name) | |||
| output_node_info = self._get_node_info_for_tensor_graph(output_node) | |||
| output_node_info['input'][cur_node.name] = edge_info | |||
| graph_nodes.append(output_node_info) | |||
| def _add_input_node_info(self, cur_node_info, graph_nodes): | |||
| """ | |||
| Add input node info into cur_node_info and node list. | |||
| Args: | |||
| cur_node_info (dict): Current node info. | |||
| graph_nodes (list[<Node info>]): The nodes in tensor graph. | |||
| """ | |||
| cur_node_name = cur_node_info.get('name') | |||
| for node_name, edge_info in cur_node_info.get('input').items(): | |||
| input_node = self._leaf_nodes.get(node_name) | |||
| edge_info['slot_mapping'] = self._get_slot_mapping(input_node) | |||
| # add input node info into graph | |||
| input_node_info = self._get_node_info_for_tensor_graph(input_node) | |||
| input_node_info['output'][cur_node_name] = edge_info | |||
| graph_nodes.append(input_node_info) | |||
| @staticmethod | |||
| def _get_slot_mapping(input_node): | |||
| """Get slot mapping between nodes.""" | |||
| return [[str(slot), ''] for slot in range(input_node.output_nums)] | |||
| @@ -0,0 +1,81 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to define the debugger multigraph.""" | |||
| import copy | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.datavisual.data_transform.graph.node import Node, NodeTypeEnum | |||
| from .debugger_graph import DebuggerGraph | |||
| class DebuggerMultiGraph(DebuggerGraph): | |||
| """The `DebuggerMultiGraph` object provides interfaces to describe a debugger multigraph.""" | |||
| def add_graph(self, graph_dict): | |||
| """ | |||
| Add graphs into the DebuggerMultiGraph. | |||
| Args: | |||
| graph_dict (dict): The <graph_name, graph_object> dict. | |||
| """ | |||
| if len(graph_dict) == 1: | |||
| graph = list(graph_dict.values())[0] | |||
| self._normal_node_map = graph.normal_node_map | |||
| self._node_id_map_name = graph.node_id_map_name | |||
| self._const_node_temp_cache = graph.const_node_temp_cache | |||
| self._parameter_node_temp_cache = graph.parameter_node_temp_cache | |||
| self._leaf_nodes = graph.leaf_nodes | |||
| self._full_name_map_name = graph.full_name_map_name | |||
| else: | |||
| for graph_name, graph in graph_dict.items(): | |||
| log.debug("add graph %s into whole graph.", graph_name) | |||
| # add nodes | |||
| normal_nodes = copy.deepcopy(graph.normal_node_map) | |||
| for _, node_obj in normal_nodes.items(): | |||
| pre_scope = graph_name + "/" | |||
| node_obj.name = pre_scope + node_obj.name | |||
| node_obj.full_name = pre_scope + node_obj.full_name | |||
| if node_obj.scope: | |||
| node_obj.scope = pre_scope + node_obj.scope | |||
| else: | |||
| node_obj.scope = graph_name | |||
| # update inputs | |||
| old_inputs = copy.deepcopy(node_obj.inputs) | |||
| for src_name, input_attr in old_inputs.items(): | |||
| new_src_name = graph_name + "/" + src_name | |||
| node_obj.add_inputs(new_src_name, input_attr) | |||
| node_obj.delete_inputs(src_name) | |||
| # update outputs | |||
| old_outputs = copy.deepcopy(node_obj.outputs) | |||
| for dst_name, output_attr in old_outputs.items(): | |||
| new_dst_name = graph_name + "/" + dst_name | |||
| node_obj.add_outputs(new_dst_name, output_attr) | |||
| node_obj.delete_outputs(dst_name) | |||
| self._cache_node(node_obj) | |||
| # add graph_node | |||
| node = Node(name=graph_name, node_id=graph_name) | |||
| node.type = NodeTypeEnum.NAME_SCOPE.value | |||
| node.subnode_count = len(graph.list_node_by_scope()) | |||
| self._cache_node(node) | |||
| self._leaf_nodes = self._get_leaf_nodes() | |||
| self._full_name_map_name = self._get_leaf_node_full_name_map() | |||
| log.info( | |||
| "Build multi_graph end, all node count: %s, const count: %s, parameter count: %s.", | |||
| self.normal_node_count, len(self._const_node_temp_cache), | |||
| len(self._parameter_node_temp_cache)) | |||
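| # Illustrative sketch of the renaming rule above (graph names hypothetical): | |||
| # merging {'graph_a': g_a, 'graph_b': g_b} turns a node "conv1" of g_a into | |||
| # "graph_a/conv1" and rewrites its input/output edges with the same prefix, | |||
| # so node names stay unique across the merged graph: | |||
| # | |||
| #     multi_graph = DebuggerMultiGraph() | |||
| #     multi_graph.add_graph({'graph_a': g_a, 'graph_b': g_b}) | |||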
| @@ -0,0 +1,143 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This file is used to identify the type of the node.""" | |||
| import sys | |||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| _ACTIVATIONS = [ | |||
| 'Softmax', | |||
| 'LogSoftmax', | |||
| 'ReLU', | |||
| 'ReLU6', | |||
| 'Tanh', | |||
| 'GELU', | |||
| 'ELU', | |||
| 'Sigmoid', | |||
| 'PReLU', | |||
| 'LeakyReLU', | |||
| 'HSwish', | |||
| 'HSigmoid', | |||
| 'LogSigmoid' | |||
| ] | |||
| class NodeTypeIdentifier: | |||
| """Node type identifier.""" | |||
| def __init__(self, node_type): | |||
| self.identify_func = self.get_identify_func(node_type) | |||
| @staticmethod | |||
| def get_identify_func(node_type): | |||
| """ | |||
| Get the identify function in this module. | |||
| Args: | |||
| node_type (str): The node type. | |||
| Returns: | |||
| function, the identify function. | |||
| """ | |||
| # the name of the identify function should start with 'is_' and end with '_node' | |||
| target_name = 'is_' + node_type + '_node' | |||
| cur_module = sys.modules[__name__] | |||
| for sub_module in dir(cur_module): | |||
| # the rule to get the identify function | |||
| if sub_module == target_name: | |||
| return getattr(cur_module, sub_module) | |||
| raise DebuggerParamValueError("Invalid identify type.") | |||
| def is_match(self, *args, **kwargs): | |||
| """Check if the input matches the identify function.""" | |||
| return self.identify_func(*args, **kwargs) | |||
| def is_weight_node(node): | |||
| """ | |||
| Check if the node is weight type. | |||
| Args: | |||
| node (Node): The node object. | |||
| Returns: | |||
| bool, if the node is weight type. | |||
| """ | |||
| if node.type == NodeTypeEnum.PARAMETER.value: | |||
| node_name = node.name.lower() | |||
| weight_flag = False | |||
| if node_name.endswith('.weight') or node_name.endswith('.bias'): | |||
| weight_flag = True | |||
| if weight_flag and 'optimizer-' not in node_name and not node_name.startswith('gradients/'): | |||
| return True | |||
| return False | |||
| def is_activation_node(node, condition=None): | |||
| """ | |||
| Check if the node is activation type. | |||
| Args: | |||
| node (Node): The node object. | |||
| condition (dict): Filter condition. | |||
| - activation_func (Union[str, list[str]]): The target functions. | |||
| Returns: | |||
| bool, if the node is activation type. | |||
| """ | |||
| activation_funcs = condition.get('activation_func') if condition else _ACTIVATIONS | |||
| if not activation_funcs: | |||
| activation_funcs = _ACTIVATIONS | |||
| if not isinstance(activation_funcs, list): | |||
| activation_funcs = [activation_funcs] | |||
| if not is_gradient_node(node): | |||
| node_type = node.type | |||
| for activation_name in activation_funcs: | |||
| if node_type == activation_name: | |||
| return True | |||
| return False | |||
| def is_gradient_node(node): | |||
| """ | |||
| Check if the node is gradient type. | |||
| Args: | |||
| node (Node): The node object. | |||
| Returns: | |||
| bool, if the node is gradient type. | |||
| """ | |||
| if node.name.startswith('Gradients/') and node.type != NodeTypeEnum.PARAMETER.value: | |||
| return True | |||
| return False | |||
| def is_tensor_node(node): | |||
| """ | |||
| Check if the node is tensor type. | |||
| Args: | |||
| node (Node): The node object. | |||
| Returns: | |||
| bool, if the node is tensor type. | |||
| """ | |||
| if node is not None: | |||
| return True | |||
| return False | |||
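| # Illustrative usage sketch (assumes `node` is a Node from a built graph); | |||
| # 'weight' resolves to is_weight_node through the 'is_<type>_node' naming rule: | |||
| # | |||
| #     identifier = NodeTypeIdentifier('weight') | |||
| #     if identifier.is_match(node): | |||
| #         print(node.name, 'is a trainable weight') | |||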
| @@ -19,7 +19,7 @@ import numpy as np | |||
| from mindinsight.utils.tensor import TensorUtils | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import NUMPY_TYPE_MAP | |||
| from mindinsight.debugger.proto.ms_graph_pb2 import DataType | |||
| @@ -177,6 +177,18 @@ class OpTensor(BaseTensor): | |||
| return res | |||
| def get_tensor_statistics(self): | |||
| """ | |||
| Get Tensor statistics. | |||
| Returns: | |||
| dict, overall statistics. | |||
| """ | |||
| if not self._stats: | |||
| self._stats = TensorUtils.get_statistics_from_tensor(self.value) | |||
| statistics = TensorUtils.get_overall_statistic_dict(self._stats) | |||
| return statistics | |||
| def update_tensor_comparisons(self, tensor_comparison): | |||
| """ | |||
| Update tensor comparison for tensor. | |||
| @@ -13,23 +13,45 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define the watchpoint stream.""" | |||
| from mindinsight.datavisual.data_transform.graph.node import NodeTypeEnum | |||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import is_scope_type | |||
| from mindinsight.debugger.proto.debug_grpc_pb2 import SetCMD, WatchCondition | |||
| from mindinsight.conditionmgr.condition import ConditionIdEnum | |||
| WATCHPOINT_CONDITION_MAPPING = { | |||
| 'INF': WatchCondition.Condition.inf, | |||
| 'NAN': WatchCondition.Condition.nan, | |||
| 'OVERFLOW': WatchCondition.Condition.overflow, | |||
| 'MAX_GT': WatchCondition.Condition.max_gt, | |||
| 'MAX_LT': WatchCondition.Condition.max_lt, | |||
| 'MIN_GT': WatchCondition.Condition.min_gt, | |||
| 'MIN_LT': WatchCondition.Condition.min_lt, | |||
| 'MAX_MIN_GT': WatchCondition.Condition.max_min_gt, | |||
| 'MAX_MIN_LT': WatchCondition.Condition.max_min_lt, | |||
| 'MEAN_GT': WatchCondition.Condition.mean_gt, | |||
| 'MEAN_LT': WatchCondition.Condition.mean_lt | |||
| ConditionIdEnum.NAN.value: WatchCondition.Condition.nan, | |||
| ConditionIdEnum.INF.value: WatchCondition.Condition.inf, | |||
| ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value: WatchCondition.Condition.overflow, | |||
| ConditionIdEnum.MAX_GT.value: WatchCondition.Condition.max_gt, | |||
| ConditionIdEnum.MAX_LT.value: WatchCondition.Condition.max_lt, | |||
| ConditionIdEnum.MIN_GT.value: WatchCondition.Condition.min_gt, | |||
| ConditionIdEnum.MIN_LT.value: WatchCondition.Condition.min_lt, | |||
| ConditionIdEnum.MAX_MIN_GT.value: WatchCondition.Condition.max_min_gt, | |||
| ConditionIdEnum.MAX_MIN_LT.value: WatchCondition.Condition.max_min_lt, | |||
| ConditionIdEnum.MEAN_GT.value: WatchCondition.Condition.mean_gt, | |||
| ConditionIdEnum.MEAN_LT.value: WatchCondition.Condition.mean_lt, | |||
| ConditionIdEnum.TENSOR_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow, | |||
| ConditionIdEnum.WEIGHT_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow, | |||
| ConditionIdEnum.OPERATOR_OVERFLOW.value: WatchCondition.Condition.overflow, | |||
| ConditionIdEnum.TENSOR_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization, | |||
| ConditionIdEnum.WEIGHT_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization, | |||
| ConditionIdEnum.TENSOR_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, | |||
| ConditionIdEnum.WEIGHT_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, | |||
| ConditionIdEnum.GRADIENT_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, | |||
| ConditionIdEnum.GRADIENT_EXPLODING.value: WatchCondition.Condition.tensor_general_overflow, | |||
| ConditionIdEnum.TENSOR_TOO_SMALL.value: WatchCondition.Condition.tensor_too_small, | |||
| ConditionIdEnum.WEIGHT_TOO_SMALL.value: WatchCondition.Condition.tensor_too_small, | |||
| ConditionIdEnum.GRADIENT_VANISHING.value: WatchCondition.Condition.tensor_too_small, | |||
| ConditionIdEnum.TENSOR_ALL_ZERO.value: WatchCondition.Condition.tensor_all_zero, | |||
| ConditionIdEnum.TENSOR_CHANGE_TOO_LARGE.value: WatchCondition.Condition.tensor_change_too_large, | |||
| ConditionIdEnum.WEIGHT_CHANGE_TOO_LARGE.value: WatchCondition.Condition.tensor_change_too_large, | |||
| ConditionIdEnum.TENSOR_CHANGE_TOO_SMALL.value: WatchCondition.Condition.tensor_change_too_small, | |||
| ConditionIdEnum.WEIGHT_CHANGE_TOO_SMALL.value: WatchCondition.Condition.tensor_change_too_small, | |||
| ConditionIdEnum.TENSOR_NOT_CHANGED.value: WatchCondition.Condition.tensor_not_changed, | |||
| ConditionIdEnum.WEIGHT_NOT_CHANGED.value: WatchCondition.Condition.tensor_not_changed | |||
| } | |||
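| # Note that several UI-level condition ids deliberately share one proto-level | |||
| # condition, e.g. both of these lookups yield Condition.tensor_too_large: | |||
| # | |||
| #     WATCHPOINT_CONDITION_MAPPING[ConditionIdEnum.TENSOR_TOO_LARGE.value] | |||
| #     WATCHPOINT_CONDITION_MAPPING[ConditionIdEnum.WEIGHT_TOO_LARGE.value] | |||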
| @@ -81,10 +103,8 @@ class WatchNodeTree: | |||
| def _translate_node_type(node_type): | |||
| """Translate node type to watch node type.""" | |||
| flag = node_type | |||
| if not node_type or node_type == NodeTypeEnum.NAME_SCOPE.value: | |||
| if not node_type or is_scope_type(node_type): | |||
| flag = 'scope' | |||
| elif node_type != NodeTypeEnum.AGGREGATION_SCOPE.value: | |||
| flag = 'leaf' | |||
| return flag | |||
| def get(self, sub_name): | |||
| @@ -191,7 +211,7 @@ class Watchpoint: | |||
| self._watch_node = other_watchpoint.nodes | |||
| def add_nodes(self, nodes): | |||
| """Add node into watchcpoint.""" | |||
| """Add node into watchpoint.""" | |||
| if not nodes: | |||
| log.warning("Add empty nodes.") | |||
| return | |||
| @@ -208,8 +228,7 @@ class Watchpoint: | |||
| if not isinstance(nodes, list): | |||
| nodes = [nodes] | |||
| for node in nodes: | |||
| node_name = node.split(':')[0] | |||
| self._watch_node.remove_node(node_name) | |||
| self._watch_node.remove_node(node.name) | |||
| def get_node_status(self, node_name, node_type, full_name): | |||
| """Judge if the node is in watch nodes.""" | |||
| @@ -229,40 +248,56 @@ class Watchpoint: | |||
| return status | |||
| def get_watch_node(self, cur_watch_node, watch_node_list): | |||
| def _get_watch_node(self, cur_watch_node, watch_node_list): | |||
| """ | |||
| Traverse the watch node tree and add the totally watched nodes to `watch_node_list`. | |||
| Args: | |||
| cur_watch_node (WatchNodeTree): The current watch node. | |||
| watch_node_list (list[WatchNodeTree]): The list of total watched node. | |||
| watch_node_list (list[NodeBasicInfo]): The list of watch node basic infos. | |||
| """ | |||
| if cur_watch_node.watch_status == WatchNodeTree.TOTAL_WATCH and \ | |||
| cur_watch_node.node_type != NodeTypeEnum.AGGREGATION_SCOPE.value: | |||
| watch_node_list.append(cur_watch_node) | |||
| if cur_watch_node.watch_status == WatchNodeTree.TOTAL_WATCH: | |||
| node_info = NodeBasicInfo(name=cur_watch_node.node_name, | |||
| full_name=cur_watch_node.full_name, | |||
| type=cur_watch_node.node_type) | |||
| watch_node_list.append(node_info) | |||
| return | |||
| for _, watch_node in cur_watch_node.get_children(): | |||
| self.get_watch_node(watch_node, watch_node_list) | |||
| self._get_watch_node(watch_node, watch_node_list) | |||
| def get_set_cmd(self): | |||
| """Return the watchpoint in proto format.""" | |||
| # get watch nodes. | |||
| def get_watch_nodes(self): | |||
| """ | |||
| Get the basic info of all totally watched nodes. | |||
| Returns: | |||
| list[NodeBasicInfo], the list of watch node basic infos. | |||
| """ | |||
| watch_nodes = [] | |||
| self.get_watch_node(self._watch_node, watch_nodes) | |||
| self._get_watch_node(self._watch_node, watch_nodes) | |||
| return watch_nodes | |||
| def get_pending_cmd(self, watch_nodes): | |||
| """Return the watchpoint in proto format.""" | |||
| # construct SetCMD | |||
| set_cmd = SetCMD() | |||
| set_cmd.id = self._id | |||
| set_cmd.delete = False | |||
| set_cmd.watch_condition.condition = WATCHPOINT_CONDITION_MAPPING.get( | |||
| self._condition.get('condition')) | |||
| if self._condition.get('param'): | |||
| self._condition.get('id')) | |||
| for param in self._condition.get('params'): | |||
| # at most one param is provided | |||
| set_cmd.watch_condition.value = self._condition.get('param') | |||
| param_proto = set_cmd.watch_condition.params.add() | |||
| param_proto.name = param.get('name') | |||
| param_proto.value = param.get('value') | |||
| param_proto.disabled = param.get('disable') | |||
| # Only one parameter of condition in current version. | |||
| set_cmd.watch_condition.value = param.get('value') | |||
| for watch_node in watch_nodes: | |||
| event_node = set_cmd.watch_nodes.add() | |||
| event_node.node_name = watch_node.full_name | |||
| event_node.node_type = watch_node.node_type | |||
| event_node.node_type = watch_node.type | |||
| return set_cmd | |||
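| # Illustrative sketch (the condition dict is hypothetical): for a watchpoint whose | |||
| # condition is {'id': 'tensor_too_large', 'params': [{'name': 'abs_mean_gt', | |||
| # 'value': 1.0, 'disable': False}]}, get_pending_cmd adds one Parameter proto and | |||
| # mirrors its value into watch_condition.value, since only one parameter per | |||
| # condition is supported in the current version. | |||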
| def get_watch_condition_info(self): | |||
| @@ -277,22 +312,17 @@ class Watchpoint: | |||
| class WatchpointHit: | |||
| """The watchpoint hit structure.""" | |||
| def __init__(self, tensor_proto, watchpoint, node_name): | |||
| self._node_name = node_name | |||
| def __init__(self, tensor_proto, watchpoint, node_name, graph_name): | |||
| self._full_name = tensor_proto.node_name | |||
| self._slot = tensor_proto.slot | |||
| self._watchpoint = watchpoint | |||
| self.node_name = node_name | |||
| self.slot = tensor_proto.slot | |||
| self.graph_name = graph_name | |||
| @property | |||
| def tensor_full_name(self): | |||
| """The property of tensor full name.""" | |||
| tensor_name = ':'.join([self._full_name, self._slot]) | |||
| return tensor_name | |||
| @property | |||
| def tensor_name(self): | |||
| """The property of tensor ui name.""" | |||
| tensor_name = ':'.join([self._node_name, self._slot]) | |||
| tensor_name = ':'.join([self._full_name, self.slot]) | |||
| return tensor_name | |||
| @property | |||
| @@ -303,5 +333,7 @@ class WatchpointHit: | |||
| def __eq__(self, other): | |||
| """Define the equal condition.""" | |||
| flag = self.tensor_full_name == other.tensor_full_name and self.watchpoint == other.watchpoint | |||
| flag = self.tensor_full_name == other.tensor_full_name \ | |||
| and self.watchpoint == other.watchpoint \ | |||
| and self.graph_name == other.graph_name | |||
| return flag | |||
| @@ -18,7 +18,7 @@ from queue import Queue, Empty | |||
| from threading import Lock | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | |||
| @@ -13,10 +13,14 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define the graph stream handler.""" | |||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||
| from mindinsight.conditionmgr.condition import TargetTypeEnum as CategoryTypeEnum | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | |||
| DebuggerNodeNotInGraphError, DebuggerGraphNotExistError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import is_scope_type | |||
| from mindinsight.debugger.stream_cache.debugger_graph import DebuggerGraph | |||
| from mindinsight.debugger.stream_cache.debugger_multigraph import DebuggerMultiGraph | |||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | |||
| @@ -24,16 +28,41 @@ class GraphHandler(StreamHandlerBase): | |||
| """Metadata Handler.""" | |||
| def __init__(self): | |||
| self._graph_proto = None | |||
| self._graph = None | |||
| self._searched_node_list = [] | |||
| # dict of <graph_name, GraphProto object> | |||
| self._graph_proto = {} | |||
| # dict of <graph_name, DebuggerGraph object> | |||
| self._graph = {} | |||
| self._searched_node_list = {} | |||
| # list of node names in bfs order | |||
| self.bfs_order = [] | |||
| # dict of <node full name, graph_name> | |||
| self.graph_node_map = {} | |||
| # dict of <node ui name, Node object> for all graphs | |||
| self._all_leaf_nodes = {} | |||
| # the whole graph | |||
| self._whole_graph = None | |||
| @property | |||
| def whole_graph(self): | |||
| """The property of whole_graph.""" | |||
| return self._whole_graph | |||
| @property | |||
| def graph(self): | |||
| """The property of graph.""" | |||
| return self._graph_proto | |||
| @property | |||
| def graph_names(self): | |||
| """The property of graph names.""" | |||
| return list(self._graph) | |||
| @property | |||
| def debugger_graph_obj(self): | |||
| """The property of graph object.""" | |||
| return self._graph | |||
| def put(self, value): | |||
| """ | |||
| Put value into graph cache. Called by grpc server. | |||
| @@ -41,14 +70,23 @@ class GraphHandler(StreamHandlerBase): | |||
| Args: | |||
| value (dict): Dict of <graph_name (str), GraphProto object>. | |||
| """ | |||
| self._graph_proto = value | |||
| log.info("Put graph into cache.") | |||
| # build graph | |||
| graph = DebuggerGraph() | |||
| graph.build_graph(value) | |||
| self._graph = graph | |||
| self.bfs_order = self._graph.get_bfs_order() | |||
| for graph_name, graph_value in value.items(): | |||
| self._graph_proto[graph_name] = graph_value | |||
| # build sub graph | |||
| graph = DebuggerGraph() | |||
| graph.build_graph(graph_value) | |||
| self._graph[graph_name] = graph | |||
| self.bfs_order.extend(graph.get_bfs_order()) | |||
| leaf_nodes = graph.leaf_nodes | |||
| self._all_leaf_nodes.update(leaf_nodes) | |||
| for _, node in leaf_nodes.items(): | |||
| self.graph_node_map[node.full_name] = graph_name | |||
| # build whole graph | |||
| graph = DebuggerMultiGraph() | |||
| graph.add_graph(self._graph) | |||
| self._whole_graph = graph | |||
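| # Hedged sketch (not part of the patch) of the multi-graph bookkeeping above, | |||
| # with plain dicts standing in for GraphProto/DebuggerGraph; names are made up. | |||
| graphs = {'graph_0': {'conv1': 'Default/conv1-Conv2d'}, | |||
|           'graph_1': {'fc': 'Default/fc-Dense'}} | |||
| graph_node_map = {} | |||
| for graph_name, leaf_nodes in graphs.items(): | |||
|     for _, node_full_name in leaf_nodes.items(): | |||
|         graph_node_map[node_full_name] = graph_name | |||
| assert graph_node_map['Default/fc-Dense'] == 'graph_1' | |||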
| def get(self, filter_condition=None): | |||
| """ | |||
| @@ -58,7 +96,7 @@ class GraphHandler(StreamHandlerBase): | |||
| filter_condition (dict): | |||
| - name (str): The full debug node name. | |||
| - graph_name (str): The relative graph_name of the node. | |||
| - single_node (bool): If True, return the graph from root | |||
| to the specific node; else, return the sublayer of the | |||
| graph. Default: False. | |||
| @@ -73,47 +111,121 @@ class GraphHandler(StreamHandlerBase): | |||
| 'please start the training script first.') | |||
| return {'graph': {}} | |||
| graph = {} | |||
| if filter_condition is None: | |||
| filter_condition = {} | |||
| graph = {'graph_names': self.graph_names} | |||
| single_node = filter_condition.get('single_node', False) | |||
| name = filter_condition.get('name') | |||
| graph = {} | |||
| graph_name = filter_condition.get('graph_name') | |||
| if single_node is True: | |||
| nodes = self.get_single_node(name) | |||
| nodes = self._get_single_node(name, graph_name) | |||
| else: | |||
| nodes = self.list_nodes(name) | |||
| nodes = self._list_nodes(name, graph_name) | |||
| graph.update(nodes) | |||
| return {'graph': graph} | |||
| def get_tensor_history(self, node_name, depth=0): | |||
| def _get_single_node(self, name, graph_name=None): | |||
| """ | |||
| Search node, and return every layer nodes until this node. | |||
| Args: | |||
| name (str): The name of node. | |||
| graph_name (str): The graph name. Default: None. | |||
| Returns: | |||
| dict, every layer nodes until this node. | |||
| """ | |||
| if graph_name: | |||
| graph = self._get_graph(graph_name=graph_name) | |||
| searched_graph = graph.search_single_node(name) | |||
| else: | |||
| searched_graph = self._whole_graph.search_single_node(name) | |||
| return searched_graph | |||
| def _list_nodes(self, scope, graph_name): | |||
| """ | |||
| Get the nodes of every layer in graph. | |||
| Args: | |||
| scope (str): The name of a scope. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| dict, the nodes of the specified scope, format is {'nodes': [<Node object>]}. | |||
| example: | |||
| { | |||
| "nodes" : [ | |||
| { | |||
| "attr" : | |||
| { | |||
| "index" : "i: 0\n" | |||
| }, | |||
| "input" : {}, | |||
| "name" : "input_tensor", | |||
| "output" : | |||
| { | |||
| "Default/TensorAdd-op17" : | |||
| { | |||
| "edge_type" : "data", | |||
| "scope" : "name_scope", | |||
| "shape" : [1, 16, 128, 128] | |||
| } | |||
| }, | |||
| "output_i" : -1, | |||
| "proxy_input" : {}, | |||
| "proxy_output" : {}, | |||
| "independent_layout" : False, | |||
| "subnode_count" : 0, | |||
| "type" : "Data" | |||
| } | |||
| ] | |||
| } | |||
| """ | |||
| if graph_name: | |||
| graph = self._get_graph(graph_name, scope) | |||
| nodes = graph.list_node_by_scope(scope=scope) | |||
| res = {'nodes': nodes} | |||
| else: | |||
| nodes = self._whole_graph.list_node_by_scope(scope=scope) | |||
| res = {'nodes': nodes} | |||
| return res | |||
| def get_tensor_history(self, node_name, graph_name=None, depth=0): | |||
| """ | |||
| Get the tensor history of a specified node. | |||
| Args: | |||
| node_name (str): The debug name of the node. | |||
| graph_name (str): The graph name. Default: None. | |||
| depth (int): The number of layers the user | |||
| wants to trace. Default is 0. | |||
| Returns: | |||
| dict, basic tensor history, including only the tensor name, tensor type and node type. | |||
| """ | |||
| self._graph_exists() | |||
| if not self._graph.exist_node(node_name): | |||
| raise DebuggerNodeNotInGraphError(node_name) | |||
| tensor_history, cur_outputs_nums = self._graph.get_tensor_history( | |||
| node_name, depth | |||
| ) | |||
| graph_name, node_name = self._parse_node_name(node_name, graph_name) | |||
| graph = self._get_graph(graph_name=graph_name, node_name=node_name) | |||
| # validate node type, scope node has no tensor history | |||
| node_type = graph.get_node_type(node_name) | |||
| if is_scope_type(node_type): | |||
| log.error("Scope type node has no tensor history.") | |||
| raise DebuggerParamValueError("Invalid leaf node name.") | |||
| # get tensor history | |||
| tensor_history, cur_outputs_nums = graph.get_tensor_history(node_name, depth) | |||
| # add the tensor type for tensor history | |||
| self._update_tensor_history(tensor_history[0:cur_outputs_nums], 'output') | |||
| self._update_tensor_history(tensor_history[cur_outputs_nums:], 'input') | |||
| self._update_tensor_history(tensor_history[0:cur_outputs_nums], 'output', graph_name) | |||
| self._update_tensor_history(tensor_history[cur_outputs_nums:], 'input', graph_name) | |||
| log.debug("Get %d tensors in tensor history for node <%s>.", len(tensor_history), node_name) | |||
| return {'tensor_history': tensor_history} | |||
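| # Hedged sketch of the labeling step above: the first `cur_outputs_nums` | |||
| # entries of the history are the node's outputs, the rest are its inputs; | |||
| # the tensor names below are made up. | |||
| tensor_history = [{'name': 'conv1:0'}, {'name': 'x:0'}, {'name': 'w:0'}] | |||
| cur_outputs_nums = 1 | |||
| for info in tensor_history[:cur_outputs_nums]: | |||
|     info.update({'type': 'output', 'graph_name': 'graph_0'}) | |||
| for info in tensor_history[cur_outputs_nums:]: | |||
|     info.update({'type': 'input', 'graph_name': 'graph_0'}) | |||
| assert [info['type'] for info in tensor_history] == ['output', 'input', 'input'] | |||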
| @staticmethod | |||
| def _update_tensor_history(tensor_history, tensor_type): | |||
| def _update_tensor_history(tensor_history, tensor_type, graph_name): | |||
| """ | |||
| Add tensor source type for tensor history. | |||
| @@ -122,115 +234,285 @@ class GraphHandler(StreamHandlerBase): | |||
| keys: `node_type` and `name`. `node_type` refers to the type of the node which | |||
| the tensor come from. `name` refers to the tensor name. | |||
| tensor_type (str): The source type of the tensor. `input` or `output`. | |||
| graph_name (str): The graph name. | |||
| """ | |||
| for single_tensor_info in tensor_history: | |||
| single_tensor_info['type'] = tensor_type | |||
| single_tensor_info['graph_name'] = graph_name | |||
| def search_nodes(self, pattern): | |||
| """ | |||
| Search nodes by given pattern. | |||
| Args: | |||
| pattern (Union[str, None]): The pattern of the node to search, | |||
| if None, return all node names. | |||
| pattern (dict): Filter condition. | |||
| - name (str): The name pattern. | |||
| - graph_name (str): The graph name. | |||
| - node_category (str): The node category. Default: None. | |||
| - condition (dict): The additional filter condition. | |||
| Returns: | |||
| dict, the searched node. | |||
| """ | |||
| self._graph_exists() | |||
| self._searched_node_list = self._graph.search_nodes_by_pattern(pattern) | |||
| nodes = self._graph.get_nodes(self._searched_node_list) | |||
| graph_name = pattern.pop('graph_name', None) | |||
| search_nodes = self.get_searched_nodes(pattern, graph_name) | |||
| # construct the searched node tree | |||
| if not self._has_graph_scope(graph_name): | |||
| for graph_name, searched_node_list in search_nodes.items(): | |||
| graph = self._get_graph(graph_name=graph_name) | |||
| format_nodes = graph.get_nodes(searched_node_list) | |||
| return {'nodes': format_nodes} | |||
| # deal with graph_name is None | |||
| res = [] | |||
| for graph_name, graph in self._graph.items(): | |||
| format_nodes = graph.get_nodes(search_nodes.get(graph_name, [])) | |||
| if not format_nodes: | |||
| continue | |||
| self._add_graph_scope_for_nodes(format_nodes, graph_name) | |||
| search_graph = { | |||
| 'name': graph_name, | |||
| 'type': 'name_scope', | |||
| 'nodes': format_nodes | |||
| } | |||
| res.append(search_graph) | |||
| return {'nodes': res} | |||
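| # Hedged sketch of the dispatch above: without a graph scope the single | |||
| # graph's result is returned flat; otherwise each sub graph is wrapped as a | |||
| # name_scope node. Data below is illustrative. | |||
| def wrap_search_results(search_nodes, has_graph_scope): | |||
|     """Mimic the two return shapes of search_nodes with plain data.""" | |||
|     if not has_graph_scope: | |||
|         return {'nodes': next(iter(search_nodes.values()))} | |||
|     return {'nodes': [{'name': name, 'type': 'name_scope', 'nodes': nodes} | |||
|                       for name, nodes in search_nodes.items() if nodes]} | |||
| assert wrap_search_results({'graph_0': ['conv1']}, False) == {'nodes': ['conv1']} | |||
| expected = {'nodes': [{'name': 'graph_1', 'type': 'name_scope', 'nodes': ['fc']}]} | |||
| assert wrap_search_results({'graph_0': [], 'graph_1': ['fc']}, True) == expected | |||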
| def get_searched_node_list(self, pattern, graph_name): | |||
| """Get searched node list in single graph.""" | |||
| searched_nodes = self.get_searched_nodes(pattern, graph_name) | |||
| return searched_nodes.get(graph_name, []) | |||
| def get_searched_nodes(self, pattern, graph_name=None): | |||
| """ | |||
| Search nodes by given pattern. | |||
| Args: | |||
| pattern (dict): Filter condition. | |||
| - name (str): The name pattern. | |||
| - node_category (str): The node category. Default: None. | |||
| - condition (dict): The additional filter condition. | |||
| graph_name (str): The graph name. If not given, search in all sub graphs. Default: None. | |||
| Returns: | |||
| dict, the searched nodes. The format is dict of <graph_name, list[Node]>. | |||
| """ | |||
| if not graph_name: | |||
| graph_names = self.graph_names | |||
| else: | |||
| graph_names = [graph_name] | |||
| search_nodes = {} | |||
| for sub_graph_name in graph_names: | |||
| search_nodes[sub_graph_name] = self._search_in_single_graph(pattern, sub_graph_name) | |||
| return search_nodes | |||
| return {'nodes': nodes} | |||
| def _search_in_single_graph(self, pattern, graph_name=None): | |||
| """ | |||
| Search nodes by given pattern. | |||
| def get_nodes_by_scope(self, scope_name): | |||
| Args: | |||
| pattern (dict): Filter condition. | |||
| - name (str): The name pattern. | |||
| - node_category (str): The node category. Default: None. | |||
| - condition (dict): The additional filter condition. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| list, the searched node list. | |||
| """ | |||
| temp_node_list = [] | |||
| node_category = pattern.get('node_category') | |||
| if graph_name: | |||
| graph = self._get_graph(graph_name=graph_name) | |||
| else: | |||
| graph = self._whole_graph | |||
| # filter nodes by name | |||
| if pattern.get('name'): | |||
| if node_category: | |||
| # get leaf nodes for forward filter | |||
| temp_node_list = graph.search_leaf_nodes_by_pattern(pattern.get('name')) | |||
| else: | |||
| # optimize search nodes | |||
| temp_node_list = graph.search_nodes_by_pattern(pattern.get('name')) | |||
| if not temp_node_list: | |||
| log.debug("No node named %s", pattern.get('name')) | |||
| return [] | |||
| # filter nodes by category | |||
| if node_category: | |||
| node_category = self._get_inner_node_category(node_category) | |||
| condition = pattern['condition'].copy() if pattern.get('condition') else {} | |||
| condition['search_range'] = temp_node_list | |||
| temp_node_list = graph.search_nodes_by_category(node_category, condition=condition) | |||
| return temp_node_list | |||
| @staticmethod | |||
| def _get_inner_node_category(node_category): | |||
| """ | |||
| Get inner node category. | |||
| Args: | |||
| node_category (str): The node category supported in | |||
| mindinsight.conditionmgr.condition.TargetTypeEnum. | |||
| Returns: | |||
| CategoryTypeEnum, the translated value. | |||
| """ | |||
| try: | |||
| res = CategoryTypeEnum(node_category) | |||
| except ValueError as err: | |||
| log.error("Invalid node category. %s", err) | |||
| raise DebuggerParamValueError("Invalid node_category.") | |||
| return res | |||
| def get_nodes_by_scope(self, scope_name, graph_name): | |||
| """ | |||
| Get node by a given scope name. | |||
| Args: | |||
| scope_name (str): The name of scope. | |||
| graph_name (str): The relative graph_name of the watched node. | |||
| Returns: | |||
| list[Node], a list of nodes. | |||
| """ | |||
| return self._graph.search_leaf_nodes_by_pattern(scope_name) | |||
| if graph_name: | |||
| graph = self._get_graph(graph_name) | |||
| else: | |||
| graph = self._whole_graph | |||
| return graph.search_leaf_nodes_by_pattern(scope_name) | |||
| def get_graph_id_by_name(self, node_name): | |||
| """ | |||
| Get graph id by node name. | |||
| Args: | |||
| node_name (str): The name of the node. | |||
| Returns: | |||
| str, the graph name of the node. | |||
| Raises: | |||
| DebuggerGraphNotExistError: If the node cannot be found in any graph. | |||
| """ | |||
| if node_name: | |||
| for graph_name, sub_graph in self._graph.items(): | |||
| if sub_graph.exist_node(name=node_name): | |||
| return graph_name | |||
| log.error('Failed to find node %s in graph. Please make sure the graph has been sent and ' | |||
| 'the node name is correct, and try again.', node_name) | |||
| raise DebuggerGraphNotExistError | |||
| def get_graph_id_by_full_name(self, node_name): | |||
| """ | |||
| Get graph id by full name. | |||
| def get_searched_node_list(self): | |||
| """Get searched node list.""" | |||
| return self._searched_node_list | |||
| Args: | |||
| node_name (str): The full name of the node. | |||
| Returns: | |||
| str, the graph name of the node. | |||
| Raises: | |||
| DebuggerNodeNotInGraphError: If the node cannot be found in any graph. | |||
| """ | |||
| graph_id = self.graph_node_map.get(node_name) if node_name else None | |||
| if not graph_id: | |||
| log.error("Failed to get graph id by full name: %s", node_name) | |||
| raise DebuggerNodeNotInGraphError(node_name) | |||
| return graph_id | |||
| def get_node_type(self, node_name): | |||
| def get_node_type(self, node_name, graph_name=None): | |||
| """ | |||
| Get the type of the specified node. | |||
| Args: | |||
| node_name (str): The debug name of the node. | |||
| graph_name (str): The relative graph_name of the node. Default: None. | |||
| Returns: | |||
| A string of the node type, name_scope or leaf. | |||
| """ | |||
| self._graph_exists() | |||
| node_type = self._graph.get_node_type(node_name) | |||
| if graph_name: | |||
| graph = self._get_graph(node_name=node_name, graph_name=graph_name) | |||
| else: | |||
| graph = self._whole_graph | |||
| node_type = graph.get_node_type(node_name) | |||
| return node_type | |||
| def get_full_name(self, node_name): | |||
| def get_full_name(self, node_name, graph_name=None): | |||
| """Get full name according to ui node name.""" | |||
| full_name = self._graph.get_full_name_by_node_name(node_name) if node_name else '' | |||
| full_name = '' | |||
| if node_name: | |||
| if graph_name: | |||
| graph = self._get_graph(node_name=node_name, graph_name=graph_name) | |||
| else: | |||
| graph = self._whole_graph | |||
| full_name = graph.get_full_name_by_node_name(node_name) | |||
| return full_name | |||
| def get_node_name_by_full_name(self, full_name): | |||
| """Get UI node name by full name.""" | |||
| if self._graph: | |||
| node_name = self._graph.get_node_name_by_full_name(full_name) | |||
| else: | |||
| node_name = '' | |||
| log.info("No graph received yet.") | |||
| return node_name | |||
| def get_node_basic_info(self, node_name, graph_name): | |||
| """Get node basic info with graph scope.""" | |||
| graph_name, node_name = self._parse_node_name(node_name=node_name, graph_name=graph_name) | |||
| graph = self._get_graph(graph_name, node_name) | |||
| full_name = graph.get_full_name_by_node_name(node_name) | |||
| node_type = graph.get_node_type(node_name) | |||
| return self.construct_node_basic_info(full_name, graph_name, node_name, node_type) | |||
| def list_nodes(self, scope): | |||
| def get_tensor_graph(self, tensor_name, graph_name): | |||
| """ | |||
| Get the nodes of every layer in graph. | |||
| Get tensor graph according to node name. | |||
| Args: | |||
| scope (str): The name of a scope. | |||
| tensor_name (str): Tensor name, format is "node_name:<slot>". | |||
| graph_name (str): The relative graph_name of the node. | |||
| Returns: | |||
| TypedDict('Nodes', {'nodes': list[Node]}), format is {'nodes': [<Node object>]}. | |||
| example: | |||
| { | |||
| "nodes" : [ | |||
| { | |||
| "attr" : | |||
| { | |||
| "index" : "i: 0\n" | |||
| }, | |||
| "input" : {}, | |||
| "name" : "input_tensor", | |||
| "output" : | |||
| { | |||
| "Default/TensorAdd-op17" : | |||
| { | |||
| "edge_type" : "data", | |||
| "scope" : "name_scope", | |||
| "shape" : [1, 16, 128, 128] | |||
| } | |||
| }, | |||
| "output_i" : -1, | |||
| "proxy_input" : {}, | |||
| "proxy_output" : {}, | |||
| "independent_layout" : False, | |||
| "subnode_count" : 0, | |||
| "type" : "Data" | |||
| } | |||
| ] | |||
| } | |||
| dict, the graph of the nodes related to the tensor. | |||
| """ | |||
| node_name, _ = tensor_name.rsplit(':', 1) | |||
| graph = self._get_graph(graph_name=graph_name, node_name=node_name) | |||
| tensor_graph = graph.get_tensor_graph(node_name) | |||
| return {'graph': tensor_graph} | |||
| @staticmethod | |||
| def construct_node_basic_info(full_name, graph_name, node_name, node_type): | |||
| """Construct node basic info.""" | |||
| node_name_with_graph_scope = '/'.join([graph_name, node_name]) if node_name else graph_name | |||
| return NodeBasicInfo(name=node_name_with_graph_scope, full_name=full_name, type=node_type) | |||
| def get_node_basic_info_by_scope(self, scope_name, graph_name): | |||
| """ | |||
| if scope and not self._graph.exist_node(scope): | |||
| raise DebuggerNodeNotInGraphError(node_name=scope) | |||
| Get node by a given scope name. | |||
| nodes = self._graph.list_node_by_scope(scope=scope) | |||
| return {'nodes': nodes} | |||
| Args: | |||
| scope_name (str): The name of scope. | |||
| graph_name (str): The relative graph_name of the watched node. | |||
| Returns: | |||
| list[NodeBasicInfo], a list of node basic infos. | |||
| """ | |||
| graph_name, node_name = self._parse_node_name(scope_name, graph_name) | |||
| graph = self._get_graph(graph_name) | |||
| nodes = graph.search_leaf_nodes_by_pattern(node_name) | |||
| res = [self.construct_node_basic_info(full_name=node.full_name, | |||
| graph_name=graph_name, | |||
| node_name=node.name, | |||
| node_type=node.type) for node in nodes] | |||
| return res | |||
| def get_node_name_by_full_name(self, full_name, graph_name): | |||
| """Get UI node name by full name and graph name.""" | |||
| if graph_name and full_name: | |||
| graph = self._get_graph(graph_name) | |||
| node_name = graph.get_node_name_by_full_name(full_name) | |||
| else: | |||
| node_name = '' | |||
| log.debug("Get empty full name.") | |||
| return node_name | |||
| def get_node_by_bfs_order(self, node_name=None, ascend=True): | |||
| """ | |||
| @@ -240,11 +522,9 @@ class GraphHandler(StreamHandlerBase): | |||
| node_name (str): The name of current chosen leaf node. | |||
| ascend (bool): If True, traverse the input nodes; | |||
| If False, traverse the output nodes. Default is True. | |||
| Returns: | |||
| Union[None, dict], the next node object in dict type or None. | |||
| """ | |||
| self._graph_exists() | |||
| bfs_order = self.bfs_order | |||
| length = len(bfs_order) | |||
| @@ -269,11 +549,11 @@ class GraphHandler(StreamHandlerBase): | |||
| f'Please check the node name {err}.' | |||
| raise DebuggerParamValueError(msg) | |||
| next_node = self.get_next_node_in_bfs(index, length, ascend) | |||
| next_node = self._get_next_node_in_bfs(index, length, ascend) | |||
| return next_node | |||
| def get_next_node_in_bfs(self, index, length, ascend): | |||
| def _get_next_node_in_bfs(self, index, length, ascend): | |||
| """ | |||
| Get the next node in bfs order. | |||
| @@ -294,28 +574,116 @@ class GraphHandler(StreamHandlerBase): | |||
| return next_node | |||
| def get_single_node(self, name): | |||
| def _graph_exists(self): | |||
| """ | |||
| Search node, and return every layer nodes until this node. | |||
| Check if the graph has been loaded in the debugger cache. | |||
| Raises: | |||
| DebuggerGraphNotExistError: If the graph does not exist. | |||
| """ | |||
| if not self._graph: | |||
| log.error('The graph does not exist. Please start the ' | |||
| 'training script and try again.') | |||
| raise DebuggerGraphNotExistError | |||
| def _get_graph(self, graph_name=None, node_name=None): | |||
| """ | |||
| Get the graph object according to graph name and node name. | |||
| Args: | |||
| name (str): The name of node. | |||
| graph_name (str): The graph name. | |||
| node_name (str): The node name. | |||
| Returns: | |||
| dict, every layer nodes until this node. | |||
| DebuggerGraph, the graph object. | |||
| Raises: | |||
| DebuggerGraphNotExistError: If the graph does not exist. | |||
| """ | |||
| nodes = self._graph.search_single_node(name) | |||
| if not graph_name and not node_name and len(self._graph) == 1: | |||
| # get the graph if there is only one graph | |||
| return list(self._graph.values())[0] | |||
| graph_name = graph_name if graph_name else self.get_graph_id_by_name(node_name) | |||
| graph = self._graph.get(graph_name) if graph_name else None | |||
| # get graph according to graph name and check the node | |||
| if graph and (not node_name or graph.exist_node(name=node_name)): | |||
| return graph | |||
| log.error('Graph %s does not contain node %s.', graph_name, node_name) | |||
| raise DebuggerGraphNotExistError | |||
| def _has_graph_scope(self, graph_name): | |||
| """Check if query with graph_scope.""" | |||
| return bool(graph_name is None and len(self._graph) > 1) | |||
| def validate_graph_name(self, graph_name): | |||
| """Validate graph_name.""" | |||
| if graph_name and self._graph.get(graph_name) is None: | |||
| log.error("No graph named %s in debugger cache.", graph_name) | |||
| raise DebuggerGraphNotExistError | |||
| if not graph_name and len(self._graph) == 1: | |||
| graph_name = self.graph_names[0] | |||
| return graph_name | |||
| return nodes | |||
| def _graph_exists(self): | |||
| def _add_graph_scope_for_nodes(self, nodes, graph_name): | |||
| """ | |||
| Check if the graph has been loaded in the debugger cache. | |||
| Add graph scope for nodes. | |||
| Args: | |||
| nodes (list[Node]): List of nodes object. | |||
| graph_name (str): The graph name. | |||
| """ | |||
| def _get_updated_node_info(cur_node, node_type): | |||
| """Add graph scope in key.""" | |||
| old_node = cur_node.get(node_type) | |||
| if not old_node: | |||
| return | |||
| new_values = {} | |||
| for old_name, node_info in old_node.items(): | |||
| new_name = '/'.join([graph_name, old_name]) if old_name else graph_name | |||
| new_values[new_name] = node_info | |||
| cur_node[node_type] = new_values | |||
| for node in nodes: | |||
| node['name'] = '/'.join([graph_name, node['name']]) if node['name'] else graph_name | |||
| _get_updated_node_info(node, 'input') | |||
| _get_updated_node_info(node, 'output') | |||
| if node.get('nodes'): | |||
| self._add_graph_scope_for_nodes(node.get('nodes'), graph_name) | |||
| def _parse_node_name(self, node_name, graph_name): | |||
| """ | |||
| Check if the node name should have graph scope. | |||
| Args: | |||
| node_name (str): The ui node name. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| str, parsed graph name. | |||
| str, parsed node name. | |||
| """ | |||
| node_name = '' if node_name is None else node_name | |||
| if self._has_graph_scope(graph_name): | |||
| names = node_name.split("/", 1) | |||
| graph_name = names[0] | |||
| node_name = names[1] if len(names) == 2 else '' | |||
| if graph_name is None and len(self._graph) == 1: | |||
| graph_name = self.graph_names[0] | |||
| return graph_name, node_name | |||
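| # Hedged examples of the parsing above, assuming two graphs are cached so ui | |||
| # node names carry the graph name as their first path component: | |||
| # 'graph_0/Default/conv1' -> ('graph_0', 'Default/conv1'); 'graph_0' -> ('graph_0', ''). | |||
| names = 'graph_0/Default/conv1'.split('/', 1) | |||
| assert (names[0], names[1] if len(names) == 2 else '') == ('graph_0', 'Default/conv1') | |||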
| def validate_node_name(self, node_name, graph_name): | |||
| """ | |||
| Validate the graph exist the specified node. | |||
| Args: | |||
| node_name (str): The ui node name. | |||
| graph_name (str): The graph name. | |||
| Raises: | |||
| DebuggerGraphNotExistError: If the graph does not exist. | |||
| DebuggerNodeNotInGraphError: If can not find the node in all graphs. | |||
| """ | |||
| if self._graph is None: | |||
| log.error('The graph does not exist. Please start the ' | |||
| 'training script and try again.') | |||
| raise DebuggerGraphNotExistError | |||
| graph = self._get_graph(graph_name=graph_name) | |||
| if not graph.exist_node(name=node_name): | |||
| log.error("graph %s doesn't find node: %s.", graph_name, node_name) | |||
| raise DebuggerNodeNotInGraphError(node_name) | |||
| @@ -13,7 +13,7 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define the metadata stream handler.""" | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import ServerStatus | |||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | |||
| @@ -29,6 +29,8 @@ class MetadataHandler(StreamHandlerBase): | |||
| self._cur_node_name = "" | |||
| self._cur_full_name = "" | |||
| self._backend = "" | |||
| self._enable_recheck = False | |||
| self._cur_graph_name = "" | |||
| @property | |||
| def device_name(self): | |||
| @@ -50,6 +52,16 @@ class MetadataHandler(StreamHandlerBase): | |||
| """The property of current node name.""" | |||
| self._cur_node_name = node_name | |||
| @property | |||
| def graph_name(self): | |||
| """The property of current node name.""" | |||
| return self._cur_graph_name | |||
| @graph_name.setter | |||
| def graph_name(self, graph_name): | |||
| """The property of current node name.""" | |||
| self._cur_graph_name = graph_name if graph_name else '' | |||
| @property | |||
| def full_name(self): | |||
| """The property of current node name.""" | |||
| @@ -90,6 +102,21 @@ class MetadataHandler(StreamHandlerBase): | |||
| """ | |||
| self._client_ip = str(value) | |||
| @property | |||
| def enable_recheck(self): | |||
| """The property of enable_recheck.""" | |||
| return self._enable_recheck and self._state == ServerStatus.WAITING and self._step > 0 | |||
| @enable_recheck.setter | |||
| def enable_recheck(self, value): | |||
| """ | |||
| Set the property of enable_recheck. | |||
| Args: | |||
| value (bool): The new value of enable_recheck. | |||
| """ | |||
| self._enable_recheck = bool(value) | |||
| def put(self, value): | |||
| """ | |||
| Put value into metadata cache. Called by grpc server. | |||
| @@ -108,7 +135,7 @@ class MetadataHandler(StreamHandlerBase): | |||
| Get updated value. Called by main server. | |||
| Args: | |||
| filter_condition (str): The filter property. | |||
| filter_condition (Union[str, list[str]]): The filter property. | |||
| Returns: | |||
| dict, the metadata. | |||
| @@ -122,10 +149,15 @@ class MetadataHandler(StreamHandlerBase): | |||
| 'pos': '0', | |||
| 'ip': self.client_ip, | |||
| 'node_name': self.node_name, | |||
| 'backend': self.backend | |||
| 'backend': self.backend, | |||
| 'enable_recheck': self.enable_recheck, | |||
| 'graph_name': self.graph_name | |||
| } | |||
| else: | |||
| metadata[filter_condition] = getattr(self, filter_condition) if \ | |||
| hasattr(self, filter_condition) else '' | |||
| if not isinstance(filter_condition, list): | |||
| filter_condition = [filter_condition] | |||
| for field in filter_condition: | |||
| metadata[field] = getattr(self, field) if \ | |||
| hasattr(self, field) else None | |||
| return {'metadata': metadata} | |||
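| # Hedged sketch of the list-valued filter handling above, with a stub object | |||
| # standing in for the metadata handler; the field values are made up. | |||
| class _FakeMeta: | |||
|     state, step = 'waiting', 3 | |||
| def pick_fields(obj, filter_condition): | |||
|     """Normalize the filter to a list and collect the matching attributes.""" | |||
|     if not isinstance(filter_condition, list): | |||
|         filter_condition = [filter_condition] | |||
|     return {field: getattr(obj, field, None) for field in filter_condition} | |||
| assert pick_fields(_FakeMeta, ['state', 'step']) == {'state': 'waiting', 'step': 3} | |||
| assert pick_fields(_FakeMeta, 'state') == {'state': 'waiting'} | |||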
| @@ -17,7 +17,7 @@ import numpy as np | |||
| from mindinsight.datavisual.data_transform.graph.node import NodeTypeEnum | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.proto.ms_graph_pb2 import DataType | |||
| from mindinsight.debugger.stream_cache.tensor import OpTensor, ConstTensor | |||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | |||
| @@ -32,6 +32,16 @@ class TensorHandler(StreamHandlerBase): | |||
| self._tensors = {} | |||
| self._cur_step = 0 | |||
| @property | |||
| def cur_step(self): | |||
| """The property of current step.""" | |||
| return self._cur_step | |||
| @property | |||
| def prev_step(self): | |||
| """The property of previous step.""" | |||
| return self._cur_step - 1 | |||
| def put(self, value): | |||
| """ | |||
| Put value into tensor cache. Called by grpc server. | |||
| @@ -98,7 +108,7 @@ class TensorHandler(StreamHandlerBase): | |||
| self._tensors[tensor.name] = cache_tensor | |||
| old_tensor = cache_tensor.get(step) | |||
| if old_tensor and not self.is_value_diff(old_tensor.value, tensor.value): | |||
| if old_tensor and not self._is_value_diff(old_tensor.value, tensor.value): | |||
| log.debug("Tensor %s of step %s has no change. Ignore it.", tensor.name, step) | |||
| return False | |||
| cache_tensor[step] = tensor | |||
| @@ -106,7 +116,7 @@ class TensorHandler(StreamHandlerBase): | |||
| return True | |||
| @staticmethod | |||
| def is_value_diff(old_value, new_value): | |||
| def _is_value_diff(old_value, new_value): | |||
| """Check tensor value if there are equal.""" | |||
| log.debug("old value type: %s, new_value type: %s", type(old_value), type(new_value)) | |||
| if old_value is None and new_value is None: | |||
| @@ -142,22 +152,28 @@ class TensorHandler(StreamHandlerBase): | |||
| Args: | |||
| filter_condition (dict): Filter condition. | |||
| - name (str): The name of tensor. | |||
| - name (str): The full name of tensor. | |||
| - node_type (str): The type of the node. | |||
| - prev (bool): Whether to get previous tensor. | |||
| Returns: | |||
| dict, the tensor_value. | |||
| """ | |||
| name = filter_condition.get('name') | |||
| node_type = filter_condition.get('node_type') | |||
| shape = filter_condition.get('shape') | |||
| tensor = self._get_tensor(name, node_type) | |||
| if filter_condition.get('prev'): | |||
| step = self.prev_step | |||
| else: | |||
| step = self.cur_step | |||
| tensor = self._get_tensor(name, node_type, step) | |||
| if not tensor: | |||
| log.error("No tensor named %s", name) | |||
| log.error("No tensor named %s at the step %s", name, step) | |||
| raise DebuggerParamValueError("No tensor named {}".format(name)) | |||
| tensor_info = tensor.get_full_info(shape) | |||
| self._update_has_prev_step_field(tensor_info, name, node_type) | |||
| self._update_has_prev_step_field(tensor_info, name, node_type, step) | |||
| return {'tensor_value': tensor_info} | |||
| def _get_tensor(self, tensor_name, node_type=None, step=None): | |||
| @@ -167,7 +183,7 @@ class TensorHandler(StreamHandlerBase): | |||
| Args: | |||
| tensor_name (str): Tensor name, format like `node_name:slot`. | |||
| node_type (str): Node type. | |||
| step (int): The step of tensor info. Default: None. Noe | |||
| step (int): The step of tensor info. Default: None. | |||
| Returns: | |||
| Union[OPTensor, ConstTensor], the tensor object. | |||
| @@ -178,7 +194,8 @@ class TensorHandler(StreamHandlerBase): | |||
| if not tensor and node_type == NodeTypeEnum.CONST.value: | |||
| const_name = tensor_name.rsplit('/', 1)[-1] | |||
| tensor = self._const_vals.get(const_name) | |||
| self._tensors[tensor_name] = {step: tensor} | |||
| if tensor: | |||
| self._tensors[tensor_name] = {step: tensor} | |||
| return tensor | |||
| @@ -205,7 +222,7 @@ class TensorHandler(StreamHandlerBase): | |||
| tensor_name = tensor_info.get('full_name') | |||
| node_type = tensor_info.get('node_type') | |||
| basic_info = self._get_basic_info(tensor_name, node_type) | |||
| flag = self._update_has_prev_step_field(basic_info, tensor_name, node_type) | |||
| flag = self._update_has_prev_step_field(basic_info, tensor_name, node_type, self.cur_step) | |||
| if flag is False: | |||
| missed_tensor = tensor_info.copy() | |||
| missed_tensor['iter'] = 'prev' | |||
| @@ -223,22 +240,23 @@ class TensorHandler(StreamHandlerBase): | |||
| return missed_tensors | |||
| def _update_has_prev_step_field(self, tensor_info, tensor_name, node_type): | |||
| def _update_has_prev_step_field(self, tensor_info, tensor_name, node_type, step): | |||
| """Update has_prev_step field in tensor info.""" | |||
| flag = None | |||
| cur_tensor_value = bool(tensor_info and tensor_info.get('value') is not None) | |||
| if node_type == NodeTypeEnum.PARAMETER.value: | |||
| flag = self._get_prev_tensor_value_status(tensor_name) | |||
| flag = self._get_prev_tensor_value_status(tensor_name, step) | |||
| if flag and cur_tensor_value: | |||
| tensor_info['has_prev_step'] = True | |||
| return flag | |||
| def _get_prev_tensor_value_status(self, tensor_name): | |||
| def _get_prev_tensor_value_status(self, tensor_name, step): | |||
| """ | |||
| Get the status of tensor value of previous step. | |||
| Args: | |||
| tensor_name (str): Tensor name. | |||
| step (int): The step of the tensor. | |||
| Returns: | |||
| Union[None, bool], the status of previous tensor value. If True, there is valid previous | |||
| @@ -247,7 +265,7 @@ class TensorHandler(StreamHandlerBase): | |||
| """ | |||
| flag = None | |||
| # check if the tensor has previous step value. | |||
| prev_step = self._cur_step - 1 | |||
| prev_step = step - 1 | |||
| if prev_step < 0: | |||
| return flag | |||
| tensor = self._get_tensor(tensor_name, step=prev_step) | |||
| @@ -314,6 +332,8 @@ class TensorHandler(StreamHandlerBase): | |||
| tensor_comparison = curr_tensor.tensor_comparison | |||
| if not tensor_comparison or tensor_comparison.tolerance != tolerance: | |||
| if isinstance(curr_tensor.value, np.ndarray) and isinstance(prev_tensor.value, np.ndarray): | |||
| if curr_tensor.value.shape != prev_tensor.value.shape: | |||
| raise DebuggerParamValueError("The shape of these two step tensors is not the same.") | |||
| tensor_diff = TensorUtils.calc_diff_between_two_tensor(curr_tensor.value, prev_tensor.value, tolerance) | |||
| if not tensor_comparison: | |||
| stats = TensorUtils.get_statistics_from_tensor(tensor_diff) | |||
| @@ -333,9 +353,34 @@ class TensorHandler(StreamHandlerBase): | |||
| result = np.stack([prev_tensor_slice, curr_tensor_slice, tensor_diff_slice], axis=-1) | |||
| tensor_info['diff'] = result.tolist() | |||
| stats = TensorUtils.get_statistics_from_tensor(tensor_diff_slice) | |||
| curr_tensor_stats = TensorUtils.get_statistics_from_tensor(curr_tensor.value) | |||
| curr_tensor_slice_stats = TensorUtils.get_statistics_from_tensor(curr_tensor_slice) | |||
| prev_tensor_stats = TensorUtils.get_statistics_from_tensor(prev_tensor.value) | |||
| prev_tensor_slice_stats = TensorUtils.get_statistics_from_tensor(prev_tensor_slice) | |||
| tensor_info['curr_step_statistics'] = TensorUtils.get_statistics_dict(stats=curr_tensor_slice_stats, | |||
| overall_stats=curr_tensor_stats) | |||
| tensor_info['prev_step_statistics'] = TensorUtils.get_statistics_dict(stats=prev_tensor_slice_stats, | |||
| overall_stats=prev_tensor_stats) | |||
| tensor_info['statistics'] = TensorUtils.get_statistics_dict(stats=stats, | |||
| overall_stats=tensor_comparison.stats) | |||
| elif isinstance(curr_tensor_slice, str): | |||
| tensor_info['diff'] = curr_tensor_slice | |||
| reply = {'tensor_value': tensor_info} | |||
| return reply | |||
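| # Hedged numpy sketch of the comparison above: previous value, current value | |||
| # and their difference are stacked element-wise (TensorUtils also applies a | |||
| # tolerance, omitted here for brevity). | |||
| import numpy as np | |||
| prev_slice = np.array([1.0, 2.0]) | |||
| curr_slice = np.array([1.5, 2.0]) | |||
| diff_slice = curr_slice - prev_slice | |||
| result = np.stack([prev_slice, curr_slice, diff_slice], axis=-1) | |||
| assert result.tolist() == [[1.0, 1.5, 0.5], [2.0, 2.0, 0.0]] | |||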
| def get_tensor_statistics(self, tensor_name, node_type): | |||
| """ | |||
| Get Tensor statistics. | |||
| Args: | |||
| tensor_name (str): Tensor name, format like `node_name:slot`. | |||
| node_type (str): Node type. | |||
| Returns: | |||
| dict, overall statistics. | |||
| """ | |||
| res = {} | |||
| tensor = self._get_tensor(tensor_name, node_type) | |||
| if tensor: | |||
| res = tensor.get_tensor_statistics() | |||
| return res | |||
| @@ -13,25 +13,37 @@ | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Define the watchpoint stream handler.""" | |||
| import numpy as np | |||
| from mindinsight.conditionmgr.condition import ValueTypeEnum | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | |||
| DebuggerParamTypeError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import is_scope_type | |||
| from mindinsight.debugger.proto.debug_grpc_pb2 import SetCMD | |||
| from mindinsight.debugger.stream_cache.watchpoint import Watchpoint, WatchpointHit, \ | |||
| WATCHPOINT_CONDITION_MAPPING | |||
| WatchNodeTree | |||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | |||
| class WatchpointHandler(StreamHandlerBase): | |||
| """watchpoint Handler.""" | |||
| """Watchpoint Handler.""" | |||
| def __init__(self): | |||
| self._watchpoints = {} | |||
| # list of ids of new created watchpoints | |||
| self._created_watchpoints = [] | |||
| # list of SetCMD of watchpoints to be deleted | |||
| self._deleted_watchpoints = [] | |||
| # dict of <id, SetCMD> of watchpoint to be updated | |||
| self._updated_watchpoints = {} | |||
| # the collection of watched node full names, which have been sent to MindSpore | |||
| self._all_watched_node_full_names = set() | |||
| # the collection of new watched node full names, which have not been sent to MindSpore | |||
| self._new_watched_node_full_names = set() | |||
| # record the nodes temporarily stored on MindSpore, which could be set as watch nodes for recheck on GPU; | |||
| # should be cleaned at the beginning of each step | |||
| self._temp_cached_node_full_names = set() | |||
| self._latest_id = 0 | |||
| self._cache_set_cmd = {} | |||
| def put(self, value): | |||
| """ | |||
| @@ -42,34 +54,50 @@ class WatchpointHandler(StreamHandlerBase): | |||
| """ | |||
| new_id = value.watchpoint_id | |||
| self._watchpoints[new_id] = value | |||
| self._created_watchpoints.append(new_id) | |||
| self._updated_watchpoints[new_id] = value | |||
| self._latest_id = new_id | |||
| log.debug("Put watchpoint %d into cache.", new_id) | |||
| def sync_set_cmd(self): | |||
| def clean_temp_cached_names(self): | |||
| """Clean temp cached node.""" | |||
| self._temp_cached_node_full_names.clear() | |||
| def add_temp_cached_name(self, node_full_name): | |||
| """Add temp stored node in cache.""" | |||
| if node_full_name: | |||
| self._temp_cached_node_full_names.add(node_full_name) | |||
| def sync_set_cmd(self, set_cmds): | |||
| """Clean temp watchpoints.""" | |||
| self._new_watched_node_full_names = set() | |||
| self._created_watchpoints = [] | |||
| self._deleted_watchpoints = [] | |||
| self._updated_watchpoints = {} | |||
| for set_cmd in set_cmds: | |||
| self._cache_set_cmd[set_cmd.id] = set_cmd | |||
| def clean_cache_set_cmd(self, set_cmd): | |||
| """Clean cache set command.""" | |||
| self._cache_set_cmd.pop(set_cmd.id, None) | |||
| def get_watchpoint_by_id(self, watchpoint_id): | |||
| """Get watchpoint by watchpoint id.""" | |||
| watchpoint = self._watchpoints.get(watchpoint_id) | |||
| if not watchpoint: | |||
| log.error("Invalid watchpoint id %d", watchpoint_id) | |||
| raise DebuggerParamValueError("Invalid watchpoint id {}".format(watchpoint_id)) | |||
| res = self.get(watchpoint_id) | |||
| watchpoint = res.get('watch_points')[0] | |||
| return watchpoint | |||
| def get(self, filter_condition=False): | |||
| def get(self, filter_condition=None): | |||
| """ | |||
| Get the watchpoints. | |||
| Args: | |||
| filter_condition (bool): If True, get all watchpoints without nodes. If False, | |||
| get updated watchpoints in SetCMD proto format. Default: False. | |||
| filter_condition (Union[None, int]): The filter conditions. Get watchpoint by | |||
| id. If None, return all watchpoints. Default: None. | |||
| Returns: | |||
| dict, the watchpoints. | |||
| dict, the watchpoint list. | |||
| """ | |||
| reply = [] | |||
| if not filter_condition: | |||
| @@ -78,17 +106,85 @@ class WatchpointHandler(StreamHandlerBase): | |||
| watchpoint_info = watchpoint.get_watch_condition_info() | |||
| reply.append(watchpoint_info) | |||
| else: | |||
| # get updated watchpoint list | |||
| for _, watchpoint in self._updated_watchpoints.items(): | |||
| set_cmd = watchpoint.get_set_cmd() | |||
| reply.append(set_cmd) | |||
| reply.extend(self._deleted_watchpoints) | |||
| self.validate_watchpoint_id(filter_condition) | |||
| reply = [self._watchpoints.get(filter_condition)] | |||
| log.debug("get the watch points with filter_condition:%s", filter_condition) | |||
| return {'watch_points': reply} | |||
| def set_watch_nodes(self, graph, graph_stream, watch_point_id): | |||
| def get_pending_commands(self, graph_stream): | |||
| """ | |||
| Get all watchpoints in SetCMD proto format. | |||
| Args: | |||
| graph_stream (GraphHandler): Graph handler. | |||
| Returns: | |||
| list[SetCMD], updated watchpoints to be sent to MindSpore. | |||
| """ | |||
| res = [] | |||
| new_watched_nodes = set() | |||
| self._all_watched_node_full_names.clear() | |||
| for _, watchpoint in self._updated_watchpoints.items(): | |||
| # construct set command with leaf nodes | |||
| watch_nodes = watchpoint.get_watch_nodes() | |||
| leaf_watch_nodes = self._expand_to_leaf_nodes(graph_stream, watch_nodes) | |||
| res.append(watchpoint.get_pending_cmd(leaf_watch_nodes)) | |||
| # update all watched node names | |||
| watch_node_names = [watch_node.full_name for watch_node in [*watch_nodes, *leaf_watch_nodes]] | |||
| new_watched_nodes.update(watch_node_names) | |||
| res.extend(self._deleted_watchpoints) | |||
| for _, set_cmd in self._cache_set_cmd.items(): | |||
| res.append(set_cmd) | |||
| self._all_watched_node_full_names = new_watched_nodes | |||
| return res | |||
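| # Hedged sketch of the merge order above, with strings standing in for SetCMD | |||
| # objects: updated watchpoints first, then deletions, then cached commands. | |||
| updated = {1: 'set_cmd_1'} | |||
| deleted = ['delete_cmd_2'] | |||
| cached = {3: 'cached_cmd_3'} | |||
| pending = [*updated.values(), *deleted, *cached.values()] | |||
| assert pending == ['set_cmd_1', 'delete_cmd_2', 'cached_cmd_3'] | |||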
| @staticmethod | |||
| def _expand_to_leaf_nodes(graph_stream, watch_nodes): | |||
| """ | |||
| Get all leaf node basic info according to watch nodes. | |||
| Args: | |||
| graph_stream (GraphHandler): Graph handler. | |||
| watch_nodes (list[NodeBasicInfo]): The list of watch node basic infos. | |||
| Returns: | |||
| list[NodeBasicInfo], expanded leaf basic node infos. | |||
| """ | |||
| leaf_watch_nodes = [] | |||
| for node in watch_nodes: | |||
| if is_scope_type(node.type): | |||
| pure_node_name = None | |||
| if len(node.name.split('/')) > 1: | |||
| graph_name, pure_node_name = node.name.split('/', 1) | |||
| else: | |||
| graph_name = node.name | |||
| search_node_infos = graph_stream.get_node_basic_info_by_scope(pure_node_name, graph_name=graph_name) | |||
| leaf_watch_nodes.extend(search_node_infos) | |||
| else: | |||
| leaf_watch_nodes.append(node) | |||
| return leaf_watch_nodes | |||
| def is_recheckable(self, backend=None): | |||
| """ | |||
| Check if current status is able to recheck. | |||
| Args: | |||
| backend (str): The backend info. 'Ascend' or 'GPU'. Default: None. | |||
| Returns: | |||
| bool, whether recheck is enabled. | |||
| """ | |||
| enable_recheck = bool(self._updated_watchpoints or self._deleted_watchpoints) | |||
| if backend == 'GPU' and enable_recheck: | |||
| # on GPU, disable recheck if there are newly watched nodes whose tensors | |||
| # have not been stored on MindSpore | |||
| diff_set = self._new_watched_node_full_names - self._all_watched_node_full_names | |||
| enable_recheck = not diff_set or diff_set.issubset(self._temp_cached_node_full_names) | |||
| return enable_recheck | |||
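| # Hedged sketch of the GPU gate above as plain set arithmetic; the node names | |||
| # are made-up stand-ins for the handler's cached full names. | |||
| new_watched = {'Default/conv1', 'Default/conv2'}   # not yet sent to MindSpore | |||
| already_watched = {'Default/conv1'}                # sent in a previous step | |||
| temp_cached = {'Default/conv2'}                    # temp stored on MindSpore | |||
| diff_set = new_watched - already_watched | |||
| assert not diff_set or diff_set.issubset(temp_cached)  # recheck stays enabled | |||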
| def set_watch_nodes(self, graph, graph_stream, watch_point_id, graph_name=None): | |||
| """ | |||
| Set watch nodes for graph. | |||
| @@ -96,54 +192,88 @@ class WatchpointHandler(StreamHandlerBase): | |||
| graph (dict): The graph with list of nodes. | |||
| graph_stream (GraphHandler): The graph handler. | |||
| watch_point_id (int): The id of watchpoint. | |||
| graph_name (str): The graph name. | |||
| """ | |||
| if not (watch_point_id and graph): | |||
| return | |||
| log.debug("add watch flags") | |||
| watchpoint = self._watchpoints.get(watch_point_id) | |||
| self._set_watch_status_recursively(graph, graph_stream, watchpoint) | |||
| self._set_watch_status_recursively(graph, graph_stream, watchpoint, graph_name) | |||
| def _set_watch_status_recursively(self, graph, graph_stream, watchpoint): | |||
| def _set_watch_status_recursively(self, graph, graph_stream, watchpoint, graph_name=None): | |||
| """Set watch status to graph.""" | |||
| if not isinstance(graph, dict): | |||
| log.warning("The graph is not dict.") | |||
| return | |||
| if graph.get('children'): | |||
| self._set_watch_status_recursively(graph.get('children'), graph_stream, watchpoint) | |||
| self._set_watch_status_recursively( | |||
| graph.get('children'), graph_stream, watchpoint, graph_name) | |||
| for node in graph.get('nodes', []): | |||
| if not isinstance(node, dict): | |||
| log.warning("The node is not dict.") | |||
| return | |||
| if graph.get('nodes'): | |||
| _ = self._set_watch_state_for_nodes(graph['nodes'], graph_stream, watchpoint, graph_name) | |||
| def _set_watch_state_for_nodes(self, nodes, graph_stream, watchpoint, graph_name): | |||
| """ | |||
| Set watch state for nodes. | |||
| Args: | |||
| nodes (list[Node]): List of node info. | |||
| graph_stream (GraphHandler): The graph handler. | |||
| watchpoint (Watchpoint): The watchpoint to be set. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| int, the aggregated watch state of the given nodes. | |||
| """ | |||
| all_watched_num = 0 | |||
| for node in nodes: | |||
| node_name = node.get('name') | |||
| if not node_name: | |||
| continue | |||
| full_name = graph_stream.get_full_name(node_name) | |||
| flag = watchpoint.get_node_status(node_name, node.get('type'), full_name) | |||
| node['watched'] = flag | |||
| # search result could have `nodes` in nodes object | |||
| if node.get('nodes'): | |||
| self._set_watch_status_recursively(node, graph_stream, watchpoint) | |||
| flag = self._set_watch_state_for_nodes(node.get('nodes'), graph_stream, watchpoint, graph_name) | |||
| else: | |||
| full_name = graph_stream.get_full_name(node_name, graph_name) | |||
| new_node_name = node_name if graph_name is None else '/'.join([graph_name, node_name]) | |||
| flag = watchpoint.get_node_status(new_node_name, node.get('type'), full_name) | |||
| node['watched'] = flag | |||
| if flag == WatchNodeTree.TOTAL_WATCH: | |||
| all_watched_num += 1 | |||
| # calculate the state of current node. | |||
| if not all_watched_num: | |||
| state = WatchNodeTree.NOT_WATCH | |||
| elif all_watched_num == len(nodes): | |||
| state = WatchNodeTree.TOTAL_WATCH | |||
| else: | |||
| state = WatchNodeTree.PARTIAL_WATCH | |||
| return state | |||
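| # Hedged sketch of the tri-state aggregation above; the numeric values of | |||
| # WatchNodeTree.NOT_WATCH/PARTIAL_WATCH/TOTAL_WATCH are assumed to be 0/1/2. | |||
| NOT_WATCH, PARTIAL_WATCH, TOTAL_WATCH = 0, 1, 2 | |||
| def aggregate_state(child_states): | |||
|     """Collapse the children's watch states into the parent's state.""" | |||
|     total = sum(1 for state in child_states if state == TOTAL_WATCH) | |||
|     if not total: | |||
|         return NOT_WATCH | |||
|     return TOTAL_WATCH if total == len(child_states) else PARTIAL_WATCH | |||
| assert aggregate_state([TOTAL_WATCH, TOTAL_WATCH]) == TOTAL_WATCH | |||
| assert aggregate_state([TOTAL_WATCH, NOT_WATCH]) == PARTIAL_WATCH | |||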
| def create_watchpoint(self, watch_condition, watch_nodes=None, watch_point_id=None): | |||
| def create_watchpoint(self, condition_mgr, watch_condition, watch_nodes=None, watch_point_id=None): | |||
| """ | |||
| Create watchpoint. | |||
| Args: | |||
| condition_mgr (ConditionMgr): Instance of ConditionMgr. | |||
| watch_condition (dict): The watch condition. | |||
| - condition (str): Accept `INF` or `NAN`. | |||
| - param (list[float]): Not defined yet. | |||
| "condition": { | |||
| id: "tensor_too_large", | |||
| "params": [ | |||
| { | |||
| "name": "abs_mean_gt", | |||
| "disable": false, | |||
| "value": 1.1 | |||
| } | |||
| ] | |||
| } | |||
| - id (str): Id of condition. | |||
| - param (list[dict]): The list of param for this condition. | |||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||
| watch_point_id (int): The id of watchpoint. | |||
| Returns: | |||
| int, the new id of watchpoint. | |||
| """ | |||
| validate_watch_condition(watch_condition) | |||
| validate_watch_condition(condition_mgr, watch_condition) | |||
| watch_condition = set_default_param(condition_mgr, watch_condition) | |||
| new_id = self._latest_id + 1 | |||
| watchpoint = Watchpoint(new_id, watch_condition) | |||
| if watch_nodes: | |||
| watchpoint.add_nodes(watch_nodes) | |||
| self._add_watch_node_in_cache(watch_nodes) | |||
| elif watch_point_id: | |||
| self.validate_watchpoint_id(watch_point_id) | |||
| watchpoint.copy_nodes_from(self._watchpoints.get(watch_point_id)) | |||
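| # Hedged example of a watch_condition accepted above; the keys mirror those | |||
| # read by validate_watch_condition, and the id/param values come from the | |||
| # docstring sample rather than an exhaustive list. | |||
| watch_condition = { | |||
|     'id': 'tensor_too_large', | |||
|     'params': [{'name': 'abs_mean_gt', 'disable': False, 'value': 1.1}], | |||
| } | |||
| # new_id = watchpoint_handler.create_watchpoint(condition_mgr, watch_condition, | |||
| #                                               watch_nodes=watch_nodes) | |||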
| @@ -157,34 +287,51 @@ class WatchpointHandler(StreamHandlerBase): | |||
| Args: | |||
| watch_point_id (int): The id of watchpoint. | |||
| watch_nodes (list[str]): The list of node names. | |||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||
| watched (bool): The update operator on nodes. If False, remove nodes from watch nodes. | |||
| If True, add nodes to watch nodes. Default: False. | |||
| Returns: | |||
| dict, empty response. | |||
| """ | |||
| self.validate_watchpoint_id(watch_point_id) | |||
| watchpoint = self._watchpoints.get(watch_point_id) | |||
| if watched: | |||
| watchpoint.add_nodes(watch_nodes) | |||
| self._add_watch_node_in_cache(watch_nodes) | |||
| else: | |||
| watchpoint.remove_nodes(watch_nodes) | |||
| self._remove_watch_node_from_cache(watch_nodes) | |||
| self._updated_watchpoints[watch_point_id] = watchpoint | |||
| log.debug("Update watchpoint %d in cache.", watch_point_id) | |||
| def delete_watchpoint(self, watch_point_id): | |||
| def delete_watchpoint(self, watch_point_id=None): | |||
| """ | |||
| Delete watchpoint. | |||
| Args: | |||
| watch_point_id (int): The id of watchpoint. | |||
| watch_point_id (Union[None, int]): The id of watchpoint. | |||
| If None, delete all watchpoints. Default: None. | |||
| """ | |||
| if watch_point_id is None: | |||
| watch_point_ids = [sub_id for sub_id, _ in self._watchpoints.items()] | |||
| else: | |||
| self.validate_watchpoint_id(watch_point_id) | |||
| watch_point_ids = [watch_point_id] | |||
| for single_id in watch_point_ids: | |||
| self._delete_single_watchpoint(single_id) | |||
| Returns: | |||
| dict, empty response. | |||
| def _delete_single_watchpoint(self, watch_point_id): | |||
| """ | |||
| Delete single watchpoint. | |||
| Args: | |||
| watch_point_id (int): The id of watchpoint. | |||
| """ | |||
| self.validate_watchpoint_id(watch_point_id) | |||
| self._watchpoints.pop(watch_point_id) | |||
| # if the watchpoint has not been created by MindSpore, clean the relative cache directly | |||
| if watch_point_id in self._created_watchpoints: | |||
| self._created_watchpoints.remove(watch_point_id) | |||
| self._updated_watchpoints.pop(watch_point_id) | |||
| log.debug("Cancel create watchpoint %d in cache.", watch_point_id) | |||
| return | |||
| set_cmd = SetCMD() | |||
| set_cmd.id = watch_point_id | |||
| set_cmd.delete = True | |||
| @@ -200,11 +347,33 @@ class WatchpointHandler(StreamHandlerBase): | |||
| log.error("Invalid watchpoint id: %d.", watch_point_id) | |||
| raise DebuggerParamValueError("Invalid watchpoint id: {}".format(watch_point_id)) | |||
| def _add_watch_node_in_cache(self, watch_nodes): | |||
| """ | |||
| Add watch nodes in cache. | |||
| Args: | |||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||
| """ | |||
| node_full_names = [node.full_name for node in watch_nodes] | |||
| self._new_watched_node_full_names.update(node_full_names) | |||
| def _remove_watch_node_from_cache(self, watch_nodes): | |||
| """ | |||
| Remove watch nodes from cache. | |||
| Args: | |||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||
| """ | |||
| for node in watch_nodes: | |||
| if node.full_name in self._new_watched_node_full_names: | |||
| self._new_watched_node_full_names.remove(node.full_name) | |||
| class WatchpointHitHandler(StreamHandlerBase): | |||
| """Watchpoint hit handler.""" | |||
| def __init__(self): | |||
| # dict of <ui node_name, dict of <slot, WatchpointHit>>, | |||
| self._hits = {} | |||
| @property | |||
| @@ -224,20 +393,41 @@ class WatchpointHitHandler(StreamHandlerBase): | |||
| - watchpoint (Watchpoint): The Watchpoint that a node hit. | |||
| - node_name (str): The UI node name. | |||
| - graph_name (str): The graph name. | |||
| """ | |||
| watchpoint_hit = WatchpointHit( | |||
| tensor_proto=value.get('tensor_proto'), | |||
| watchpoint=value.get('watchpoint'), | |||
| node_name=value.get('node_name') | |||
| node_name=value.get('node_name'), | |||
| graph_name=value.get('graph_name') | |||
| ) | |||
| # get all hit watchpoints according to node name and tensor slot | |||
| watchpoint_hits = self._get_watchpoints_by_tensor_name(watchpoint_hit.node_name, | |||
| watchpoint_hit.slot) | |||
| if watchpoint_hit not in watchpoint_hits: | |||
| watchpoint_hits.append(watchpoint_hit) | |||
| def _get_watchpoints_by_tensor_name(self, node_name, slot): | |||
| """ | |||
| Get hit tensors according to ui node name and slot. | |||
| node_name = value.get('node_name') | |||
| hit_tensors = self._hits.get(node_name) | |||
| Args: | |||
| node_name (str): The node name. | |||
| slot (str): The tensor slot. | |||
| Returns: | |||
| list, list of watchpoints. | |||
| """ | |||
| hit_node = self._hits.get(node_name) | |||
| if hit_node is None: | |||
| hit_node = {} | |||
| self._hits[node_name] = hit_node | |||
| hit_tensors = hit_node.get(slot) | |||
| if hit_tensors is None: | |||
| hit_tensors = [] | |||
| self._hits[node_name] = hit_tensors | |||
| if watchpoint_hit not in hit_tensors: | |||
| hit_tensors.append(watchpoint_hit) | |||
| hit_node[slot] = hit_tensors | |||
| return hit_tensors | |||
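| # Hedged sketch of the cache shape maintained above: ui node name -> slot -> | |||
| # list of hits; strings stand in for WatchpointHit objects. | |||
| hits = {} | |||
| hit_node = hits.setdefault('Default/conv1-Conv2d', {}) | |||
| hit_node.setdefault('0', []).append('watchpoint_hit_1') | |||
| assert hits == {'Default/conv1-Conv2d': {'0': ['watchpoint_hit_1']}} | |||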
| def get(self, filter_condition=None): | |||
| """ | |||
| @@ -263,34 +453,55 @@ class WatchpointHitHandler(StreamHandlerBase): | |||
| """Return the list of watchpoint hits.""" | |||
| watch_point_hits = [] | |||
| for node_name, watchpoint_hits in self._hits.items(): | |||
| watch_points = [watchpoint_hit.watchpoint for watchpoint_hit in watchpoint_hits] | |||
| tensors = [] | |||
| graph_name = None | |||
| for slot, tensor_hits in watchpoint_hits.items(): | |||
| if graph_name is None: | |||
| graph_name = tensor_hits[0].graph_name | |||
| tensor_info = self._get_tensor_hit_info(slot, tensor_hits) | |||
| tensors.append(tensor_info) | |||
| watch_point_hits.append({ | |||
| 'node_name': node_name, | |||
| 'watch_points': watch_points | |||
| 'tensors': tensors, | |||
| 'graph_name': graph_name | |||
| }) | |||
| return {'watch_point_hits': watch_point_hits} | |||
| @staticmethod | |||
| def _get_tensor_hit_info(slot, tensor_hits): | |||
| """ | |||
| Get watchpoint hit info of specified tensor. | |||
| Args: | |||
| slot (str): Slot id. | |||
| tensor_hits (list): A list of watchpoint hit objects that the tensor hit. | |||
| Returns: | |||
| dict, tensor hit info. | |||
| """ | |||
| res = {} | |||
| watch_points = [tensor_hit.watchpoint for tensor_hit in tensor_hits] | |||
| if watch_points: | |||
| res = { | |||
| 'slot': slot, | |||
| 'watch_points': watch_points | |||
| } | |||
| return res | |||
| def _is_tensor_hit(self, tensor_name): | |||
| """ | |||
| Check if the tensor is recorded in the hit cache. | |||
| Args: | |||
| tensor_name (str): Tensor name shown on UI. | |||
| Returns: | |||
| bool, whether the tensor is hit. | |||
| """ | |||
| node_name, slot = tensor_name.rsplit(':', 1) | |||
| watchpoint_hits = self._hits.get(node_name, {}).get(slot) | |||
| return bool(watchpoint_hits) | |||
| def update_tensor_history(self, tensor_history): | |||
| """ | |||
| @@ -308,45 +519,109 @@ class WatchpointHitHandler(StreamHandlerBase): | |||
| hit_flag = self._is_tensor_hit(tensor_name) | |||
| tensor_info['is_hit'] = hit_flag | |||
| def get_tensor_hit_infos(self, tensor_name): | |||
| """ | |||
| Get all hit information of a tensor. | |||
| Args: | |||
| tensor_name (str): Tensor name shown on UI. | |||
| Returns: | |||
| dict, tensor hit info. | |||
| """ | |||
| tensor_hit_info = {} | |||
| if self._is_tensor_hit(tensor_name): | |||
| node_name, slot = tensor_name.rsplit(':', 1) | |||
| tensor_hits = self._get_watchpoints_by_tensor_name(node_name, slot) | |||
| tensor_hit_info = self._get_tensor_hit_info(slot, tensor_hits) | |||
| return tensor_hit_info | |||
| def validate_watch_condition(condition_mgr, watch_condition): | |||
| """Validate watch condition.""" | |||
| if not isinstance(watch_condition, dict): | |||
| log.error("<watch_condition> should be dict. %s received.", watch_condition) | |||
| raise DebuggerParamTypeError("<watch_condition> should be dict.") | |||
| # validate condition_id | |||
| condition_id = watch_condition.get('id') | |||
| if condition_id not in condition_mgr.conditions.keys(): | |||
| log.error("Invalid watch condition. Acceptable values are <%s>. %s received.", | |||
| str(condition_mgr.conditions.keys()), condition_id) | |||
| raise DebuggerParamValueError("Invalid watch condition value.") | |||
| # validate param | |||
| validate_watch_condition_params(condition_mgr, watch_condition) | |||
| def validate_watch_condition_params(condition_mgr, watch_condition): | |||
| """ | |||
| Validate watch condition parameters. | |||
| Args: | |||
| condition_mgr (ConditionMgr): Instance of ConditionMgr. | |||
| watch_condition (dict): Watch condition. | |||
| - id (str): Condition id. Should be registered in condition_mgr. | |||
| - params (list[dict]): Condition parameters. Should be given for comparison | |||
| conditions. The values will be translated to np.float32. | |||
| """ | |||
| condition_id = watch_condition.get('id') | |||
| params = watch_condition.get('params') | |||
| condition = condition_mgr.get_condition(condition_id) | |||
| if condition_id in condition_mgr.get_no_param_condition(): | |||
| if params: | |||
| log.error("No param is expected for %s condition", condition_id) | |||
| raise DebuggerParamValueError("No param is expected.") | |||
| return | |||
| for param in params: | |||
| if param.get("name") not in condition.names: | |||
| log.error("Invalid name of parameter for condition: %s, available values: %s", | |||
| condition_id, condition.names) | |||
| raise DebuggerParamValueError("Invalid name of parameter.") | |||
| condition_param = condition.get_parameter_definition(param.get("name")) | |||
| if condition_param.type.name in (ValueTypeEnum.FLOAT64.name, ValueTypeEnum.INT64.name) \ | |||
| and not isinstance(param.get("value"), (float, int)): | |||
| log.error("Number param should be given for condition: %s", condition_id) | |||
| raise DebuggerParamValueError("Number param should be given.") | |||
| if condition_param.type.name == ValueTypeEnum.BOOL.name \ | |||
| and not isinstance(param.get("value"), bool): | |||
| log.error("Bool param should be given for condition: %s", condition_id) | |||
| raise DebuggerParamValueError("Bool param should be given.") | |||
| def set_default_param(condition_mgr, watch_condition): | |||
| """ | |||
| Set default param. | |||
| Args: | |||
| condition_mgr (ConditionMgr): Instance of ConditionMgr. | |||
| watch_condition (dict): The watch condition. | |||
| "condition": { | |||
| id: "tensor_too_large", | |||
| "params": [ | |||
| { | |||
| "name": "abs_mean_gt", | |||
| "disable": false, | |||
| "value": 1.1 | |||
| } | |||
| ] | |||
| } | |||
| - id (str): Id of condition. | |||
| - params (list[dict]): The list of parameters for this condition. | |||
| Returns: | |||
| dict, the new watch_condition. | |||
| """ | |||
| condition_id = watch_condition.get('id') | |||
| condition = condition_mgr.get_condition(condition_id) | |||
| for param in condition.parameters: | |||
| if not param.visible_on_ui and not param.support_disable: | |||
| watch_condition["params"].append({ | |||
| "name": param.name, | |||
| "disable": False, | |||
| "value": param.default_value | |||
| }) | |||
| watch_condition["abbr"] = condition.abbr | |||
| return watch_condition | |||
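| # Illustrative call (condition id and values hypothetical): parameters that | |||
| # are hidden on the UI and cannot be disabled are appended with their default | |||
| # values, and the condition's abbreviation is attached under 'abbr'. | |||
| # full_condition = set_default_param(condition_mgr, { | |||
| #     'id': 'tensor_too_large', | |||
| #     'params': [{'name': 'abs_mean_gt', 'disable': False, 'value': 1.1}], | |||
| # }) | |||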
| @@ -0,0 +1,15 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This package contains operators using multiple streams to deal with specific task.""" | |||
| @@ -0,0 +1,120 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """This module is aimed to provide with tensor detail info.""" | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.common.utils import Streams | |||
| class TensorDetailInfo: | |||
| """Manage tensor detail information.""" | |||
| def __init__(self, cache): | |||
| self._tensor_stream = cache.get_stream_handler(Streams.TENSOR) | |||
| self._graph_stream = cache.get_stream_handler(Streams.GRAPH) | |||
| self._hit_stream = cache.get_stream_handler(Streams.WATCHPOINT_HIT) | |||
| def validate_tensor_name(self, tensor_name, graph_name): | |||
| """ | |||
| Validate the tensor name within the given graph. | |||
| Args: | |||
| tensor_name (str): The tensor name on UI. | |||
| graph_name (str): The graph name. | |||
| """ | |||
| # validate tensor name format | |||
| if not isinstance(tensor_name, str) or ':' not in tensor_name: | |||
| log.error("Invalid tensor name. Received: %s", tensor_name) | |||
| raise DebuggerParamValueError("Invalid tensor name.") | |||
| node_name, _ = tensor_name.rsplit(':', 1) | |||
| # check if the node name is in graph | |||
| self._graph_stream.validate_node_name(node_name=node_name, graph_name=graph_name) | |||
| def get_tensor_graph(self, tensor_name, graph_name): | |||
| """ | |||
| Get the graph related to specific tensor. | |||
| Args: | |||
| tensor_name (str): The name of tensor. Format like {node_name}:{slot}. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| dict, tensor graph, format is {'nodes': [Node object]}. | |||
| The Node object = { | |||
| 'graph_name': <graph_name>, | |||
| 'name': <node name>, | |||
| 'input': {<node name>: <Edge object>}, | |||
| 'output': {<node name>: <Edge object>}, | |||
| 'slots': [<Slot object>]. | |||
| } | |||
| Edge object = { | |||
| 'data_type': <data type>, | |||
| 'edge_type': <edge type>, | |||
| 'independent_layout': bool, | |||
| 'shape': list[<dim>], | |||
| 'slot_mapping': list[pair<slot, slot>], | |||
| }. | |||
| """ | |||
| self.validate_tensor_name(tensor_name=tensor_name, graph_name=graph_name) | |||
| graph = self._graph_stream.get_tensor_graph(tensor_name, graph_name) | |||
| # add watchpoint hits info and statistics info for each tensor in tensor graph. | |||
| nodes = graph.get('graph', {}).get('nodes', []) | |||
| for node in nodes: | |||
| node['graph_name'] = graph_name | |||
| for slot_info in node.get('slots', []): | |||
| self._add_watchpoint_hit_info(slot_info, node) | |||
| self._add_statistic_info(slot_info, node) | |||
| return graph | |||
| def _add_watchpoint_hit_info(self, slot_info, node): | |||
| """ | |||
| Add the watchpoint hit info of the tensor to the slot info. | |||
| Args: | |||
| slot_info (dict): Slot object. | |||
| node (dict): Node object. | |||
| """ | |||
| tensor_name = ':'.join([node.get('name'), slot_info.get('slot')]) | |||
| slot_info.update(self._hit_stream.get_tensor_hit_infos(tensor_name)) | |||
| def _add_statistic_info(self, slot_info, node): | |||
| """ | |||
| Add the statistics info of the tensor to the slot info. | |||
| Args: | |||
| slot_info (dict): Slot object. | |||
| node (dict): Node object. | |||
| """ | |||
| tensor_name = ':'.join([node.get('full_name'), slot_info.get('slot')]) | |||
| node_type = node.get('type') | |||
| slot_info['statistics'] = self._tensor_stream.get_tensor_statistics(tensor_name, node_type) | |||
| def get_tensor_watch_points(self, tensor_name, graph_name): | |||
| """ | |||
| Get all watchpoints that the tensor hit. | |||
| Args: | |||
| tensor_name (str): Tensor name from UI. | |||
| graph_name (str): The graph name. | |||
| Returns: | |||
| list, watchpoint hit infos. | |||
| """ | |||
| # validate tensor_name | |||
| self.validate_tensor_name(tensor_name=tensor_name, graph_name=graph_name) | |||
| # get watchpoint info that the tensor hit | |||
| tensor_hit_info = self._hit_stream.get_tensor_hit_infos(tensor_name) | |||
| watch_points = tensor_hit_info.get('watch_points', []) | |||
| return watch_points | |||
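| # Hedged usage sketch (instance and tensor name hypothetical): | |||
| # detail_info = TensorDetailInfo(cache) | |||
| # hits = detail_info.get_tensor_watch_points('Default/conv1-Conv2d/Conv2D-op1:0', 'graph_0') | |||
| # 'hits' is the list of watchpoints the tensor triggered, or [] when none hit. | |||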
| @@ -32,16 +32,23 @@ class Statistics: | |||
| avg_value (float): avg value of tensor data. | |||
| count (int): total count of tensor data. | |||
| nan_count (int): count of NAN. | |||
| neg_zero_count (int): count of negative zero. | |||
| pos_zero_count (int): count of positive zero. | |||
| zero_count (int): count of zero. | |||
| neg_inf_count (int): count of negative INF. | |||
| pos_inf_count (int): count of positive INF. | |||
| """ | |||
| def __init__(self, max_value=0, min_value=0, avg_value=0, | |||
| count=0, nan_count=0, neg_inf_count=0, pos_inf_count=0): | |||
| def __init__(self, max_value=0, min_value=0, avg_value=0, count=0, | |||
| neg_zero_count=0, pos_zero_count=0, zero_count=0, | |||
| nan_count=0, neg_inf_count=0, pos_inf_count=0): | |||
| self._max = max_value | |||
| self._min = min_value | |||
| self._avg = avg_value | |||
| self._count = count | |||
| self._neg_zero_count = neg_zero_count | |||
| self._pos_zero_count = pos_zero_count | |||
| self._zero_count = zero_count | |||
| self._nan_count = nan_count | |||
| self._neg_inf_count = neg_inf_count | |||
| self._pos_inf_count = pos_inf_count | |||
| @@ -81,6 +88,21 @@ class Statistics: | |||
| """Get count of positive INF.""" | |||
| return self._pos_inf_count | |||
| @property | |||
| def neg_zero_count(self): | |||
| """Get count of negative zero.""" | |||
| return self._neg_zero_count | |||
| @property | |||
| def pos_zero_count(self): | |||
| """Get count of positive zero.""" | |||
| return self._pos_zero_count | |||
| @property | |||
| def zero_count(self): | |||
| """Get count of zero.""" | |||
| return self._zero_count | |||
| class TensorComparison: | |||
| """TensorComparison class. | |||
| @@ -204,7 +226,7 @@ class TensorUtils: | |||
| tensors (numpy.ndarray): A numpy.ndarray of tensor data. | |||
| Returns: | |||
| Statistics, an instance of Statistics. | |||
| """ | |||
| ma_value = np.ma.masked_invalid(tensors) | |||
| total, valid = tensors.size, ma_value.count() | |||
| @@ -240,10 +262,19 @@ class TensorUtils: | |||
| tensor_min = ma_value.min() | |||
| tensor_max = ma_value.max() | |||
| tensor_sum = ma_value.sum(dtype=np.float64) | |||
| with np.errstate(invalid='ignore'): | |||
| neg_zero_count = np.sum(ma_value < 0) | |||
| pos_zero_count = np.sum(ma_value > 0) | |||
| zero_count = np.sum(ma_value == 0) | |||
| statistics = Statistics(max_value=tensor_max, | |||
| min_value=tensor_min, | |||
| avg_value=tensor_sum / valid, | |||
| count=total, | |||
| neg_zero_count=neg_zero_count, | |||
| pos_zero_count=pos_zero_count, | |||
| zero_count=zero_count, | |||
| nan_count=nan_count, | |||
| neg_inf_count=neg_inf_count, | |||
| pos_inf_count=pos_inf_count) | |||
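| # Runnable sketch of the masked counting above (assumes numpy imported as np): | |||
| # masked_invalid drops NaN/Inf entries, so the sign counts only cover finite values. | |||
| # >>> ma_value = np.ma.masked_invalid(np.array([-1.0, 0.0, 2.0, np.nan])) | |||
| # >>> int(np.sum(ma_value < 0)), int(np.sum(ma_value == 0)), int(np.sum(ma_value > 0)) | |||
| # (1, 1, 1) | |||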
| @@ -269,11 +300,35 @@ class TensorUtils: | |||
| "count": stats.count, | |||
| "nan_count": stats.nan_count, | |||
| "neg_inf_count": stats.neg_inf_count, | |||
| "pos_inf_count": stats.pos_inf_count, | |||
| "pos_inf_count": stats.pos_inf_count} | |||
| overall_statistics = TensorUtils.get_overall_statistic_dict(overall_stats) | |||
| statistics.update(overall_statistics) | |||
| return statistics | |||
| @staticmethod | |||
| def get_overall_statistic_dict(overall_stats): | |||
| """ | |||
| Get overall statistics dict according to statistics value. | |||
| Args: | |||
| overall_stats (Statistics): An instance of Statistics for whole tensor. | |||
| Returns: | |||
| dict, overall statistics. | |||
| """ | |||
| res = { | |||
| "overall_max": float(overall_stats.max), | |||
| "overall_min": float(overall_stats.min) | |||
| "overall_min": float(overall_stats.min), | |||
| "overall_avg": float(overall_stats.avg), | |||
| "overall_count": overall_stats.count, | |||
| "overall_nan_count": overall_stats.nan_count, | |||
| "overall_neg_inf_count": overall_stats.neg_inf_count, | |||
| "overall_pos_inf_count": overall_stats.pos_inf_count, | |||
| "overall_zero_count": float(overall_stats.zero_count), | |||
| "overall_neg_zero_count": float(overall_stats.neg_zero_count), | |||
| "overall_pos_zero_count": float(overall_stats.pos_zero_count) | |||
| } | |||
| return res | |||
| @staticmethod | |||
| def calc_diff_between_two_tensor(first_tensor, second_tensor, tolerance): | |||
| @@ -51,8 +51,9 @@ def init_graph_handler(): | |||
| @pytest.fixture(scope='session') | |||
| def app_client(): | |||
| """This fixture is flask server.""" | |||
| packages = ["mindinsight.backend.debugger"] | |||
| packages = ["mindinsight.backend.debugger", "mindinsight.backend.conditionmgr"] | |||
| settings.ENABLE_DEBUGGER = True | |||
| mock_obj = Mock(return_value=packages) | |||
| tools.find_app_package = mock_obj | |||
| @@ -60,5 +61,10 @@ def app_client(): | |||
| from mindinsight.backend.debugger.debugger_api import BACKEND_SERVER | |||
| APP.response_class = Response | |||
| client = APP.test_client() | |||
| original_val = settings.ENABLE_RECOMMENDED_WATCHPOINTS | |||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = False | |||
| try: | |||
| yield client | |||
| finally: | |||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_val | |||
| BACKEND_SERVER.stop() | |||
| @@ -1 +1 @@ | |||
| {"metadata": {"state": "pending", "step": 0, "device_name": "", "ip": "", "node_name": "", "backend": ""}} | |||
| {"metadata": {"state": "pending", "step": 0, "device_name": "", "ip": "", "node_name": "", "backend": "", "enable_recheck": false, "graph_name": ""}} | |||
| @@ -1,58 +1,77 @@ | |||
| { | |||
| "tensor_value": { | |||
| "full_name": "Default/args0:0", | |||
| "step": 3, | |||
| "dtype": "DT_FLOAT32", | |||
| "shape": [ | |||
| 2, | |||
| 3 | |||
| ], | |||
| "diff": [ | |||
| [ | |||
| [ | |||
| 1.0, | |||
| 1.0, | |||
| 0.0 | |||
| ], | |||
| [ | |||
| 2.0, | |||
| 2.0, | |||
| 0.0 | |||
| ], | |||
| [ | |||
| 3.0, | |||
| 3.0, | |||
| 0.0 | |||
| ] | |||
| ], | |||
| [ | |||
| [ | |||
| 4.0, | |||
| 4.0, | |||
| 0.0 | |||
| ], | |||
| [ | |||
| 5.0, | |||
| 5.0, | |||
| 0.0 | |||
| ], | |||
| [ | |||
| 6.0, | |||
| 6.0, | |||
| 0.0 | |||
| ] | |||
| ] | |||
| ], | |||
| "statistics": { | |||
| "max": 0.0, | |||
| "min": 0.0, | |||
| "avg": 0.0, | |||
| "count": 6, | |||
| "nan_count": 0, | |||
| "neg_inf_count": 0, | |||
| "pos_inf_count": 0, | |||
| "overall_max": 0.0, | |||
| "overall_min": 0.0 | |||
| } | |||
| } | |||
| "tensor_value": { | |||
| "full_name": "Default/args0:0", | |||
| "step": 3, | |||
| "dtype": "DT_FLOAT32", | |||
| "shape": [2, 3], | |||
| "diff": [ | |||
| [ | |||
| [1.0, 1.0, 0.0], | |||
| [2.0, 2.0, 0.0], | |||
| [3.0, 3.0, 0.0] | |||
| ], | |||
| [ | |||
| [4.0, 4.0, 0.0], | |||
| [5.0, 5.0, 0.0], | |||
| [6.0, 6.0, 0.0] | |||
| ] | |||
| ], | |||
| "curr_step_statistics": { | |||
| "max": 6.0, | |||
| "min": 1.0, | |||
| "avg": 3.5, | |||
| "count": 6, | |||
| "nan_count": 0, | |||
| "neg_inf_count": 0, | |||
| "pos_inf_count": 0, | |||
| "overall_max": 6.0, | |||
| "overall_min": 1.0, | |||
| "overall_avg": 3.5, | |||
| "overall_count": 6, | |||
| "overall_nan_count": 0, | |||
| "overall_neg_inf_count": 0, | |||
| "overall_pos_inf_count": 0, | |||
| "overall_zero_count": 0.0, | |||
| "overall_neg_zero_count": 0.0, | |||
| "overall_pos_zero_count": 6.0 | |||
| }, | |||
| "prev_step_statistics": { | |||
| "max": 6.0, | |||
| "min": 1.0, | |||
| "avg": 3.5, | |||
| "count": 6, | |||
| "nan_count": 0, | |||
| "neg_inf_count": 0, | |||
| "pos_inf_count": 0, | |||
| "overall_max": 6.0, | |||
| "overall_min": 1.0, | |||
| "overall_avg": 3.5, | |||
| "overall_count": 6, | |||
| "overall_nan_count": 0, | |||
| "overall_neg_inf_count": 0, | |||
| "overall_pos_inf_count": 0, | |||
| "overall_zero_count": 0.0, | |||
| "overall_neg_zero_count": 0.0, | |||
| "overall_pos_zero_count": 6.0 | |||
| }, | |||
| "statistics": { | |||
| "max": 0.0, | |||
| "min": 0.0, | |||
| "avg": 0.0, | |||
| "count": 6, | |||
| "nan_count": 0, | |||
| "neg_inf_count": 0, | |||
| "pos_inf_count": 0, | |||
| "overall_max": 0.0, | |||
| "overall_min": 0.0, | |||
| "overall_avg": 0.0, | |||
| "overall_count": 6, | |||
| "overall_nan_count": 0, | |||
| "overall_neg_inf_count": 0, | |||
| "overall_pos_inf_count": 0, | |||
| "overall_zero_count": 6.0, | |||
| "overall_neg_zero_count": 0.0, | |||
| "overall_pos_zero_count": 0.0 | |||
| } | |||
| } | |||
| } | |||
| @@ -1 +1 @@ | |||
| {"watch_points": [{"id": 1, "watch_condition": {"condition": "MAX_GT", "param": 1.0}}, {"id": 2, "watch_condition": {"condition": "MAX_LT", "param": -1.0}}, {"id": 3, "watch_condition": {"condition": "MIN_GT", "param": 1e+32}}, {"id": 5, "watch_condition": {"condition": "MAX_MIN_GT", "param": 0}}, {"id": 6, "watch_condition": {"condition": "MAX_MIN_LT", "param": 0}}, {"id": 7, "watch_condition": {"condition": "MEAN_GT", "param": 0}}, {"id": 8, "watch_condition": {"condition": "MEAN_LT", "param": 0}}, {"id": 9, "watch_condition": {"condition": "INF"}}, {"id": 10, "watch_condition": {"condition": "OVERFLOW"}}]} | |||
| {"watch_points": [{"id": 1, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1.0, "disable": false}], "abbr": "MAX>"}}, {"id": 2, "watch_condition": {"id": "max_lt", "params": [{"name": "param", "value": -1.0, "disable": false}], "abbr": "MAX<"}}, {"id": 3, "watch_condition": {"id": "min_gt", "params": [{"name": "param", "value": 1e+32, "disable": false}], "abbr": "MIN>"}}, {"id": 5, "watch_condition": {"id": "max_min_gt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MAX-MIN>"}}, {"id": 6, "watch_condition": {"id": "max_min_lt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MAX-Min<"}}, {"id": 7, "watch_condition": {"id": "mean_gt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MEAN>"}}, {"id": 8, "watch_condition": {"id": "mean_lt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MEAN<"}}, {"id": 9, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 10, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} | |||
| @@ -0,0 +1 @@ | |||
| {"conditions": [{"id": "inf", "parameters": [], "supported_target_type": "TENSOR"}, {"id": "max_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "overflow", "parameters": [], "supported_target_type": "TENSOR"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"conditions": [{"id": "inf", "parameters": [], "supported_target_type": "TENSOR"}, {"id": "max_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "nan", "parameters": [], "supported_target_type": "TENSOR"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"metadata": {"state": "waiting", "step": 1, "device_name": "0", "node_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0", "backend": "GPU", "enable_recheck": false, "graph_name": "graph_1"}, "graph": {"graph_names": ["graph_0", "graph_1"], "nodes": [{"name": "graph_0", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}, {"name": "graph_1", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}]}, "watch_points": []} | |||
| @@ -0,0 +1,672 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc3.bias", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[10]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||
| "shape": [ | |||
| [ | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[10]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||
| "shape": [ | |||
| [ | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/learning_rate", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/momentum", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc3.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[10, 84]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||
| "shape": [ | |||
| [ | |||
| 10, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[10, 84]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||
| "shape": [ | |||
| [ | |||
| 10, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc2.bias", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[84]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||
| "shape": [ | |||
| [ | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc2.bias", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[84]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||
| "shape": [ | |||
| [ | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc2.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[84, 120]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||
| "shape": [ | |||
| [ | |||
| 84, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc2.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[84, 120]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||
| "shape": [ | |||
| [ | |||
| 84, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc1.bias", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[120]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||
| "shape": [ | |||
| [ | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[120]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||
| "shape": [ | |||
| [ | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc1.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[120, 400]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||
| "shape": [ | |||
| [ | |||
| 120, | |||
| 400 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[120, 400]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||
| "shape": [ | |||
| [ | |||
| 120, | |||
| 400 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/conv2.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[16, 6, 5, 5]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||
| "shape": [ | |||
| [ | |||
| 16, | |||
| 6, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.conv2.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[16, 6, 5, 5]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||
| "shape": [ | |||
| [ | |||
| 16, | |||
| 6, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/conv1.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[6, 1, 5, 5]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||
| "shape": [ | |||
| [ | |||
| 6, | |||
| 1, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| }, | |||
| { | |||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.conv1.weight", | |||
| "type": "Parameter", | |||
| "attr": { | |||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||
| "shape": "[[6, 1, 5, 5]]" | |||
| }, | |||
| "input": {}, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||
| "shape": [ | |||
| [ | |||
| 6, | |||
| 1, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||
| "edge_type": "data" | |||
| } | |||
| }, | |||
| "subnode_count": 0, | |||
| "independent_layout": true | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1,44 @@ | |||
| { | |||
| "metadata": { | |||
| "state": "waiting", | |||
| "step": 1, | |||
| "device_name": "0", | |||
| "node_name": "", | |||
| "backend": "Ascend", | |||
| "enable_recheck": false, | |||
| "graph_name": "" | |||
| }, | |||
| "graph": { | |||
| "graph_names": [ | |||
| "graph_0", | |||
| "graph_1" | |||
| ], | |||
| "nodes": [ | |||
| { | |||
| "name": "graph_0", | |||
| "type": "name_scope", | |||
| "attr": {}, | |||
| "input": {}, | |||
| "output": {}, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": {}, | |||
| "subnode_count": 2, | |||
| "independent_layout": false | |||
| }, | |||
| { | |||
| "name": "graph_1", | |||
| "type": "name_scope", | |||
| "attr": {}, | |||
| "input": {}, | |||
| "output": {}, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": {}, | |||
| "subnode_count": 2, | |||
| "independent_layout": false | |||
| } | |||
| ] | |||
| }, | |||
| "watch_points": [] | |||
| } | |||
| @@ -0,0 +1,534 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "graph_0/Default", | |||
| "type": "name_scope", | |||
| "attr": {}, | |||
| "input": { | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21": { | |||
| "shape": [ | |||
| [ | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op24": { | |||
| "shape": [ | |||
| [ | |||
| 10, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op29": { | |||
| "shape": [ | |||
| [ | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op32": { | |||
| "shape": [ | |||
| [ | |||
| 84, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op37": { | |||
| "shape": [ | |||
| [ | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op40": { | |||
| "shape": [ | |||
| [ | |||
| 120, | |||
| 400 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilter-op48": { | |||
| "shape": [ | |||
| [ | |||
| 16, | |||
| 6, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilter-op55": { | |||
| "shape": [ | |||
| [ | |||
| 6, | |||
| 1, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output": { | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropInput-op52": { | |||
| "shape": [ | |||
| [ | |||
| 16, | |||
| 6, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilter-op55": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 32, | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op53": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 6, | |||
| 4, | |||
| 14 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 4, | |||
| 3 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op40": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 400 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op36": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op32": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op28": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op24": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilter-op48": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 6, | |||
| 14, | |||
| 14 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSoftmaxCrossEntropyWithLogits/Mul-op20": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 10, | |||
| 10, | |||
| 2 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||
| }, | |||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op94": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 28, | |||
| 28, | |||
| 2 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": {}, | |||
| "subnode_count": 7, | |||
| "independent_layout": false | |||
| }, | |||
| { | |||
| "name": "graph_0/Gradients", | |||
| "type": "name_scope", | |||
| "attr": {}, | |||
| "input": { | |||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op210": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op205": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 10, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 4, | |||
| 3 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||
| }, | |||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op202": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 10, | |||
| 10, | |||
| 2 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||
| }, | |||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op197": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 6, | |||
| 14, | |||
| 14 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op188": { | |||
| "shape": [ | |||
| [ | |||
| 16, | |||
| 6, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op195": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 6, | |||
| 28, | |||
| 28 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op196": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 6, | |||
| 4, | |||
| 14 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||
| }, | |||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op192": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 28, | |||
| 28, | |||
| 2 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 32, | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/flatten-Flatten/Reshape-op9": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 400 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output": { | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||
| "shape": [ | |||
| [ | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||
| "shape": [ | |||
| [ | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||
| "shape": [ | |||
| [ | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||
| "shape": [ | |||
| [ | |||
| 16, | |||
| 6, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||
| "shape": [ | |||
| [ | |||
| 6, | |||
| 1, | |||
| 5, | |||
| 5 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||
| "shape": [ | |||
| [ | |||
| 10, | |||
| 84 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||
| "shape": [ | |||
| [ | |||
| 84, | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| }, | |||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||
| "shape": [ | |||
| [ | |||
| 120, | |||
| 400 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||
| } | |||
| }, | |||
| "output_i": 0, | |||
| "proxy_input": {}, | |||
| "proxy_output": {}, | |||
| "subnode_count": 1, | |||
| "independent_layout": false | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| {"watch_points": [{"id": 1, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} | |||
| @@ -0,0 +1 @@ | |||
| {"metadata": {"state": "waiting", "step": 2, "device_name": "0", "node_name": "", "backend": "GPU", "enable_recheck": false, "graph_name": ""}, "graph": {"graph_names": ["graph_0", "graph_1"], "nodes": [{"name": "graph_0", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}, {"name": "graph_1", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}]}, "watch_points": [{"id": 1, "watch_condition": {"id": "weight_initialization", "params": [{"name": "zero_percentage_ge", "disable": false, "value": 100}], "abbr": "WI"}}, {"id": 2, "watch_condition": {"id": "weight_change_too_large", "params": [{"name": "abs_update_ratio_mean_gt", "disable": false, "value": 0.1}], "abbr": "WCL"}}, {"id": 3, "watch_condition": {"id": "gradient_vanishing", "params": [{"name": "abs_mean_lt", "disable": false, "value": 1e-09}], "abbr": "GV"}}, {"id": 4, "watch_condition": {"id": "tensor_overflow", "params": [], "abbr": "TO"}}, {"id": 5, "watch_condition": {"id": "tensor_all_zero", "params": [{"name": "zero_percentage_ge", "disable": false, "value": 100}], "abbr": "TZ"}}]} | |||
| @@ -0,0 +1 @@ | |||
| {"watch_points": [{"id": 1, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} | |||
| @@ -4,9 +4,14 @@ | |||
| "step": 1, | |||
| "device_name": "0", | |||
| "node_name": "", | |||
| "backend": "Ascend" | |||
| "backend": "Ascend", | |||
| "enable_recheck": false, | |||
| "graph_name": "graph_0" | |||
| }, | |||
| "graph": { | |||
| "graph_names": [ | |||
| "graph_0" | |||
| ], | |||
| "nodes": [ | |||
| { | |||
| "name": "Default", | |||
| @@ -4,20 +4,19 @@ | |||
| "name": "Default/TransData-op99:0", | |||
| "full_name": "Default/TransData-op99:0", | |||
| "node_type": "TransData", | |||
| "type": "output" | |||
| "type": "output", | |||
| "graph_name": "graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/args0:0", | |||
| "full_name": "Default/args0:0", | |||
| "node_type": "Parameter", | |||
| "type": "input" | |||
| "type": "input", | |||
| "graph_name": "graph_0" | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "state": "waiting", | |||
| "step": 1, | |||
| "device_name": "0", | |||
| "node_name": "", | |||
| "backend": "Ascend" | |||
| "step": 1 | |||
| } | |||
| } | |||
| @@ -5,6 +5,7 @@ | |||
| "full_name": "Default/TransData-op99:0", | |||
| "node_type": "TransData", | |||
| "type": "output", | |||
| "graph_name": "graph_0", | |||
| "step": 1, | |||
| "dtype": "DT_FLOAT32", | |||
| "shape": [ | |||
| @@ -19,6 +20,7 @@ | |||
| "full_name": "Default/args0:0", | |||
| "node_type": "Parameter", | |||
| "type": "input", | |||
| "graph_name": "graph_0", | |||
| "step": 1, | |||
| "dtype": "DT_FLOAT32", | |||
| "shape": [ | |||
| @@ -31,9 +33,6 @@ | |||
| ], | |||
| "metadata": { | |||
| "state": "waiting", | |||
| "step": 1, | |||
| "device_name": "0", | |||
| "node_name": "", | |||
| "backend": "Ascend" | |||
| "step": 1 | |||
| } | |||
| } | |||
| @@ -0,0 +1,138 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/args0", | |||
| "full_name": "Default/args0", | |||
| "type": "Parameter", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/TransData-op99": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 32, | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0", | |||
| "statistics": {} | |||
| } | |||
| ], | |||
| "graph_name": "graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | |||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | |||
| "type": "Cast", | |||
| "input": { | |||
| "Default/TransData-op99": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 32, | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": {}, | |||
| "slots": [ | |||
| { | |||
| "slot": "0", | |||
| "statistics": {} | |||
| } | |||
| ], | |||
| "graph_name": "graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/TransData-op99", | |||
| "full_name": "Default/TransData-op99", | |||
| "type": "TransData", | |||
| "input": { | |||
| "Default/args0": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 32, | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": { | |||
| "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 1, | |||
| 32, | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0", | |||
| "watch_points": [ | |||
| { | |||
| "id": 1, | |||
| "watch_condition": { | |||
| "id": "inf", | |||
| "params": [], | |||
| "abbr": "INF" | |||
| } | |||
| } | |||
| ], | |||
| "statistics": {} | |||
| } | |||
| ], | |||
| "graph_name": "graph_0" | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1,72 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38", | |||
| "full_name": "Default/optimizer-Momentum/ApplyMomentum-op38", | |||
| "type": "ApplyMomentum", | |||
| "input": { | |||
| "Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias": { | |||
| "shape": [ | |||
| [ | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": {}, | |||
| "slots": [ | |||
| { | |||
| "slot": "0", | |||
| "statistics": {} | |||
| }, | |||
| { | |||
| "slot": "1", | |||
| "statistics": {} | |||
| } | |||
| ], | |||
| "graph_name": "graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias", | |||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias", | |||
| "type": "Parameter", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||
| "shape": [ | |||
| [ | |||
| 120 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": true, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0", | |||
| "statistics": {} | |||
| } | |||
| ], | |||
| "graph_name": "graph_0" | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| {"watch_points": []} | |||
| @@ -0,0 +1 @@ | |||
| {"watch_points": []} | |||
| @@ -1 +1 @@ | |||
| {"tensor_value": {"full_name": "Default/TransData-op99:0", "step": 1, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "statistics": {"max": 6.0, "min": 5.0, "avg": 5.5, "count": 2, "nan_count": 0, "neg_inf_count": 0, "pos_inf_count": 0, "overall_max": 6.0, "overall_min": 1.0}, "value": [5.0, 6.0], "name": "Default/TransData-op99:0"}} | |||
| {"tensor_value": {"full_name": "Default/TransData-op99:0", "step": 1, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "statistics": {"max": 6.0, "min": 5.0, "avg": 5.5, "count": 2, "nan_count": 0, "neg_inf_count": 0, "pos_inf_count": 0, "overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "value": [5.0, 6.0], "name": "Default/TransData-op99:0"}} | |||
| @@ -1 +1 @@ | |||
| {"watch_point_hits": [{"node_name": "Default/TransData-op99", "watch_points": [{"id": 1, "watch_condition": {"condition": "INF"}}]}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "watch_points": [{"id": 1, "watch_condition": {"condition": "INF"}}]}]} | |||
| {"watch_point_hits": [{"node_name": "Default/TransData-op99", "tensors": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}]}], "graph_name": "graph_0"}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "tensors": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}]}], "graph_name": "graph_0"}]} | |||
| @@ -0,0 +1 @@ | |||
| {"nodes": [{"name": "Default", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "type": "ReLU", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15", "type": "ReLU", "nodes": []}]}]}]}]}]} | |||
| @@ -0,0 +1 @@ | |||
| {"nodes": [{"name": "graph_0", "type": "name_scope", "nodes": [{"name": "graph_0/Default", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "type": "ReLU", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15", "type": "ReLU", "nodes": []}]}]}]}]}]}, {"name": "graph_1", "type": "name_scope", "nodes": [{"name": "graph_1/Default", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "type": "ReLU", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15", "type": "ReLU", "nodes": []}]}]}]}]}]}]} | |||
| @@ -0,0 +1 @@ | |||
| {"nodes": [{"name": "Gradients", "type": "name_scope", "nodes": [{"name": "Gradients/Default", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21", "type": "BiasAddGrad", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op29", "type": "BiasAddGrad", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op37", "type": "BiasAddGrad", "nodes": []}]}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5", "type": "aggregation_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op24", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op27", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op32", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op35", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op40", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op44", "type": "MatMul", "nodes": []}]}]}]}]}]}]}]}]} | |||
| @@ -1 +1,31 @@ | |||
| {"nodes": [{"name": "Default", "type": "name_scope", "nodes": [{"name": "Default/optimizer-Momentum", "type": "name_scope", "nodes": [{"name": "Default/optimizer-Momentum/Parameter[18]_7", "type": "aggregation_scope", "nodes": [{"name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias", "type": "Parameter", "nodes": [], "watched": 0}], "watched": 1}], "watched": 1}], "watched": 1}]} | |||
| { | |||
| "nodes": [ | |||
| { | |||
| "name": "Default", | |||
| "type": "name_scope", | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/optimizer-Momentum", | |||
| "type": "name_scope", | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7", | |||
| "type": "aggregation_scope", | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias", | |||
| "type": "Parameter", | |||
| "nodes": [], | |||
| "watched": 0 | |||
| } | |||
| ], | |||
| "watched": 0 | |||
| } | |||
| ], | |||
| "watched": 0 | |||
| } | |||
| ], | |||
| "watched": 0 | |||
| } | |||
| ] | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| {"nodes": [{"name": "Default", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6", "type": "aggregation_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias", "type": "Parameter", "nodes": []}]}]}]}]}]}]} | |||
| @@ -0,0 +1 @@ | |||
| {"nodes": [{"name": "graph_0", "type": "name_scope", "nodes": [{"name": "graph_0/Default", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/conv2.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/conv1.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6", "type": "aggregation_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias", "type": "Parameter", "nodes": []}]}]}]}]}]}]}, {"name": "graph_1", "type": "name_scope", "nodes": [{"name": "graph_1/Default", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/conv2.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/conv1.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6", "type": "aggregation_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "type": "Parameter", "nodes": []}, {"name": 
"graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias", "type": "Parameter", "nodes": []}]}]}]}]}]}]}]} | |||
| @@ -28,7 +28,7 @@ from tests.st.func.debugger.conftest import GRAPH_PROTO_FILE | |||
| class MockDebuggerClient: | |||
| """Mocked Debugger client.""" | |||
| def __init__(self, hostname='localhost:50051', backend='Ascend'): | |||
| def __init__(self, hostname='localhost:50051', backend='Ascend', graph_num=1): | |||
| channel = grpc.insecure_channel(hostname) | |||
| self.stub = EventListenerStub(channel) | |||
| self.flag = True | |||
| @@ -37,6 +37,7 @@ class MockDebuggerClient: | |||
| self._leaf_node = [] | |||
| self._cur_node = '' | |||
| self._backend = backend | |||
| self._graph_num = graph_num | |||
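| # number of graphs the mocked client will report to the debugger server | |||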
| def _clean(self): | |||
| """Clean cache.""" | |||
| @@ -122,16 +123,32 @@ class MockDebuggerClient: | |||
| assert response.status == EventReply.Status.OK | |||
| if training_done is False: | |||
| self.send_graph_cmd() | |||
| print("finish") | |||
| def send_graph_cmd(self): | |||
| """Send graph to debugger server.""" | |||
| self._step = 1 | |||
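| # with multiple graphs, concatenate all chunk lists and send them through the SendMultiGraphs stub; a single graph keeps the original SendGraph path | |||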
| if self._graph_num > 1: | |||
| chunks = [] | |||
| for i in range(self._graph_num): | |||
| chunks.extend(self._get_graph_chunks('graph_' + str(i))) | |||
| response = self.stub.SendMultiGraphs(self._generate_graph(chunks)) | |||
| else: | |||
| chunks = self._get_graph_chunks() | |||
| response = self.stub.SendGraph(self._generate_graph(chunks)) | |||
| assert response.status == EventReply.Status.OK | |||
| # go to command loop | |||
| self.command_loop() | |||
| def _get_graph_chunks(self, graph_name='graph_0'): | |||
| """Get graph chunks.""" | |||
| with open(GRAPH_PROTO_FILE, 'rb') as file_handle: | |||
| content = file_handle.read() | |||
| size = len(content) | |||
| graph = ms_graph_pb2.GraphProto() | |||
| graph.ParseFromString(content) | |||
| graph.name = 'graph_name' | |||
| graph.name = graph_name | |||
| content = graph.SerializeToString() | |||
| self._leaf_node = [node.full_name for node in graph.node] | |||
| # the default max limit of a grpc message is 4MB | |||
| # split the graph into 3MB chunks to stay below that limit | |||
| @@ -141,10 +158,8 @@ class MockDebuggerClient: | |||
| sub_size = min(chunk_size, size - index) | |||
| sub_chunk = Chunk(buffer=content[index: index + sub_size]) | |||
| chunks.append(sub_chunk) | |||
| response = self.stub.SendGraph(self._generate_graph(chunks)) | |||
| assert response.status == EventReply.Status.OK | |||
| # go to command loop | |||
| self.command_loop() | |||
| chunks[-1].finished = True | |||
| return chunks | |||
| @staticmethod | |||
| def _generate_graph(chunks): | |||
| @@ -202,5 +217,5 @@ class MockDebuggerClientThread: | |||
| return self._debugger_client_thread | |||
| def __exit__(self, exc_type, exc_val, exc_tb): | |||
| self._debugger_client_thread.join(timeout=5) | |||
| self._debugger_client_thread.join(timeout=3) | |||
| self._debugger_client.flag = False | |||
| @@ -22,12 +22,20 @@ import os | |||
| import pytest | |||
| from mindinsight.conf import settings | |||
| from tests.st.func.debugger.conftest import DEBUGGER_BASE_URL | |||
| from tests.st.func.debugger.mock_ms_client import MockDebuggerClient | |||
| from tests.st.func.debugger.utils import check_waiting_state, get_request_result, \ | |||
| send_and_compare_result | |||
| def send_terminate_cmd(app_client): | |||
| """Send terminate command to debugger client.""" | |||
| url = os.path.join(DEBUGGER_BASE_URL, 'control') | |||
| body_data = {'mode': 'terminate'} | |||
| send_and_compare_result(app_client, url, body_data) | |||
| class TestAscendDebugger: | |||
| """Test debugger on Ascend backend.""" | |||
| @@ -36,23 +44,6 @@ class TestAscendDebugger: | |||
| """Setup class.""" | |||
| cls._debugger_client = MockDebuggerClient(backend='Ascend') | |||
| @staticmethod | |||
| def _send_terminate_cmd(app_client): | |||
| """Send terminate command to debugger client.""" | |||
| url = os.path.join(DEBUGGER_BASE_URL, 'control') | |||
| body_data = {'mode': 'terminate'} | |||
| send_and_compare_result(app_client, url, body_data) | |||
| @staticmethod | |||
| def _create_watchpoint(app_client, condition, expect_id): | |||
| """Create watchpoint.""" | |||
| url = 'create_watchpoint' | |||
| body_data = {'condition': condition, | |||
| 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7', | |||
| 'Default/TransData-op99']} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res.get('id') == expect_id | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @@ -86,10 +77,47 @@ class TestAscendDebugger: | |||
| """Test retrieve when train_begin.""" | |||
| url = 'retrieve' | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| def test_get_conditions(self, app_client): | |||
| """Test get conditions for ascend.""" | |||
| url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions' | |||
| body_data = {} | |||
| expect_file = 'get_conditions_for_ascend.json' | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get', full_url=True) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||
| ({'mode': 'all'}, 'multi_retrieve_all.json'), | |||
| ({'mode': 'node', 'params': {'name': 'Default', 'graph_name': 'graph_1'}}, 'retrieve_scope_node.json'), | |||
| ({'mode': 'node', 'params': {'name': 'graph_0'}}, 'multi_retrieve_scope_node.json'), | |||
| ({'mode': 'node', 'params': {'name': 'graph_0/Default/optimizer-Momentum/Parameter[18]_7'}}, | |||
| 'multi_retrieve_aggregation_scope_node.json'), | |||
| ({'mode': 'node', 'params': { | |||
| 'name': 'graph_0/Default/TransData-op99', | |||
| 'single_node': True}}, 'multi_retrieve_single_node.json'), | |||
| ({'mode': 'node', 'params': { | |||
| 'name': 'Default/TransData-op99', | |||
| 'single_node': True, 'graph_name': 'graph_0'}}, 'retrieve_single_node.json') | |||
| ]) | |||
| def test_multi_retrieve_when_train_begin(self, app_client, body_data, expect_file): | |||
| """Test retrieve when train_begin.""" | |||
| url = 'retrieve' | |||
| debugger_client = MockDebuggerClient(backend='Ascend', graph_num=2) | |||
| with debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -100,22 +128,21 @@ class TestAscendDebugger: | |||
| def test_create_and_delete_watchpoint(self, app_client): | |||
| """Test create and delete watchpoint.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| conditions = [ | |||
| {'condition': 'MAX_GT', 'param': 1.0}, | |||
| {'condition': 'MAX_LT', 'param': -1.0}, | |||
| {'condition': 'MIN_GT', 'param': 1e+32}, | |||
| {'condition': 'MIN_LT', 'param': -1e+32}, | |||
| {'condition': 'MAX_MIN_GT', 'param': 0}, | |||
| {'condition': 'MAX_MIN_LT', 'param': 0}, | |||
| {'condition': 'MEAN_GT', 'param': 0}, | |||
| {'condition': 'MEAN_LT', 'param': 0}, | |||
| {'condition': 'INF'}, | |||
| {'condition': 'OVERFLOW'}, | |||
| {'id': 'max_gt', 'params': [{'name': 'param', 'value': 1.0, 'disable': False}]}, | |||
| {'id': 'max_lt', 'params': [{'name': 'param', 'value': -1.0, 'disable': False}]}, | |||
| {'id': 'min_gt', 'params': [{'name': 'param', 'value': 1e+32, 'disable': False}]}, | |||
| {'id': 'min_lt', 'params': [{'name': 'param', 'value': -1e+32, 'disable': False}]}, | |||
| {'id': 'max_min_gt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||
| {'id': 'max_min_lt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||
| {'id': 'mean_gt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||
| {'id': 'mean_lt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||
| {'id': 'inf', 'params': []}, | |||
| {'id': 'overflow', 'params': []}, | |||
| ] | |||
| for idx, condition in enumerate(conditions): | |||
| self._create_watchpoint(app_client, condition, idx + 1) | |||
| create_watchpoint(app_client, condition, idx + 1) | |||
| # delete 4-th watchpoint | |||
| url = 'delete_watchpoint' | |||
| body_data = {'watch_point_id': 4} | |||
| @@ -125,7 +152,7 @@ class TestAscendDebugger: | |||
| body_data = {'mode': 'watchpoint'} | |||
| expect_file = 'create_and_delete_watchpoint.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -138,10 +165,9 @@ class TestAscendDebugger: | |||
| watch_point_id = 1 | |||
| leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias' | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| condition = {'condition': 'INF'} | |||
| self._create_watchpoint(app_client, condition, watch_point_id) | |||
| check_waiting_state(app_client) | |||
| condition = {'id': 'inf', 'params': []} | |||
| create_watchpoint(app_client, condition, watch_point_id) | |||
| # update watchpoint list | |||
| url = 'update_watchpoint' | |||
| body_data = {'watch_point_id': watch_point_id, | |||
| @@ -153,7 +179,7 @@ class TestAscendDebugger: | |||
| body_data = {'name': leaf_node_name, 'watch_point_id': watch_point_id} | |||
| expect_file = 'search_unwatched_leaf_node.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -164,18 +190,7 @@ class TestAscendDebugger: | |||
| def test_watchpoint_hit(self, app_client): | |||
| """Test retrieve watchpoint hit.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| self._create_watchpoint(app_client, condition={'condition': 'INF'}, expect_id=1) | |||
| # send run command to get watchpoint hit | |||
| url = 'control' | |||
| body_data = {'mode': 'continue', | |||
| 'steps': 2} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'running'}} | |||
| # wait for server has received watchpoint hit | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| create_watchpoint_and_wait(app_client) | |||
| # check watchpoint hit list | |||
| url = 'retrieve' | |||
| body_data = {'mode': 'watchpoint_hit'} | |||
| @@ -188,11 +203,11 @@ class TestAscendDebugger: | |||
| 'name': 'Default/TransData-op99', | |||
| 'single_node': True, | |||
| 'watch_point_id': 1 | |||
| } | |||
| } | |||
| } | |||
| expect_file = 'retrieve_single_watchpoint_hit.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -204,8 +219,7 @@ class TestAscendDebugger: | |||
| """Test retrieve tensor value.""" | |||
| node_name = 'Default/TransData-op99' | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| # prepare tensor value | |||
| url = 'retrieve_tensor_history' | |||
| body_data = {'name': node_name} | |||
| @@ -226,7 +240,7 @@ class TestAscendDebugger: | |||
| } | |||
| expect_file = 'retrieve_tensor_value.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -238,15 +252,13 @@ class TestAscendDebugger: | |||
| """Test compare tensor value.""" | |||
| node_name = 'Default/args0' | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| # prepare tensor values | |||
| url = 'control' | |||
| body_data = {'mode': 'continue', | |||
| 'steps': 2} | |||
| get_request_result(app_client, url, body_data) | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| get_request_result( | |||
| app_client=app_client, url='retrieve_tensor_history', body_data={'name': node_name}) | |||
| res = get_request_result( | |||
| @@ -262,7 +274,7 @@ class TestAscendDebugger: | |||
| } | |||
| expect_file = 'compare_tensors.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -277,12 +289,110 @@ class TestAscendDebugger: | |||
| def test_retrieve_bfs_node(self, app_client, body_data, expect_file): | |||
| """Test retrieve bfs node.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| # prepare tensor values | |||
| url = 'retrieve_node_by_bfs' | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| def test_pause(self, app_client): | |||
| """Test pause the training.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| # send run command to keep the training running | |||
| url = 'control' | |||
| body_data = {'mode': 'continue', | |||
| 'steps': -1} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||
| # send pause command | |||
| url = 'control' | |||
| body_data = {'mode': 'pause'} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'waiting', 'enable_recheck': False}} | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("url, body_data, enable_recheck", [ | |||
| ('create_watchpoint', | |||
| {'condition': {'id': 'inf', 'params': []}, | |||
| 'watch_nodes': ['Default']}, True), | |||
| ('update_watchpoint', | |||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], | |||
| 'mode': 0}, True), | |||
| ('update_watchpoint', | |||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||
| 'mode': 1}, True), | |||
| ('delete_watchpoint', {}, True) | |||
| ]) | |||
| def test_recheck(self, app_client, url, body_data, enable_recheck): | |||
| """Test recheck.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| create_watchpoint_and_wait(app_client) | |||
| # send the parametrized watchpoint request and check whether recheck is enabled | |||
| res = get_request_result(app_client, url, body_data, method='post') | |||
| assert res['metadata']['enable_recheck'] is enable_recheck | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| def test_recommend_watchpoints(self, app_client): | |||
| """Test generating recommended watchpoints.""" | |||
| original_value = settings.ENABLE_RECOMMENDED_WATCHPOINTS | |||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = True | |||
| try: | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| url = 'retrieve' | |||
| body_data = {'mode': 'watchpoint'} | |||
| expect_file = 'recommended_watchpoints_at_startup.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='post') | |||
| send_terminate_cmd(app_client) | |||
| finally: | |||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_value | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||
| ({'tensor_name': 'Default/TransData-op99:0', 'graph_name': 'graph_0'}, 'retrieve_tensor_graph-0.json'), | |||
| ({'tensor_name': 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias:0', 'graph_name': 'graph_0'}, | |||
| 'retrieve_tensor_graph-1.json') | |||
| ]) | |||
| def test_retrieve_tensor_graph(self, app_client, body_data, expect_file): | |||
| """Test retrieve tensor graph.""" | |||
| url = 'tensor_graphs' | |||
| with self._debugger_client.get_thread_instance(): | |||
| create_watchpoint_and_wait(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='GET') | |||
| send_terminate_cmd(app_client) | |||
| class TestGPUDebugger: | |||
| """Test debugger on Ascend backend.""" | |||
| @classmethod | |||
| def setup_class(cls): | |||
| """Setup class.""" | |||
| cls._debugger_client = MockDebuggerClient(backend='GPU') | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -294,23 +404,21 @@ class TestAscendDebugger: | |||
| """Test get next node on GPU.""" | |||
| gpu_debugger_client = MockDebuggerClient(backend='GPU') | |||
| with gpu_debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| # send run command to execute to the next node | |||
| url = 'control' | |||
| body_data = {'mode': 'continue', | |||
| 'level': 'node', | |||
| 'name': 'Default/TransData-op99'} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'running'}} | |||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||
| # get metadata | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| check_waiting_state(app_client) | |||
| url = 'retrieve' | |||
| body_data = {'mode': 'all'} | |||
| expect_file = 'retrieve_next_node_on_gpu.json' | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| self._send_terminate_cmd(app_client) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @@ -318,20 +426,245 @@ class TestAscendDebugger: | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| def test_pause(self, app_client): | |||
| """Test pause the training.""" | |||
| @pytest.mark.parametrize("url, body_data, enable_recheck", [ | |||
| ('create_watchpoint', | |||
| {'condition': {'id': 'inf', 'params': []}, | |||
| 'watch_nodes': ['Default']}, False), | |||
| ('create_watchpoint', | |||
| {'condition': {'id': 'inf', 'params': []}, | |||
| 'watch_nodes': ['Default/TransData-op99']}, True), | |||
| ('update_watchpoint', | |||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], | |||
| 'mode': 0}, True), | |||
| ('update_watchpoint', | |||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||
| 'mode': 1}, False), | |||
| ('update_watchpoint', | |||
| [{'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||
| 'mode': 1}, | |||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||
| 'mode': 0} | |||
| ], True), | |||
| ('update_watchpoint', | |||
| [{'watch_point_id': 1, 'watch_nodes': ['Default/TransData-op99'], | |||
| 'mode': 0}, | |||
| {'watch_point_id': 1, 'watch_nodes': ['Default/TransData-op99'], | |||
| 'mode': 1} | |||
| ], True), | |||
| ('delete_watchpoint', {'watch_point_id': 1}, True) | |||
| ]) | |||
| def test_recheck_state(self, app_client, url, body_data, enable_recheck): | |||
| """Test update watchpoint and check the value of enable_recheck.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| flag = check_waiting_state(app_client) | |||
| assert flag is True | |||
| # send run command to execute to next node | |||
| url = 'control' | |||
| body_data = {'mode': 'continue', | |||
| 'steps': -1} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'running'}} | |||
| # send pause command | |||
| url = 'control' | |||
| body_data = {'mode': 'pause'} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'waiting'}} | |||
| self._send_terminate_cmd(app_client) | |||
| create_watchpoint_and_wait(app_client) | |||
| if not isinstance(body_data, list): | |||
| body_data = [body_data] | |||
| for sub_body_data in body_data: | |||
| res = get_request_result(app_client, url, sub_body_data, method='post') | |||
| assert res['metadata']['enable_recheck'] is enable_recheck | |||
| send_terminate_cmd(app_client) | |||
| def test_get_conditions(self, app_client): | |||
| """Test get conditions for gpu.""" | |||
| url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions' | |||
| body_data = {} | |||
| expect_file = 'get_conditions_for_gpu.json' | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get', full_url=True) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| def test_recheck(self, app_client): | |||
| """Test recheck request.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| create_watchpoint_and_wait(app_client) | |||
| # send recheck while recheck is disabled and expect a 400 response | |||
| get_request_result(app_client, 'recheck', {}, method='post', expect_code=400) | |||
| # create another watchpoint to enable recheck, then send recheck again | |||
| create_watchpoint(app_client, {'id': 'inf', 'params': []}, 2) | |||
| res = get_request_result(app_client, 'recheck', {}, method='post') | |||
| assert res['metadata']['enable_recheck'] is False | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("filter_condition, expect_file", [ | |||
| ({'name': 'fc', 'node_category': 'weight'}, 'search_weight.json'), | |||
| ({'name': 'fc', 'node_category': 'gradient'}, 'search_gradient.json'), | |||
| ({'node_category': 'activation'}, 'search_activation.json') | |||
| ]) | |||
| def test_search_by_category(self, app_client, filter_condition, expect_file): | |||
| """Test recheck request.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, 'search', filter_condition, expect_file, | |||
| method='get') | |||
| send_terminate_cmd(app_client) | |||
| class TestMultiGraphDebugger: | |||
| """Test debugger on Ascend backend.""" | |||
| @classmethod | |||
| def setup_class(cls): | |||
| """Setup class.""" | |||
| cls._debugger_client = MockDebuggerClient(backend='Ascend', graph_num=2) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||
| ({'mode': 'all'}, 'multi_retrieve_all.json'), | |||
| ({'mode': 'node', 'params': {'name': 'Default', 'graph_name': 'graph_1'}}, 'retrieve_scope_node.json'), | |||
| ({'mode': 'node', 'params': {'name': 'graph_0'}}, 'multi_retrieve_scope_node.json'), | |||
| ({'mode': 'node', 'params': {'name': 'graph_0/Default/optimizer-Momentum/Parameter[18]_7'}}, | |||
| 'multi_retrieve_aggregation_scope_node.json'), | |||
| ({'mode': 'node', 'params': { | |||
| 'name': 'graph_0/Default/TransData-op99', | |||
| 'single_node': True}}, 'multi_retrieve_single_node.json'), | |||
| ({'mode': 'node', 'params': { | |||
| 'name': 'Default/TransData-op99', | |||
| 'single_node': True, 'graph_name': 'graph_0'}}, 'retrieve_single_node.json') | |||
| ]) | |||
| def test_multi_retrieve_when_train_begin(self, app_client, body_data, expect_file): | |||
| """Test retrieve when train_begin.""" | |||
| url = 'retrieve' | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("filter_condition, expect_file", [ | |||
| ({'name': '', 'node_category': 'weight'}, 'search_weight_multi_graph.json'), | |||
| ({'node_category': 'activation'}, 'search_activation_multi_graph.json') | |||
| ]) | |||
| def test_search_by_category_with_multi_graph(self, app_client, filter_condition, expect_file): | |||
| """Test search by category request.""" | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, 'search', filter_condition, expect_file, method='get') | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("filter_condition, expect_id", [ | |||
| ({'condition': {'id': 'inf'}, | |||
| 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], | |||
| 'graph_name': 'graph_0'}, 1), | |||
| ({'condition': {'id': 'inf'}, | |||
| 'watch_nodes': ['graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1'], | |||
| 'graph_name': None}, 1) | |||
| ]) | |||
| def test_create_watchpoint(self, app_client, filter_condition, expect_id): | |||
| """Test create watchpoint with multiple graphs.""" | |||
| url = 'create_watchpoint' | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| res = get_request_result(app_client, url, filter_condition) | |||
| assert res.get('id') == expect_id | |||
| send_terminate_cmd(app_client) | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("params, expect_file", [ | |||
| ({'level': 'node'}, 'multi_next_node.json'), | |||
| ({'level': 'node', 'node_name': 'graph_0/Default/TransData-op99'}, 'multi_next_node.json'), | |||
| ({'level': 'node', 'node_name': 'Default/TransData-op99', 'graph_name': 'graph_0'}, | |||
| 'multi_next_node.json') | |||
| ]) | |||
| def test_continue_on_gpu(self, app_client, params, expect_file): | |||
| """Test get next node on GPU.""" | |||
| gpu_debugger_client = MockDebuggerClient(backend='GPU', graph_num=2) | |||
| original_value = settings.ENABLE_RECOMMENDED_WATCHPOINTS | |||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = True | |||
| try: | |||
| with gpu_debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| # send run command to execute to the next node | |||
| url = 'control' | |||
| body_data = {'mode': 'continue'} | |||
| body_data.update(params) | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||
| # get metadata | |||
| check_waiting_state(app_client) | |||
| url = 'retrieve' | |||
| body_data = {'mode': 'all'} | |||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||
| send_terminate_cmd(app_client) | |||
| finally: | |||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_value | |||
| @pytest.mark.level0 | |||
| @pytest.mark.env_single | |||
| @pytest.mark.platform_x86_cpu | |||
| @pytest.mark.platform_arm_ascend_training | |||
| @pytest.mark.platform_x86_gpu_training | |||
| @pytest.mark.platform_x86_ascend_training | |||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||
| ({'tensor_name': 'Default/TransData-op99:0', 'graph_name': 'graph_0'}, 'retrieve_tensor_hits-0.json'), | |||
| ({'tensor_name': 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias:0', 'graph_name': 'graph_0'}, | |||
| 'retrieve_tensor_hits-1.json') | |||
| ]) | |||
| def test_retrieve_tensor_hits(self, app_client, body_data, expect_file): | |||
| """Test retrieve tensor graph.""" | |||
| url = 'tensor_hits' | |||
| with self._debugger_client.get_thread_instance(): | |||
| check_waiting_state(app_client) | |||
| send_and_compare_result(app_client, url, body_data, expect_file, method='GET') | |||
| send_terminate_cmd(app_client) | |||
| def create_watchpoint(app_client, condition, expect_id): | |||
| """Create watchpoint.""" | |||
| url = 'create_watchpoint' | |||
| body_data = {'condition': condition, | |||
| 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7', | |||
| 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias', | |||
| 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias', | |||
| 'Default/TransData-op99']} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res.get('id') == expect_id | |||
| def create_watchpoint_and_wait(app_client): | |||
| """Preparation for recheck.""" | |||
| check_waiting_state(app_client) | |||
| create_watchpoint(app_client, condition={'id': 'inf', 'params': []}, expect_id=1) | |||
| # send run command to get watchpoint hit | |||
| url = 'control' | |||
| body_data = {'mode': 'continue', | |||
| 'steps': 2} | |||
| res = get_request_result(app_client, url, body_data) | |||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||
| # wait until the server has received the watchpoint hit | |||
| check_waiting_state(app_client) | |||
| @@ -27,19 +27,24 @@ def check_waiting_state(app_client): | |||
| body_data = {'mode': 'all'} | |||
| max_try_times = 30 | |||
| count = 0 | |||
| flag = False | |||
| while count < max_try_times: | |||
| res = get_request_result(app_client, url, body_data) | |||
| state = res.get('metadata', {}).get('state') | |||
| if state == 'waiting': | |||
| return True | |||
| flag = True | |||
| break | |||
| count += 1 | |||
| time.sleep(0.1) | |||
| return False | |||
| assert flag is True | |||
| def get_request_result(app_client, url, body_data, method='post', expect_code=200): | |||
| def get_request_result(app_client, url, body_data, method='post', expect_code=200, full_url=False): | |||
| """Get request result.""" | |||
| real_url = os.path.join(DEBUGGER_BASE_URL, url) | |||
| if not full_url: | |||
| real_url = os.path.join(DEBUGGER_BASE_URL, url) | |||
| else: | |||
| real_url = url | |||
| if method == 'post': | |||
| response = app_client.post(real_url, data=json.dumps(body_data)) | |||
| else: | |||
| @@ -50,9 +55,9 @@ def get_request_result(app_client, url, body_data, method='post', expect_code=20 | |||
| return res | |||
| def send_and_compare_result(app_client, url, body_data, expect_file=None, method='post'): | |||
| def send_and_compare_result(app_client, url, body_data, expect_file=None, method='post', full_url=False): | |||
| """Send and compare result.""" | |||
| res = get_request_result(app_client, url, body_data, method=method) | |||
| res = get_request_result(app_client, url, body_data, method=method, full_url=full_url) | |||
| delete_random_items(res) | |||
| if expect_file: | |||
| real_path = os.path.join(DEBUGGER_EXPECTED_RESULTS, 'restful_results', expect_file) | |||
| @@ -18,8 +18,6 @@ import os | |||
| from google.protobuf import json_format | |||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | |||
| from mindinsight.debugger.common.utils import NodeBasicInfo | |||
| from mindinsight.debugger.proto import ms_graph_pb2 | |||
| from mindinsight.debugger.stream_handler.graph_handler import GraphHandler | |||
| from mindinsight.debugger.stream_handler.watchpoint_handler import WatchpointHitHandler | |||
| @@ -46,7 +44,7 @@ def init_graph_handler(): | |||
| """Init GraphHandler.""" | |||
| graph = get_graph_proto() | |||
| graph_handler = GraphHandler() | |||
| graph_handler.put(graph) | |||
| graph_handler.put({graph.name: graph}) | |||
| return graph_handler | |||
| @@ -64,16 +62,10 @@ def get_node_basic_infos(node_names): | |||
| if not node_names: | |||
| return [] | |||
| graph_stream = init_graph_handler() | |||
| graph_name = graph_stream.graph_names[0] | |||
| node_infos = [] | |||
| for node_name in node_names: | |||
| node_type = graph_stream.get_node_type(node_name) | |||
| if node_type == NodeTypeEnum.AGGREGATION_SCOPE.value: | |||
| sub_nodes = graph_stream.get_nodes_by_scope(node_name) | |||
| sub_infos = [NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||
| for node in sub_nodes] | |||
| node_infos.extend(sub_infos) | |||
| full_name = graph_stream.get_full_name(node_name) | |||
| node_infos.append(NodeBasicInfo(name=node_name, full_name=full_name, type=node_type)) | |||
| node_infos.append(graph_stream.get_node_basic_info(node_name, graph_name)) | |||
| return node_infos | |||
| @@ -81,13 +73,10 @@ def get_watch_nodes_by_search(watch_nodes): | |||
| """Get watched leaf nodes by search name.""" | |||
| watched_leaf_nodes = [] | |||
| graph_stream = init_graph_handler() | |||
| graph_name = graph_stream.graph_names[0] | |||
| for search_name in watch_nodes: | |||
| search_nodes = graph_stream.get_searched_node_list() | |||
| search_node_names = [ | |||
| NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||
| for node in search_nodes | |||
| if node.name.startswith(search_name)] | |||
| watched_leaf_nodes.extend(search_node_names) | |||
| search_node_info = graph_stream.get_node_basic_info_by_scope(search_name, graph_name) | |||
| watched_leaf_nodes.extend(search_node_info) | |||
| return watched_leaf_nodes | |||
| @@ -141,7 +130,7 @@ def mock_tensor_history(): | |||
| return tensor_history | |||
| def compare_debugger_result_with_file(res, expect_file): | |||
| def compare_debugger_result_with_file(res, expect_file, save=False): | |||
| """ | |||
| Compare debugger result with file. | |||
| @@ -150,4 +139,8 @@ def compare_debugger_result_with_file(res, expect_file): | |||
| expect_file: The expected file name. | |||
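| save: Whether to save res as the expected result file instead of comparing. Default: False. | |||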
| """ | |||
| real_path = os.path.join(DEBUGGER_EXPECTED_RESULTS, expect_file) | |||
| compare_result_with_file(res, real_path) | |||
| if save: | |||
| with open(real_path, 'w') as file_handler: | |||
| json.dump(res, file_handler) | |||
| else: | |||
| compare_result_with_file(res, real_path) | |||
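The `save` switch turns the comparison into a golden-file regeneration step; it should stay False in committed tests. Usage sketch:

```python
# Development only: write the expected output once.
compare_debugger_result_with_file(res, 'graph/get_tensor_graph-0.json', save=True)
# Normal runs: assert against the stored golden file.
compare_debugger_result_with_file(res, 'graph/get_tensor_graph-0.json')
```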
| @@ -1 +1 @@ | |||
| {"metadata": {"state": "waiting", "step": 0, "device_name": "", "pos": "0", "ip": "", "node_name": "", "backend": ""}, "graph": {}, "watch_points": []} | |||
| {"metadata": {"state": "waiting", "step": 0, "device_name": "", "pos": "0", "ip": "", "node_name": "", "backend": "", "enable_recheck": false, "graph_name": ""}, "graph": {}, "watch_points": []} | |||
| @@ -1 +1,36 @@ | |||
| {"tensor_history": [{"name": "Default/TransData-op99:0", "full_name": "Default/TransData-op99:0", "node_type": "TransData", "type": "output", "step": 0, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "value": "click to view"}, {"name": "Default/args0:0", "full_name": "Default/args0:0", "node_type": "Parameter", "type": "input", "step": 0, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "value": "click to view"}], "metadata": {"state": "waiting", "step": 0, "device_name": "", "pos": "0", "ip": "", "node_name": "", "backend": ""}} | |||
| { | |||
| "tensor_history": [ | |||
| { | |||
| "name": "Default/TransData-op99:0", | |||
| "full_name": "Default/TransData-op99:0", | |||
| "node_type": "TransData", | |||
| "type": "output", | |||
| "step": 0, | |||
| "dtype": "DT_FLOAT32", | |||
| "shape": [ | |||
| 2, | |||
| 3 | |||
| ], | |||
| "has_prev_step": false, | |||
| "value": "click to view" | |||
| }, | |||
| { | |||
| "name": "Default/args0:0", | |||
| "full_name": "Default/args0:0", | |||
| "node_type": "Parameter", | |||
| "type": "input", | |||
| "step": 0, | |||
| "dtype": "DT_FLOAT32", | |||
| "shape": [ | |||
| 2, | |||
| 3 | |||
| ], | |||
| "has_prev_step": false, | |||
| "value": "click to view" | |||
| } | |||
| ], | |||
| "metadata": { | |||
| "state": "waiting", | |||
| "step": 0 | |||
| } | |||
| } | |||
| @@ -0,0 +1,197 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/args1", | |||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/args1", | |||
| "type": "Parameter", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||
| "shape": [ | |||
| [ | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_INT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", | |||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", | |||
| "type": "Const", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst2", | |||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst2", | |||
| "type": "Const", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op18", | |||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op18", | |||
| "type": "SoftmaxCrossEntropyWithLogits", | |||
| "input": { | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": {}, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| }, | |||
| { | |||
| "slot": "1" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0", | |||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0", | |||
| "type": "OneHot", | |||
| "input": { | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/args1": { | |||
| "shape": [ | |||
| [ | |||
| 32 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_INT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| }, | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| }, | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst2": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": { | |||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op18": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1,176 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op201", | |||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op201", | |||
| "type": "Cast", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 10, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/tuple_getitem[10]_0/tuple_getitem-op203", | |||
| "full_name": "Default/tuple_getitem-op203", | |||
| "type": "tuple_getitem", | |||
| "input": { | |||
| "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89": { | |||
| "shape": [ | |||
| [], | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ], | |||
| [ | |||
| "1", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": {}, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/tuple_getitem[10]_0/tuple_getitem-op202", | |||
| "full_name": "Default/tuple_getitem-op202", | |||
| "type": "tuple_getitem", | |||
| "input": { | |||
| "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89": { | |||
| "shape": [ | |||
| [], | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ], | |||
| [ | |||
| "1", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": {}, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89", | |||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89", | |||
| "type": "ReLUV2", | |||
| "input": { | |||
| "Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op201": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 10, | |||
| 10 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": { | |||
| "Default/tuple_getitem[10]_0/tuple_getitem-op203": { | |||
| "shape": [ | |||
| [], | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ], | |||
| [ | |||
| "1", | |||
| "" | |||
| ] | |||
| ] | |||
| }, | |||
| "Default/tuple_getitem[10]_0/tuple_getitem-op202": { | |||
| "shape": [ | |||
| [], | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ], | |||
| [ | |||
| "1", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| }, | |||
| { | |||
| "slot": "1" | |||
| } | |||
| ] | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1,166 @@ | |||
| { | |||
| "graph": { | |||
| "nodes": [ | |||
| { | |||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op7", | |||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op7", | |||
| "type": "MaxPoolWithArgmax", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||
| "shape": [ | |||
| [], | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT16]']", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ], | |||
| [ | |||
| "1", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| }, | |||
| { | |||
| "slot": "1" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/tuple_getitem[10]_0/cst28", | |||
| "full_name": "Default/tuple_getitem[10]_0/cst28", | |||
| "type": "Const", | |||
| "input": {}, | |||
| "output": { | |||
| "Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46", | |||
| "full_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46", | |||
| "type": "MaxPoolGradWithArgmax", | |||
| "input": { | |||
| "Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 4, | |||
| 3 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT16]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": {}, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| }, | |||
| { | |||
| "name": "Default/tuple_getitem[10]_0/tuple_getitem-op206", | |||
| "full_name": "Default/tuple_getitem-op206", | |||
| "type": "tuple_getitem", | |||
| "input": { | |||
| "Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op7": { | |||
| "shape": [ | |||
| [], | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT16]']", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ], | |||
| [ | |||
| "1", | |||
| "" | |||
| ] | |||
| ] | |||
| }, | |||
| "Default/tuple_getitem[10]_0/cst28": { | |||
| "shape": [ | |||
| [] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "output": { | |||
| "Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46": { | |||
| "shape": [ | |||
| [ | |||
| 32, | |||
| 16, | |||
| 4, | |||
| 3 | |||
| ] | |||
| ], | |||
| "edge_type": "data", | |||
| "independent_layout": false, | |||
| "data_type": "DT_TENSOR[DT_UINT16]", | |||
| "slot_mapping": [ | |||
| [ | |||
| "0", | |||
| "" | |||
| ] | |||
| ] | |||
| } | |||
| }, | |||
| "slots": [ | |||
| { | |||
| "slot": "0" | |||
| } | |||
| ] | |||
| } | |||
| ] | |||
| } | |||
| } | |||
| @@ -0,0 +1 @@ | |||
| {"node_names": ["Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/conv2.weight", "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/conv1.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias"]} | |||
| @@ -0,0 +1 @@ | |||
| {"node_names": ["Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15"]} | |||
| @@ -1 +0,0 @@ | |||
| {"tensor_history": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", "node_type": "Cast", "type": "output"}, {"name": "Default/TransData-op99:0", "full_name": "Default/TransData-op99:0", "node_type": "TransData", "type": "input"}]} | |||
| @@ -0,0 +1,18 @@ | |||
| { | |||
| "tensor_history": [ | |||
| { | |||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", | |||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", | |||
| "node_type": "Cast", | |||
| "type": "output", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/TransData-op99:0", | |||
| "full_name": "Default/TransData-op99:0", | |||
| "node_type": "TransData", | |||
| "type": "input", | |||
| "graph_name": "kernel_graph_0" | |||
| } | |||
| ] | |||
| } | |||
| @@ -1 +1,53 @@ | |||
| {"tensor_history": [{"name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:0", "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:0", "node_type": "ApplyMomentum", "type": "output"}, {"name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:1", "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:1", "node_type": "ApplyMomentum", "type": "output"}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", "full_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", "node_type": "BiasAddGrad", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", "node_type": "Parameter", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", "node_type": "Parameter", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", "node_type": "Parameter", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", "node_type": "Parameter", "type": "input"}]} | |||
| { | |||
| "tensor_history": [ | |||
| { | |||
| "name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:0", | |||
| "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:0", | |||
| "node_type": "ApplyMomentum", | |||
| "type": "output", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:1", | |||
| "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:1", | |||
| "node_type": "ApplyMomentum", | |||
| "type": "output", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", | |||
| "full_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", | |||
| "node_type": "BiasAddGrad", | |||
| "type": "input", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", | |||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", | |||
| "node_type": "Parameter", | |||
| "type": "input", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", | |||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", | |||
| "node_type": "Parameter", | |||
| "type": "input", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", | |||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", | |||
| "node_type": "Parameter", | |||
| "type": "input", | |||
| "graph_name": "kernel_graph_0" | |||
| }, | |||
| { | |||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", | |||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", | |||
| "node_type": "Parameter", | |||
| "type": "input", | |||
| "graph_name": "kernel_graph_0" | |||
| } | |||
| ] | |||
| } | |||
| @@ -3,31 +3,23 @@ | |||
| "watchCondition": { | |||
| "condition": "inf" | |||
| }, | |||
| "id": 1 | |||
| "id": 1, | |||
| "watch_nodes_num": 0 | |||
| }, | |||
| { | |||
| "watchCondition": { | |||
| "condition": "inf" | |||
| }, | |||
| "id": 2, | |||
| "watchNodes": [ | |||
| { | |||
| "nodeName": "Default", | |||
| "nodeType": "scope" | |||
| } | |||
| ] | |||
| "watch_nodes_num": 172 | |||
| }, | |||
| { | |||
| "watchCondition": { | |||
| "condition": "max_gt", | |||
| "value": 1.0 | |||
| "params": [{"name": "param", "value": 1}], | |||
| "value": 1 | |||
| }, | |||
| "id": 3, | |||
| "watchNodes": [ | |||
| { | |||
| "nodeName": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92", | |||
| "nodeType": "leaf" | |||
| } | |||
| ] | |||
| "watch_nodes_num": 1 | |||
| } | |||
| ] | |||
| @@ -1 +1 @@ | |||
| [{"id": 1, "watch_condition": {"condition": "INF"}}, {"id": 2, "watch_condition": {"condition": "INF"}}, {"id": 3, "watch_condition": {"condition": "MAX_GT", "param": 1}}] | |||
| [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 2, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 3, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1, "disable": false}], "abbr": "MAX>"}}] | |||
| @@ -1 +1,22 @@ | |||
| {"watch_point_hits": [{"node_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92", "watch_points": [{"id": 1, "watch_condition": {"condition": "MAX_GT", "param": 1}}]}]} | |||
| { | |||
| "watch_point_hits": [ | |||
| { | |||
| "node_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92", | |||
| "tensors": [ | |||
| { | |||
| "slot": "0", | |||
| "watch_points": [ | |||
| { | |||
| "id": 1, | |||
| "watch_condition": { | |||
| "condition": "MAX_GT", | |||
| "param": 1 | |||
| } | |||
| } | |||
| ] | |||
| } | |||
| ], | |||
| "graph_name": "kernel_graph_0" | |||
| } | |||
| ] | |||
| } | |||
| @@ -0,0 +1,15 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Test for debugger stream cache.""" | |||
| @@ -0,0 +1,77 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Function: | |||
| Test the debugger node type identifier. | |||
| Usage: | |||
| pytest tests/ut/debugger | |||
| """ | |||
| from unittest.mock import MagicMock | |||
| import pytest | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.stream_cache.node_type_identifier import NodeTypeIdentifier | |||
| class TestNodeTypeIdentifier: | |||
| """Test NodeTypeIdentifier.""" | |||
| @pytest.mark.parametrize("name, node_type, result", [ | |||
| ('Default/mock/node_name.bias', "Parameter", True), | |||
| ('Default/mock/node_name.weight', "Parameter", True), | |||
| ('Gradients/mock/node_name.bias', "Parameter", False), | |||
| ('Default/optimizer-mock/node_name.bias', "Parameter", False), | |||
| ]) | |||
| def test_weight_node(self, name, node_type, result): | |||
| """Test weight node.""" | |||
| identifier = NodeTypeIdentifier('weight') | |||
| mock_node = MagicMock(type=node_type) | |||
| mock_node.name = name | |||
| res = identifier.is_match(mock_node) | |||
| assert res is result | |||
| @pytest.mark.parametrize("name, node_type, result", [ | |||
| ('Default/mock/node_name.bias', "Parameter", False), | |||
| ('Gradients/mock/node_name.bias', "Parameter", False), | |||
| ('Gradients/mock-mock/node_name.bias', "ReluGrad", True), | |||
| ]) | |||
| def test_gradient_node(self, name, node_type, result): | |||
| """Test gradient node.""" | |||
| identifier = NodeTypeIdentifier('gradient') | |||
| mock_node = MagicMock(type=node_type) | |||
| mock_node.name = name | |||
| res = identifier.is_match(mock_node) | |||
| assert res is result | |||
| @pytest.mark.parametrize("name, node_type, condition, result", [ | |||
| ('Default/mock/relu_ReLU-op11', "ReLU", None, True), | |||
| ('Gradients/mock/relu_ReLU-op11', "ReLU", None, False), | |||
| ('Default/mock/relu_ReLU-op11', "Parameter", None, False), | |||
| ('Default/mock/relu_ReLU-op11', "ReLU", {'activation_func': 'Softmax'}, False), | |||
| ('Default/mock/relu_ReLU-op11', "Softmax", {'activation_func': ['ReLU', 'Softmax']}, True) | |||
| ]) | |||
| def test_activate_node(self, name, node_type, condition, result): | |||
| """Test activate node.""" | |||
| identifier = NodeTypeIdentifier('activation') | |||
| mock_node = MagicMock(type=node_type) | |||
| mock_node.name = name | |||
| res = identifier.is_match(mock_node, condition) | |||
| assert res is result | |||
| def test_invalid_func(self): | |||
| """Test invalid func.""" | |||
| with pytest.raises(DebuggerParamValueError, match='Invalid identify type.'): | |||
| NodeTypeIdentifier('invalid_type') | |||
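A minimal usage sketch mirroring the parametrized cases above (MagicMock stands in for a graph node):

```python
from unittest.mock import MagicMock

from mindinsight.debugger.stream_cache.node_type_identifier import NodeTypeIdentifier

identifier = NodeTypeIdentifier('weight')
node = MagicMock(type='Parameter')
node.name = 'Default/mock/node_name.weight'  # 'name' must be set after construction on MagicMock
assert identifier.is_match(node) is True
```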
| @@ -22,7 +22,7 @@ import os | |||
| import pytest | |||
| from tests.ut.debugger.configurations import init_graph_handler | |||
| from tests.ut.debugger.configurations import init_graph_handler, compare_debugger_result_with_file | |||
| from tests.utils.tools import compare_result_with_file | |||
| @@ -46,11 +46,12 @@ class TestGraphHandler: | |||
| """Test get.""" | |||
| result = self.graph_handler.get(filter_condition) | |||
| file_path = os.path.join(self.graph_results_dir, result_file) | |||
| compare_debugger_result_with_file(result, file_path, True) | |||
| compare_result_with_file(result, file_path) | |||
| @pytest.mark.parametrize("node_name, result_file", [ | |||
| ("Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | |||
| "tenor_hist_0.json"), | |||
| "tensor_hist_0.json"), | |||
| ("Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22", | |||
| "tensor_hist_1.json") | |||
| ]) | |||
| @@ -66,10 +67,22 @@ class TestGraphHandler: | |||
| ]) | |||
| def test_search_nodes(self, pattern, result_file): | |||
| """Test search nodes.""" | |||
| result = self.graph_handler.search_nodes(pattern) | |||
| result = self.graph_handler.search_nodes({'name': pattern}) | |||
| file_path = os.path.join(self.graph_results_dir, result_file) | |||
| compare_result_with_file(result, file_path) | |||
| @pytest.mark.parametrize("node_type, condition, result_file", [ | |||
| ("weight", None, "search_nodes_by_type_0.json"), | |||
| ("activation", {'activation_func': ['ReLU', 'Softmax']}, "search_nodes_by_type_1.json") | |||
| ]) | |||
| def test_search_nodes_by_type(self, node_type, condition, result_file): | |||
| """Test search nodes by type.""" | |||
| search_nodes = self.graph_handler.get_searched_node_list( | |||
| {'node_category': node_type, 'condition': condition}, 'kernel_graph_0') | |||
| file_path = os.path.join(self.graph_results_dir, result_file) | |||
| result = {'node_names': [node.name for node in search_nodes]} | |||
| compare_result_with_file(result, file_path) | |||
| @pytest.mark.parametrize("node_name, expect_type", [ | |||
| ("Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", 'Const'), | |||
| ("Default/TransData-op99", "TransData") | |||
| @@ -96,7 +109,7 @@ class TestGraphHandler: | |||
| ]) | |||
| def test_get_node_name_by_full_name(self, full_name, expect_node_name): | |||
| """Test get node name by full name.""" | |||
| node_name = self.graph_handler.get_node_name_by_full_name(full_name) | |||
| node_name = self.graph_handler.get_node_name_by_full_name(full_name, 'kernel_graph_0') | |||
| assert node_name == expect_node_name | |||
| @pytest.mark.parametrize("node_name, ascend, expect_next", [ | |||
| @@ -112,3 +125,13 @@ class TestGraphHandler: | |||
| """Test get node by BFS order.""" | |||
| next_node = self.graph_handler.get_node_by_bfs_order(node_name, ascend) | |||
| assert next_node == expect_next | |||
| @pytest.mark.parametrize("tensor_name, expect_file", [ | |||
| ("Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0:0", "get_tensor_graph-0.json"), | |||
| ("Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89:1", "get_tensor_graph-1.json"), | |||
| ("Default/tuple_getitem[10]_0/tuple_getitem-op206:1", "get_tensor_graph-2.json"), | |||
| ]) | |||
| def test_get_tensor_graph(self, tensor_name, expect_file): | |||
| """Test get tensor graph.""" | |||
| res = self.graph_handler.get_tensor_graph(tensor_name, None) | |||
| compare_debugger_result_with_file(res, expect_file=os.path.join('graph', expect_file)) | |||
| @@ -14,11 +14,10 @@ | |||
| # ============================================================================ | |||
| """Test tensor_handler.py""" | |||
| from unittest import mock | |||
| from unittest.mock import MagicMock | |||
| import pytest | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.stream_handler.tensor_handler import TensorHandler | |||
| @@ -41,34 +40,6 @@ class TestTensorHandler: | |||
| self.tensor_handler.get(filter_condition) | |||
| assert "No tensor named {}".format(None) in str(ex.value) | |||
| @mock.patch.object(TensorHandler, '_get_prev_tensor_value_status') | |||
| @pytest.mark.parametrize( | |||
| "node_type, tensor_name, tensor_info", [('Parameter', 'name', {'full_name': 'name', 'step': 1})]) | |||
| def test_update_has_prev_step_field(self, mock_get_pre, node_type, tensor_name, tensor_info): | |||
| """Test update has_prev_step field in tensor info.""" | |||
| mock_get_pre.return_value = True | |||
| res = self.tensor_handler._update_has_prev_step_field(tensor_info, tensor_name, node_type) | |||
| assert res | |||
| def test_get_prev_tensor_value_status_none(self): | |||
| """ | |||
| test _get_prev_tensor_value_status. | |||
| """ | |||
| res = self.tensor_handler._get_prev_tensor_value_status('tensor_name') | |||
| assert res is None | |||
| @mock.patch.object(TensorHandler, '_get_tensor') | |||
| def test_get_prev_tensor_value_status_false(self, mock_get_tensor): | |||
| """ | |||
| test _get_prev_tensor_value_status. | |||
| """ | |||
| self.tensor_handler._cur_step = 1 | |||
| mock_tensor = MagicMock() | |||
| mock_tensor.value = None | |||
| mock_get_tensor.return_value = mock_tensor | |||
| res = self.tensor_handler._get_prev_tensor_value_status('tensor_name') | |||
| assert not res | |||
| def test_get_tensor_value_by_name_none(self): | |||
| """Test get_tensor_value_by_name.""" | |||
| res = self.tensor_handler.get_tensor_value_by_name('tensor_name', True) | |||
| @@ -22,47 +22,56 @@ import json | |||
| import os | |||
| from unittest import mock, TestCase | |||
| from google.protobuf import json_format | |||
| import pytest | |||
| from google.protobuf import json_format | |||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | |||
| DebuggerParamTypeError | |||
| from mindinsight.debugger.common.log import logger as log | |||
| from mindinsight.debugger.common.log import LOGGER as log | |||
| from mindinsight.debugger.stream_cache.watchpoint import Watchpoint | |||
| from mindinsight.debugger.stream_handler.watchpoint_handler import WatchpointHandler, \ | |||
| WatchpointHitHandler, validate_watch_condition, validate_watch_condition_params | |||
| from tests.ut.debugger.configurations import init_graph_handler, mock_tensor_proto, \ | |||
| mock_tensor_history, get_node_basic_infos, get_watch_nodes_by_search, \ | |||
| mock_tensor_history, get_node_basic_infos, \ | |||
| init_watchpoint_hit_handler | |||
| from tests.utils.tools import compare_result_with_file | |||
| class TestWatchpointHandler: | |||
| """Test WatchpointHandler.""" | |||
| @classmethod | |||
| def setup_class(cls): | |||
| """Init WatchpointHandler for watchpoint unittest.""" | |||
| cls.handler = WatchpointHandler() | |||
| cls.results_dir = os.path.join(os.path.dirname(__file__), | |||
| '../expected_results/watchpoint') | |||
| cls.graph_results_dir = os.path.join(os.path.dirname(__file__), | |||
| '../expected_results/graph') | |||
| cls.graph_stream = init_graph_handler() | |||
| cls.conditionmgr = None | |||
| cls.handler = None | |||
| @pytest.mark.parametrize( | |||
| "watch_condition, watch_nodes, watch_point_id, expect_new_id", [ | |||
| ({'condition': 'INF'}, None, None, 1), | |||
| ({'condition': 'INF'}, ["Default"], None, 2), | |||
| ({'condition': 'MAX_GT', 'param': 1}, | |||
| ["Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92"], None, 3) | |||
| ]) | |||
| def test_create_watchpoint(self, watch_condition, watch_nodes, | |||
| watch_point_id, expect_new_id): | |||
| def setup_method(self): | |||
| """Init watchpoint for each unittest.""" | |||
| self.conditionmgr = ConditionMgr() | |||
| self.handler = WatchpointHandler() | |||
| self._create_watchpoint() | |||
| def _create_watchpoint(self): | |||
| """Test create_watchpoint.""" | |||
| watch_nodes = get_node_basic_infos(watch_nodes) | |||
| watch_point_id = self.handler.create_watchpoint(watch_condition, watch_nodes, watch_point_id) | |||
| assert watch_point_id == expect_new_id | |||
| watchpoints = [ | |||
| ({'id': 'inf', 'params': []}, None, None, 1), | |||
| ({'id': 'inf', 'params': []}, ["Default"], None, 2), | |||
| ({'id': 'max_gt', 'params': [{'name': 'param', 'value': 1, 'disable': False}]}, | |||
| ["Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92"], | |||
| None, 3) | |||
| ] | |||
| for watch_condition, watch_nodes, watch_point_id, expect_new_id in watchpoints: | |||
| watch_nodes = get_node_basic_infos(watch_nodes) | |||
| watch_point_id = self.handler.create_watchpoint(self.conditionmgr, watch_condition, watch_nodes, | |||
| watch_point_id) | |||
| assert watch_point_id == expect_new_id | |||
| @pytest.mark.parametrize( | |||
| "watch_point_id, watch_nodes, watched, expect_updated_id", [ | |||
| @@ -84,31 +93,28 @@ class TestWatchpointHandler: | |||
| ]) | |||
| def test_update_watchpoint_delete(self, watch_point_id, watch_nodes, watched, expect_updated_id): | |||
| """Test update_watchpoint on deletion.""" | |||
| watch_nodes = get_watch_nodes_by_search(watch_nodes) | |||
| watch_nodes = get_node_basic_infos(watch_nodes) | |||
| with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | |||
| self.handler.update_watchpoint(watch_point_id, watch_nodes, watched) | |||
| TestCase().assertIn(f"DEBUG:debugger.debugger:Update watchpoint {expect_updated_id} in cache.", | |||
| log_content.output) | |||
| @pytest.mark.parametrize("filter_condition, result_file", [ | |||
| (True, 'watchpoint_handler_get_0.json') | |||
| ]) | |||
| def test_get_filter_true(self, filter_condition, result_file): | |||
| def test_get_pending_commands(self): | |||
| """Test get with filter_condition is True.""" | |||
| result_file = 'watchpoint_handler_get_0.json' | |||
| file_path = os.path.join(self.results_dir, result_file) | |||
| with open(file_path, 'r') as f: | |||
| contents = json.load(f) | |||
| reply = self.handler.get(filter_condition) | |||
| protos = reply.get('watch_points') | |||
| with open(file_path, 'r') as file_handler: | |||
| contents = json.load(file_handler) | |||
| protos = self.handler.get_pending_commands(self.graph_stream) | |||
| for proto in protos: | |||
| msg_dict = json_format.MessageToDict(proto) | |||
| msg_dict['watch_nodes_num'] = len(msg_dict.pop('watchNodes', [])) | |||
| assert msg_dict in contents | |||
| @pytest.mark.parametrize("filter_condition, result_file", [ | |||
| (False, 'watchpoint_handler_get_1.json') | |||
| (None, 'watchpoint_handler_get_1.json') | |||
| ]) | |||
| def test_get_filter_false(self, filter_condition, result_file): | |||
| def test_get_without_filter(self, filter_condition, result_file): | |||
| """Test get with filer_condition is False.""" | |||
| file_path = os.path.join(self.results_dir, result_file) | |||
| reply = self.handler.get(filter_condition) | |||
| @@ -121,7 +127,7 @@ class TestWatchpointHandler: | |||
| with pytest.raises(DebuggerParamValueError) as err: | |||
| self.handler.get_watchpoint_by_id(watchpoint_id) | |||
| assert err.value.error_code == '5054B081' | |||
| assert err.value.message == f"ValueError. Invalid watchpoint id {watchpoint_id}" | |||
| assert err.value.message == f"ValueError. Invalid watchpoint id: {watchpoint_id}" | |||
| @pytest.mark.parametrize("graph_file, watch_point_id", [ | |||
| ('graph_handler_get_3_single_node.json', 4) | |||
| @@ -129,20 +135,37 @@ class TestWatchpointHandler: | |||
| def test_set_watch_nodes(self, graph_file, watch_point_id): | |||
| """Test set_watch_nodes.""" | |||
| path = os.path.join(self.graph_results_dir, graph_file) | |||
| with open(path, 'r') as f: | |||
| graph = json.load(f) | |||
| with open(path, 'r') as file_handler: | |||
| graph = json.load(file_handler) | |||
| self.handler.set_watch_nodes(graph, self.graph_stream, watch_point_id) | |||
| @pytest.mark.parametrize( | |||
| "watch_point_id, expect_deleted_ids", [ | |||
| (3, 3), (2, 2) | |||
| (3, 3), (None, 2) | |||
| ]) | |||
| def test_delete_watchpoint(self, watch_point_id, expect_deleted_ids): | |||
| """Test delete_watchpoint.""" | |||
| self.handler.sync_set_cmd({}) | |||
| with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | |||
| self.handler.delete_watchpoint(watch_point_id) | |||
| TestCase().assertIn(f"DEBUG:debugger.debugger:Delete watchpoint {expect_deleted_ids} in cache.", | |||
| log_content.output) | |||
| TestCase().assertIn( | |||
| f"DEBUG:debugger.debugger:Delete watchpoint {expect_deleted_ids} in cache.", | |||
| log_content.output) | |||
| @pytest.mark.parametrize( | |||
| "watch_point_id, expect_deleted_ids", [ | |||
| (3, 3), (2, 2) | |||
| ]) | |||
| def test_delete_watchpoint_in_cache(self, watch_point_id, | |||
| expect_deleted_ids): | |||
| """Test delete_watchpoint.""" | |||
| for _ in range(watch_point_id): | |||
| self.handler.create_watchpoint(self.conditionmgr, {'id': 'inf', 'params': []}) | |||
| with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | |||
| self.handler.delete_watchpoint(watch_point_id) | |||
| TestCase().assertIn( | |||
| f"DEBUG:debugger.debugger:Cancel create watchpoint {expect_deleted_ids} in cache.", | |||
| log_content.output) | |||
| class TestWatchpointHitHandler: | |||
| @@ -155,8 +178,7 @@ class TestWatchpointHitHandler: | |||
| 'tensor_proto': mock_tensor_proto(), | |||
| 'watchpoint': watchpoint, | |||
| 'node_name': 'Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92', | |||
| 'finished': True, | |||
| 'slot': 0 | |||
| 'graph_name': 'kernel_graph_0', | |||
| } | |||
| @classmethod | |||
| @@ -198,24 +220,26 @@ class TestWatchpointHitHandler: | |||
| def test_validate_watch_condition_type_error(): | |||
| """Test validate_watch_condition.""" | |||
| watch_condition = [] | |||
| conditionmgr = ConditionMgr() | |||
| with pytest.raises(DebuggerParamTypeError) as err: | |||
| validate_watch_condition(watch_condition) | |||
| validate_watch_condition(conditionmgr, watch_condition) | |||
| assert err.value.error_code == '5054B080' | |||
| watch_condition = {'watch_condition': {'condition': 'MAXIMUM'}} | |||
| with pytest.raises(DebuggerParamValueError) as err: | |||
| validate_watch_condition(watch_condition) | |||
| validate_watch_condition(conditionmgr, watch_condition) | |||
| assert err.value.error_code == '5054B081' | |||
| def test_validate_watch_condition_params_except(): | |||
| """Test validate_watch_condition_params.""" | |||
| watch_condition = {'watch_condition': {'condition': 'NAN', 'param': 1}} | |||
| watch_condition = {'id': 'inf', 'params': [{'name': 'param', 'value': 0, 'disable': False}]} | |||
| conditionmgr = ConditionMgr() | |||
| with pytest.raises(DebuggerParamValueError) as err: | |||
| validate_watch_condition_params(watch_condition) | |||
| validate_watch_condition_params(conditionmgr, watch_condition) | |||
| assert err.value.error_code == '5054B081' | |||
| watch_condition = {'watch_condition': {'condition': 'MAX_GT', 'param': '0'}} | |||
| watch_condition = {'id': 'max_gt', 'params': [{'name': 'param', 'value': '0', 'disable': False}]} | |||
| with pytest.raises(DebuggerParamValueError) as err: | |||
| validate_watch_condition_params(watch_condition) | |||
| validate_watch_condition_params(conditionmgr, watch_condition) | |||
| assert err.value.error_code == '5054B081' | |||
| @@ -23,6 +23,7 @@ from unittest.mock import MagicMock | |||
| import numpy as np | |||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||
| from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus | |||
| from mindinsight.debugger.debugger_cache import DebuggerCache | |||
| from mindinsight.debugger.debugger_grpc_server import DebuggerGrpcServer | |||
| @@ -117,7 +118,7 @@ class TestDebuggerGrpcServer: | |||
| def setup_method(self): | |||
| """Initialize for each testcase.""" | |||
| cache_store = DebuggerCache() | |||
| self._server = DebuggerGrpcServer(cache_store) | |||
| self._server = DebuggerGrpcServer(cache_store, condition_mgr=ConditionMgr()) | |||
| def test_waitcmd_with_pending_status(self): | |||
| """Test wait command interface when status is pending.""" | |||
| @@ -125,6 +126,7 @@ class TestDebuggerGrpcServer: | |||
| assert res.status == EventReply.Status.FAILED | |||
| @mock.patch.object(WatchpointHitHandler, 'empty', False) | |||
| @mock.patch.object(WatchpointHitHandler, 'put') | |||
| @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command') | |||
| def test_waitcmd_with_old_command(self, *args): | |||
| """Test wait command interface with old command.""" | |||
| @@ -132,8 +134,8 @@ class TestDebuggerGrpcServer: | |||
| args[0].return_value = old_command | |||
| setattr(self._server, '_status', ServerStatus.WAITING) | |||
| setattr(self._server, '_received_view_cmd', {'node_name': 'mock_node_name'}) | |||
| setattr(self._server, '_received_hit', True) | |||
| res = self._server.WaitCMD(MagicMock(cur_step=1), MagicMock()) | |||
| setattr(self._server, '_received_hit', [MagicMock()]) | |||
| res = self._server.WaitCMD(MagicMock(cur_step=1, cur_node=''), MagicMock()) | |||
| assert res == old_command | |||
| @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command', return_value=None) | |||
| @@ -143,7 +145,7 @@ class TestDebuggerGrpcServer: | |||
| old_command = MockDataGenerator.get_run_cmd(steps=1) | |||
| args[0].return_value = old_command | |||
| setattr(self._server, '_status', ServerStatus.WAITING) | |||
| res = self._server.WaitCMD(MagicMock(cur_step=1), MagicMock()) | |||
| res = self._server.WaitCMD(MagicMock(cur_step=1, cur_node=''), MagicMock()) | |||
| assert res == old_command | |||
| @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command', return_value=None) | |||
| @@ -152,7 +154,7 @@ class TestDebuggerGrpcServer: | |||
| """Test wait command interface with next command is None.""" | |||
| args[0].return_value = None | |||
| setattr(self._server, '_status', ServerStatus.RECEIVE_GRAPH) | |||
| res = self._server.WaitCMD(MagicMock(cur_step=1), MagicMock()) | |||
| res = self._server.WaitCMD(MagicMock(cur_step=1, cur_node=''), MagicMock()) | |||
| assert res == get_ack_reply(1) | |||
| @mock.patch.object(DebuggerCache, 'get_command', return_value=(0, None)) | |||
| @@ -228,6 +230,7 @@ class TestDebuggerGrpcServer: | |||
| assert res == get_ack_reply() | |||
| @mock.patch.object(WatchpointHandler, 'get_watchpoint_by_id') | |||
| @mock.patch.object(GraphHandler, 'get_graph_id_by_full_name', return_value='mock_graph_name') | |||
| @mock.patch.object(GraphHandler, 'get_node_name_by_full_name') | |||
| def test_send_watchpoint_hit(self, *args): | |||
| """Test SendWatchpointHits interface.""" | |||
| @@ -28,6 +28,7 @@ import pytest | |||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | |||
| DebuggerCompareTensorError, DebuggerCreateWatchPointError, DebuggerDeleteWatchPointError | |||
| from mindinsight.debugger.common.utils import Streams | |||
| from mindinsight.debugger.debugger_cache import DebuggerCache | |||
| from mindinsight.debugger.debugger_server import DebuggerServer | |||
| from mindinsight.debugger.debugger_server import grpc_server_base | |||
| @@ -81,7 +82,7 @@ class TestDebuggerServer: | |||
| """Test search node.""" | |||
| mock_graph = {'nodes': ['mock_nodes']} | |||
| args[0].return_value = mock_graph | |||
| res = self._server.search('mock_name') | |||
| res = self._server.search({'name': 'mock_name'}) | |||
| assert res == mock_graph | |||
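`search` now takes a filter dict instead of a bare name, which is what lets the REST layer forward extra criteria. A hedged fragment (combining `node_category` with the name filter here is an assumption):

```python
# Inside TestDebuggerServer: name plus an optional category filter in one dict.
res = self._server.search({'name': 'mock_name', 'node_category': 'weight'})
```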
| def test_tensor_comparision_with_wrong_status(self): | |||
| @@ -93,6 +94,7 @@ class TestDebuggerServer: | |||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | |||
| @mock.patch.object(GraphHandler, 'get_node_type') | |||
| @mock.patch.object(GraphHandler, 'get_graph_id_by_name') | |||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_node_name') | |||
| def test_tensor_comparision_with_wrong_type(self, *args): | |||
| """Test tensor comparison with wrong type.""" | |||
| @@ -101,6 +103,7 @@ class TestDebuggerServer: | |||
| self._server.tensor_comparisons(name='mock_node_name:0', shape='[:, :]') | |||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | |||
| @mock.patch.object(GraphHandler, 'get_graph_id_by_name') | |||
| @mock.patch.object(GraphHandler, 'get_node_type', return_value='Parameter') | |||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_node_name') | |||
| @mock.patch.object(TensorHandler, 'get_tensors_diff') | |||
| @@ -156,7 +159,7 @@ class TestDebuggerServer: | |||
| """Test validate leaf name.""" | |||
| args[0].return_value = 'name_scope' | |||
| with pytest.raises(DebuggerParamValueError, match='Invalid leaf node name.'): | |||
| self._server._validate_leaf_name(node_name='mock_node_name') | |||
| self._server._validate_leaf_name(node_name='mock_node_name', graph_name='mock_graph_name') | |||
| @mock.patch.object(TensorHandler, 'get') | |||
| @mock.patch.object(DebuggerServer, '_get_tensor_name_and_type_by_ui_name') | |||
| @@ -199,40 +202,42 @@ class TestDebuggerServer: | |||
| self._server.create_watchpoint(watch_condition={'condition': 'INF'}) | |||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | |||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_full_name') | |||
| @mock.patch.object(GraphHandler, 'get_nodes_by_scope', return_value=[MagicMock()]) | |||
| @mock.patch.object(GraphHandler, 'get_node_basic_info', return_value=[MagicMock()]) | |||
| @mock.patch.object(GraphHandler, 'get_node_type', return_value='aggregation_scope') | |||
| @mock.patch.object(WatchpointHandler, 'create_watchpoint') | |||
| def test_create_watchpoint(self, *args): | |||
| """Test create watchpoint.""" | |||
| args[0].return_value = 1 | |||
| res = self._server.create_watchpoint({'condition': 'INF'}, ['watch_node_name']) | |||
| assert res == {'id': 1} | |||
| assert res == {'id': 1, 'metadata': {'enable_recheck': False, 'state': 'waiting'}} | |||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | |||
| @mock.patch.object(GraphHandler, 'validate_graph_name', return_value='kernel_graph_0') | |||
| @mock.patch.object(GraphHandler, 'get_searched_node_list') | |||
| @mock.patch.object(WatchpointHandler, 'validate_watchpoint_id') | |||
| @mock.patch.object(WatchpointHandler, 'update_watchpoint') | |||
| def test_update_watchpoint(self, *args): | |||
| """Test update watchpoint.""" | |||
| args[2].return_value = [MagicMock(name='seatch_name/op_name')] | |||
| args[2].return_value = [MagicMock(name='search_name/op_name')] | |||
| res = self._server.update_watchpoint( | |||
| watch_point_id=1, watch_nodes=['search_name'], mode=1, name='search_name') | |||
| assert res == {} | |||
| watch_point_id=1, watch_nodes=['search_name'], | |||
| mode=1, search_pattern={'name': 'search_name'}, graph_name='kernel_graph_0') | |||
| assert res == {'metadata': {'enable_recheck': False, 'state': 'waiting'}} | |||
| def test_delete_watchpoint_with_wrong_state(self): | |||
| """Test delete watchpoint with wrong state.""" | |||
| with pytest.raises(DebuggerDeleteWatchPointError, match='Failed to delete watchpoint'): | |||
| self._server.delete_watchpoint(watch_point_id=1) | |||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | |||
| @mock.patch.object(MetadataHandler, 'enable_recheck', True) | |||
| @mock.patch.object(WatchpointHandler, 'is_recheckable', return_value=True) | |||
| @mock.patch.object(WatchpointHandler, 'delete_watchpoint') | |||
| def test_delete_watchpoint(self, *args): | |||
| """Test delete watchpoint with wrong state.""" | |||
| self._server.cache_store.get_stream_handler(Streams.METADATA).state = 'waiting' | |||
| args[0].return_value = None | |||
| res = self._server.delete_watchpoint(1) | |||
| assert res == {} | |||
| assert res == {'metadata': {'enable_recheck': True, 'state': 'waiting'}} | |||
| @pytest.mark.parametrize('mode, cur_state, state', [ | |||
| ('continue', 'waiting', 'running'), | |||
| @@ -242,7 +247,7 @@ class TestDebuggerServer: | |||
| """Test control request.""" | |||
| with mock.patch.object(MetadataHandler, 'state', cur_state): | |||
| res = self._server.control({'mode': mode}) | |||
| assert res == {'metadata': {'state': state}} | |||
| assert res == {'metadata': {'enable_recheck': False, 'state': state}} | |||
| def test_construct_run_event(self): | |||
| """Test construct run event.""" | |||
| @@ -34,11 +34,10 @@ def get_url(url, params): | |||
| Args: | |||
| url (str): A link requested. For example, http://example.com. | |||
| params (dict): A dict consists of params. For example, {'offset': 1, 'limit':'100}. | |||
| params (dict): A dict consists of params. For example, {'offset': 1, 'limit': 100}. | |||
| Returns: | |||
| str, like http://example.com?offset=1&limit=100 | |||
| """ | |||
| return url + '?' + urlencode(params) | |||
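A quick check of `get_url` with the docstring's own values:

```python
# Matches the example in the docstring above.
assert get_url('http://example.com', {'offset': 1, 'limit': 100}) == \
    'http://example.com?offset=1&limit=100'
```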