| @@ -0,0 +1,26 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Module init file.""" | |||||
| from mindinsight.backend.conditionmgr.conditionmgr_api import init_module as init_query_module | |||||
def init_module(app):
    """
    Entry point for initializing the conditionmgr backend module.

    Args:
        app (Flask): A Flask instance.
    """
    init_query_module(app)
| @@ -0,0 +1,46 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Conditionmgr restful api.""" | |||||
| from flask import Blueprint | |||||
| from mindinsight.conf import settings | |||||
| from mindinsight.backend.debugger.debugger_api import BACKEND_SERVER, _wrap_reply | |||||
# Blueprint for the condition manager REST APIs, mounted under the
# configured URL and API prefixes (same scheme as the other backend modules).
BLUEPRINT = Blueprint("conditionmgr", __name__,
                      url_prefix=settings.URL_PATH_PREFIX + settings.API_PREFIX)
@BLUEPRINT.route("/conditionmgr/train-jobs/<train_id>/conditions", methods=["GET"])
def get_conditions(train_id):
    """Return the conditions for the given train job as a wrapped reply."""
    return _wrap_reply(BACKEND_SERVER.get_conditions, train_id)
@BLUEPRINT.route("/conditionmgr/train-jobs/<train_id>/condition-collections", methods=["GET"])
def get_condition_collections(train_id):
    """Return the condition collections for the given train job as a wrapped reply."""
    return _wrap_reply(BACKEND_SERVER.get_condition_collections, train_id)
def init_module(app):
    """
    Register the conditionmgr blueprint on the application.

    Args:
        app (Flask): The application obj.
    """
    app.register_blueprint(BLUEPRINT)
| @@ -88,11 +88,16 @@ def search(): | |||||
| str, the required data. | str, the required data. | ||||
| Examples: | Examples: | ||||
| >>> Get http://xxxx/v1/mindinsight/debugger/retrive?mode=all | |||||
| >>> Get http://xxxx/v1/mindinsight/debugger/search?name=mock_name&watch_point_id=1 | |||||
| """ | """ | ||||
| name = request.args.get('name') | name = request.args.get('name') | ||||
| graph_name = request.args.get('graph_name') | |||||
| watch_point_id = int(request.args.get('watch_point_id', 0)) | watch_point_id = int(request.args.get('watch_point_id', 0)) | ||||
| reply = _wrap_reply(BACKEND_SERVER.search, name, watch_point_id) | |||||
| node_category = request.args.get('node_category') | |||||
| reply = _wrap_reply(BACKEND_SERVER.search, {'name': name, | |||||
| 'graph_name': graph_name, | |||||
| 'watch_point_id': watch_point_id, | |||||
| 'node_category': node_category}) | |||||
| return reply | return reply | ||||
| @@ -109,9 +114,10 @@ def retrieve_node_by_bfs(): | |||||
| >>> Get http://xxxx/v1/mindinsight/debugger/retrieve_node_by_bfs?name=node_name&ascend=true | >>> Get http://xxxx/v1/mindinsight/debugger/retrieve_node_by_bfs?name=node_name&ascend=true | ||||
| """ | """ | ||||
| name = request.args.get('name') | name = request.args.get('name') | ||||
| graph_name = request.args.get('graph_name') | |||||
| ascend = request.args.get('ascend', 'false') | ascend = request.args.get('ascend', 'false') | ||||
| ascend = ascend == 'true' | ascend = ascend == 'true' | ||||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_node_by_bfs, name, ascend) | |||||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_node_by_bfs, name, graph_name, ascend) | |||||
| return reply | return reply | ||||
| @@ -167,7 +173,8 @@ def retrieve_tensor_history(): | |||||
| """ | """ | ||||
| body = _read_post_request(request) | body = _read_post_request(request) | ||||
| name = body.get('name') | name = body.get('name') | ||||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_history, name) | |||||
| graph_name = body.get('graph_name') | |||||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_history, name, graph_name) | |||||
| return reply | return reply | ||||
| @@ -180,12 +187,15 @@ def retrieve_tensor_value(): | |||||
| str, the required data. | str, the required data. | ||||
| Examples: | Examples: | ||||
| >>> GET http://xxxx/v1/mindinsight/debugger/tensors?name=node_name&detail=data&shape=[1,1,:,:] | |||||
| >>> GET http://xxxx/v1/mindinsight/debugger/tensors?name=tensor_name&detail=data&shape=[1,1,:,:] | |||||
| """ | """ | ||||
| name = request.args.get('name') | name = request.args.get('name') | ||||
| detail = request.args.get('detail') | detail = request.args.get('detail') | ||||
| shape = request.args.get('shape') | shape = request.args.get('shape') | ||||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_value, name, detail, shape) | |||||
| graph_name = request.args.get('graph_name') | |||||
| prev = bool(request.args.get('prev') == 'true') | |||||
| reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_value, name, detail, shape, graph_name, prev) | |||||
| return reply | return reply | ||||
| @@ -199,7 +209,6 @@ def create_watchpoint(): | |||||
| Raises: | Raises: | ||||
| MindInsightException: If method fails to be called. | MindInsightException: If method fails to be called. | ||||
| ParamValueError: If parsing json data search_condition fails. | |||||
| Examples: | Examples: | ||||
| >>> POST http://xxxx/v1/mindinsight/debugger/create_watchpoint | >>> POST http://xxxx/v1/mindinsight/debugger/create_watchpoint | ||||
| @@ -207,9 +216,12 @@ def create_watchpoint(): | |||||
| body = _read_post_request(request) | body = _read_post_request(request) | ||||
| condition = body.get('condition') | condition = body.get('condition') | ||||
| graph_name = body.get('graph_name') | |||||
| watch_nodes = body.get('watch_nodes') | watch_nodes = body.get('watch_nodes') | ||||
| watch_point_id = body.get('watch_point_id') | watch_point_id = body.get('watch_point_id') | ||||
| reply = _wrap_reply(BACKEND_SERVER.create_watchpoint, condition, watch_nodes, watch_point_id) | |||||
| search_pattern = body.get('search_pattern') | |||||
| reply = _wrap_reply(BACKEND_SERVER.create_watchpoint, | |||||
| condition, watch_nodes, watch_point_id, search_pattern, graph_name) | |||||
| return reply | return reply | ||||
| @@ -223,7 +235,6 @@ def update_watchpoint(): | |||||
| Raises: | Raises: | ||||
| MindInsightException: If method fails to be called. | MindInsightException: If method fails to be called. | ||||
| ParamValueError: If parsing json data search_condition fails. | |||||
| Examples: | Examples: | ||||
| >>> POST http://xxxx/v1/mindinsight/debugger/update_watchpoint | >>> POST http://xxxx/v1/mindinsight/debugger/update_watchpoint | ||||
| @@ -232,10 +243,10 @@ def update_watchpoint(): | |||||
| watch_point_id = body.get('watch_point_id') | watch_point_id = body.get('watch_point_id') | ||||
| watch_nodes = body.get('watch_nodes') | watch_nodes = body.get('watch_nodes') | ||||
| graph_name = body.get('graph_name') | |||||
| mode = body.get('mode') | mode = body.get('mode') | ||||
| name = body.get('name') | |||||
| reply = _wrap_reply(BACKEND_SERVER.update_watchpoint, watch_point_id, watch_nodes, mode, name) | |||||
| pattern = body.get('search_pattern') | |||||
| reply = _wrap_reply(BACKEND_SERVER.update_watchpoint, watch_point_id, watch_nodes, mode, pattern, graph_name) | |||||
| return reply | return reply | ||||
| @@ -249,7 +260,6 @@ def delete_watchpoint(): | |||||
| Raises: | Raises: | ||||
| MindInsightException: If method fails to be called. | MindInsightException: If method fails to be called. | ||||
| ParamValueError: If parsing json data search_condition fails. | |||||
| Examples: | Examples: | ||||
| >>> POST http://xxxx/v1/mindinsight/debugger/delete_watchpoint | >>> POST http://xxxx/v1/mindinsight/debugger/delete_watchpoint | ||||
| @@ -273,7 +283,6 @@ def control(): | |||||
| Raises: | Raises: | ||||
| MindInsightException: If method fails to be called. | MindInsightException: If method fails to be called. | ||||
| ParamValueError: If parsing json data search_condition fails. | |||||
| Examples: | Examples: | ||||
| >>> POST http://xxxx/v1/mindinsight/debugger/control | >>> POST http://xxxx/v1/mindinsight/debugger/control | ||||
| @@ -284,6 +293,59 @@ def control(): | |||||
| return reply | return reply | ||||
@BLUEPRINT.route("/debugger/recheck", methods=["POST"])
def recheck():
    """
    Handle a recheck request.

    Returns:
        str, reply message.

    Raises:
        MindInsightException: If method fails to be called.

    Examples:
        >>> POST http://xxxx/v1/mindinsight/debugger/recheck
    """
    return _wrap_reply(BACKEND_SERVER.recheck)
@BLUEPRINT.route("/debugger/tensor_graphs", methods=["GET"])
def retrieve_tensor_graph():
    """
    Retrieve the graph of a tensor according to tensor name and graph name.

    Returns:
        str, the required data.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/debugger/tensor_graphs?tensor_name=tensor_name&graph_name=graph_name
    """
    tensor_name = request.args.get('tensor_name')
    graph_name = request.args.get('graph_name')
    # Delegate to the debugger server; _wrap_reply serializes the result.
    reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_graph, tensor_name, graph_name)
    return reply
@BLUEPRINT.route("/debugger/tensor_hits", methods=["GET"])
def retrieve_tensor_hits():
    """
    Retrieve the watchpoint hits of a tensor according to tensor name and graph name.

    Returns:
        str, the required data.

    Examples:
        >>> GET http://xxxx/v1/mindinsight/debugger/tensor_hits?tensor_name=tensor_name&graph_name=graph_name
    """
    tensor_name = request.args.get('tensor_name')
    graph_name = request.args.get('graph_name')
    # Delegate to the debugger server; _wrap_reply serializes the result.
    reply = _wrap_reply(BACKEND_SERVER.retrieve_tensor_hits, tensor_name, graph_name)
    return reply
| BACKEND_SERVER = _initialize_debugger_server() | BACKEND_SERVER = _initialize_debugger_server() | ||||
| @@ -0,0 +1,15 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Provide condition manager function.""" | |||||
| @@ -0,0 +1,15 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Common module.""" | |||||
| @@ -0,0 +1,18 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Utils module.""" | |||||
from collections import namedtuple

# Basic record for a graph node: its display name, full (original) name and
# node type. Field semantics are defined by the callers that build it.
NodeBasicInfo = namedtuple('node_basic_info', ['name', 'full_name', 'type'])
| @@ -0,0 +1,232 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| Management of all conditions. | |||||
| This module is used to register all conditions, as well as their parameters. | |||||
| This module also provide the available conditions to condition_collections api. | |||||
| """ | |||||
| from enum import Enum | |||||
| from mindinsight.conditionmgr.log import logger | |||||
class ConditionIdEnum(Enum):
    """Condition ids.

    Each member's value is the string id under which the condition is
    registered (see the Condition definitions in this package).
    """

    # Weight-targeted conditions.
    WEIGHT_INITIALIZATION = "weight_initialization"
    WEIGHT_OVERFLOW = "weight_overflow"
    WEIGHT_TOO_LARGE = "weight_too_large"
    WEIGHT_TOO_SMALL = "weight_too_small"
    # Gradient-targeted conditions.
    GRADIENT_VANISHING = "gradient_vanishing"
    GRADIENT_TOO_LARGE = "gradient_too_large"
    GRADIENT_EXPLODING = "gradient_exploding"
    # Tensor-targeted conditions.
    TENSOR_OVERFLOW = "tensor_overflow"
    OPERATOR_OVERFLOW = "operator_overflow"
    # Single-metric conditions, each taking one "param" value.
    NAN = "nan"
    OVERFLOW_ASCEND_CHIP = "overflow"  # member name differs from the id string on purpose
    INF = "inf"
    MAX_GT = "max_gt"
    MAX_LT = "max_lt"
    MIN_GT = "min_gt"
    MIN_LT = "min_lt"
    MAX_MIN_GT = "max_min_gt"
    MAX_MIN_LT = "max_min_lt"
    MEAN_GT = "mean_gt"
    MEAN_LT = "mean_lt"
    # Generic tensor-change / initialization conditions.
    TENSOR_INITIALIZATION = "tensor_initialization"
    TENSOR_TOO_LARGE = "tensor_too_large"
    TENSOR_TOO_SMALL = "tensor_too_small"
    TENSOR_ALL_ZERO = "tensor_all_zero"
    WEIGHT_NOT_CHANGED = "weight_not_changed"
    WEIGHT_CHANGE_TOO_LARGE = "weight_change_too_large"
    WEIGHT_CHANGE_TOO_SMALL = "weight_change_too_small"
    TENSOR_CHANGE_TOO_LARGE = "tensor_change_too_large"
    TENSOR_CHANGE_TOO_SMALL = "tensor_change_too_small"
    TENSOR_NOT_CHANGED = "tensor_not_changed"
class OptimizePhaseEnum(Enum):
    """Optimize phases."""

    # NOTE(review): values look like numeric phase codes rather than an
    # ordering of the members as written here — confirm how they are consumed.
    TENSOR_CHECK = 400
    OPERATOR_CHECK = 100
    LOSS_CHECK = 300
    INPUT_DATA_CHECK = 200
class ValueTypeEnum(Enum):
    """Value types of condition parameters (see ConditionParameter)."""

    FLOAT64 = 1
    INT64 = 2
    BOOL = 3
class PlatformEnum(Enum):
    """Platform types; values match the backend strings compared in Condition.is_available."""

    GPU = "GPU"
    ASCEND = "Ascend"
class TargetTypeEnum(Enum):
    """Target types a condition can be applied to."""

    TENSOR = 'tensor'
    WEIGHT = 'weight'
    ACTIVATION = 'activation'
    GRADIENT = 'gradient'
class ConditionContext:
    """
    The class for condition context.

    Args:
        backend (str): The backend platform the training runs on,
            e.g. "GPU" or "Ascend" (compared against PlatformEnum values).
        step (int): The current step. Default: 0.
        debugger_capability (tuple): The debugger capability version of the
            backend, compared against each condition's minimum requirement.
            Default: (1, 0).
    """

    def __init__(self, backend, step=0, debugger_capability=(1, 0)):
        self._backend = backend
        self._step = step
        self._debugger_capability = debugger_capability

    @property
    def backend(self):
        """Get backend."""
        return self._backend

    @property
    def step(self):
        """Get step."""
        return self._step

    @property
    def debugger_capability(self):
        """Get debugger_capability."""
        return self._debugger_capability
class ConditionParameter:
    """
    Definition of a single parameter of a condition.

    Args:
        name (str): parameter name.
        value_type (ValueTypeEnum): the type of the parameter value.
        support_disable (bool): whether the param may be left unassigned.
        default_value (float): default value, or None when there is none.
        visible_on_ui (bool): whether the param is shown on the UI.
    """

    def __init__(self, name, value_type: ValueTypeEnum, support_disable=True, default_value=None, visible_on_ui=True):
        self._name = name
        self._type = value_type
        self._support_disable = support_disable
        self._default_value = default_value
        self._visible_on_ui = visible_on_ui

    @property
    def name(self):
        """str: the parameter name."""
        return self._name

    @property
    def type(self):
        """ValueTypeEnum: the parameter value type."""
        return self._type

    @property
    def support_disable(self):
        """bool: whether the parameter may be left unassigned."""
        return self._support_disable

    @property
    def default_value(self):
        """The default value of the parameter, or None."""
        return self._default_value

    @property
    def visible_on_ui(self):
        """bool: whether the parameter is shown on the UI."""
        return self._visible_on_ui
class Condition:
    """
    A watch condition definition together with its parameters.

    Args:
        condition_id (str): condition id.
        abbr (str): the abbreviation of condition id.
        optimize_phase (OptimizePhaseEnum): optimize phase.
        parameters (List[ConditionParameter]): parameters.
        supported_target_type (TargetTypeEnum): the supported target type.
        supported_platforms (tuple[PlatformEnum, PlatformEnum]): the supported platforms.
        minimum_debugger_capability (tuple): the minimum debugger capability required.
        available_test_func (func): the function used to test whether the condition is available
    """

    def __init__(self, condition_id, abbr, optimize_phase, parameters, supported_target_type, supported_platforms,
                 minimum_debugger_capability, available_test_func=None):
        self.id = condition_id
        self._abbr = abbr
        self.optimize_phase = optimize_phase
        # Index the parameter definitions by name for O(1) lookup.
        self._parameters = {}
        for param in parameters:
            self._parameters[param.name] = param
        self._supported_target_type = supported_target_type
        self.supported_platforms = supported_platforms
        self.minimum_debugger_capability = minimum_debugger_capability
        self.available_test_func = available_test_func

    @property
    def abbr(self):
        """The abbreviation of condition"""
        return self._abbr

    @property
    def names(self):
        """The names of the condition parameters"""
        return self._parameters.keys()

    @property
    def parameters(self):
        """The parameter definitions of the condition"""
        return self._parameters.values()

    @property
    def supported_target_type(self):
        """The supported target type of condition"""
        return self._supported_target_type

    def get_parameter_definition(self, name):
        """Return parameter definition by the name"""
        return self._parameters[name]

    def is_available(self, condition_context):
        """Check whether the condition is available in the given context."""
        if condition_context.debugger_capability < self.minimum_debugger_capability:
            logger.debug("The debugger capability is lower than the minimum debugger capability.")
            return False
        supported_backends = [platform.value for platform in self.supported_platforms]
        if condition_context.backend not in supported_backends:
            logger.debug("The condition %s is not supported on the platform.", self.id)
            return False
        test_func = self.available_test_func
        return True if test_func is None else test_func(condition_context)
def check_initialization_available(condition_context):
    """Initialization checks only make sense before any training step has run."""
    return condition_context.step == 0
| @@ -0,0 +1,599 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| Condition list. | |||||
| This module provide the detail conditions list. | |||||
| """ | |||||
| from mindinsight.conditionmgr.condition import Condition | |||||
| from mindinsight.conditionmgr.condition import OptimizePhaseEnum | |||||
| from mindinsight.conditionmgr.condition import ConditionParameter | |||||
| from mindinsight.conditionmgr.condition import ValueTypeEnum | |||||
| from mindinsight.conditionmgr.condition import TargetTypeEnum | |||||
| from mindinsight.conditionmgr.condition import PlatformEnum | |||||
| from mindinsight.conditionmgr.condition import check_initialization_available | |||||
| CONDITION_LIST = [ | |||||
| Condition( | |||||
| condition_id="weight_initialization", | |||||
| abbr="WI", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_initialization | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="zero_percentage_ge", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=100 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1), | |||||
| available_test_func=check_initialization_available | |||||
| ), | |||||
| Condition( | |||||
| condition_id="weight_overflow", | |||||
| abbr="WO", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="weight_too_large", | |||||
| abbr="WL", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_too_large | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="weight_too_small", | |||||
| abbr="WS", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_too_small | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="gradient_vanishing", | |||||
| abbr="GV", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_too_small | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.GRADIENT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="gradient_too_large", | |||||
| abbr="GL", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_too_large | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.GRADIENT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="gradient_exploding", | |||||
| abbr="GE", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.GRADIENT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_overflow", | |||||
| abbr="TO", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_general_overflow | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="operator_overflow", | |||||
| abbr="OO", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.overflow | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND,), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="nan", | |||||
| abbr="NAN", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.nan | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.GPU,), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="overflow", | |||||
| abbr="OVERFLOW", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.overflow | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND,), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="inf", | |||||
| abbr="INF", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.inf | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="max_gt", | |||||
| abbr="MAX>", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.max_gt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="max_lt", | |||||
| abbr="MAX<", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.max_lt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="min_gt", | |||||
| abbr="MIN>", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.min_gt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="min_lt", | |||||
| abbr="MIN<", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.min_lt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="max_min_gt", | |||||
| abbr="MAX-MIN>", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.max_min_gt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="max_min_lt", | |||||
| abbr="MAX-Min<", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.max_min_lt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="mean_gt", | |||||
| abbr="MEAN>", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.mean_gt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="mean_lt", | |||||
| abbr="MEAN<", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.mean_lt | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="param", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 0) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_initialization", | |||||
| abbr="TI", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_initialization | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="zero_percentage_ge", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=100 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1), | |||||
| available_test_func=check_initialization_available | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_too_large", | |||||
| abbr="TL", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_too_large | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_too_small", | |||||
| abbr="TS", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_too_small | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="max_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="min_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_all_zero", | |||||
| abbr="TZ", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_all_zero | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="zero_percentage_ge", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=100 | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="weight_not_changed", | |||||
| abbr="WNC", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_not_changed | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="rtol", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=1e-5 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="atol", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| support_disable=False, | |||||
| default_value=1e-8, | |||||
| visible_on_ui=False | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="equal_nan", | |||||
| value_type=ValueTypeEnum.BOOL, | |||||
| support_disable=False, | |||||
| default_value=False, | |||||
| visible_on_ui=False | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="weight_change_too_large", | |||||
| abbr="WCL", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_large | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_update_ratio_mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=1e-1 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="epsilon", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| support_disable=False, | |||||
| default_value=1e-9, | |||||
| visible_on_ui=False | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="weight_change_too_small", | |||||
| abbr="WCS", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_small | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_update_ratio_mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=1e-4 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="epsilon", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| support_disable=False, | |||||
| default_value=1e-9, | |||||
| visible_on_ui=False | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.WEIGHT, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_change_too_large", | |||||
| abbr="TCL", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_large | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_update_ratio_mean_gt", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=1e-1 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="epsilon", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| support_disable=False, | |||||
| default_value=1e-9, | |||||
| visible_on_ui=False | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_change_too_small", | |||||
| abbr="TCS", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_change_too_small | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="abs_update_ratio_mean_lt", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=1e-4 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="epsilon", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| support_disable=False, | |||||
| default_value=1e-9, | |||||
| visible_on_ui=False | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ), | |||||
| Condition( | |||||
| condition_id="tensor_not_changed", | |||||
| abbr="TNC", | |||||
| # Send this condition to MindSpore will use WatchCondition.Condition.tensor_not_changed | |||||
| optimize_phase=OptimizePhaseEnum.TENSOR_CHECK, | |||||
| parameters=[ | |||||
| ConditionParameter( | |||||
| name="rtol", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| default_value=1e-5 | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="atol", | |||||
| value_type=ValueTypeEnum.FLOAT64, | |||||
| support_disable=False, | |||||
| default_value=1e-8, | |||||
| visible_on_ui=False | |||||
| ), | |||||
| ConditionParameter( | |||||
| name="equal_nan", | |||||
| value_type=ValueTypeEnum.BOOL, | |||||
| support_disable=False, | |||||
| default_value=False, | |||||
| visible_on_ui=False | |||||
| ) | |||||
| ], | |||||
| supported_target_type=TargetTypeEnum.TENSOR, | |||||
| supported_platforms=(PlatformEnum.ASCEND, PlatformEnum.GPU), | |||||
| minimum_debugger_capability=(1, 1) | |||||
| ) | |||||
| ] | |||||
| @@ -0,0 +1,132 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| Condition manager. | |||||
| This module provides the condition manager function. | |||||
| """ | |||||
| from mindinsight.conditionmgr.condition import Condition | |||||
| from mindinsight.conditionmgr.condition import TargetTypeEnum | |||||
| from mindinsight.conditionmgr.condition_list import CONDITION_LIST | |||||
| from mindinsight.conditionmgr.log import logger | |||||
class ConditionMgr:
    """Manage the set of registered watchpoint condition definitions."""

    def __init__(self):
        # Map of condition id -> Condition definition.
        self.conditions = {}
        # Ids of registered conditions that take no parameters.
        self.no_parameter_conditions = []
        self._register_default_conditions()

    def _register_default_conditions(self):
        """Register the default condition definitions."""
        self.register_conditions(CONDITION_LIST)

    def register_condition(self, condition):
        """Register one condition into the manager.

        Args:
            condition (Condition): Condition definition to register.
        """
        if not condition.parameters:
            self.no_parameter_conditions.append(condition.id)
        self.conditions[condition.id] = condition

    def register_conditions(self, conditions):
        """Register multiple conditions.

        Args:
            conditions (iterable[Condition]): Condition definitions to register.
        """
        for condition in conditions:
            self.register_condition(condition)

    @staticmethod
    def _format_parameters(condition):
        """Build the UI-visible parameter dicts for one condition.

        Shared by get_all and get_all_collections so both produce the
        same parameter representation.
        """
        return [
            {
                "name": param.name,
                "type": param.type.name,
                "support_disable": param.support_disable,
                "default_value": param.default_value
            }
            for param in condition.parameters if param.visible_on_ui
        ]

    def get_all(self, condition_context):
        """Get all registered conditions available in the given context.

        Args:
            condition_context (ConditionContext): Context used to filter out
                unavailable conditions.

        Returns:
            dict, key "conditions" maps to a list of condition dicts sorted by id.
        """
        conditions = [
            {
                "id": condition.id,
                "parameters": self._format_parameters(condition),
                "supported_target_type": condition.supported_target_type.name
            }
            for condition in self.conditions.values()
            if condition.is_available(condition_context)
        ]
        conditions.sort(key=lambda x: x.get('id'))
        return {"conditions": conditions}

    def get_condition(self, condition_id) -> Condition:
        """Get a condition by its id.

        Raises:
            KeyError: If condition_id is not registered.
        """
        return self.conditions[condition_id]

    def has_condition(self, condition_id, condition_context) -> bool:
        """Return whether the condition exists and is available in the context."""
        if condition_id in self.conditions:
            condition = self.get_condition(condition_id)
            return condition.is_available(condition_context)
        logger.warning("Condition id %s not found.", condition_id)
        return False

    def get_no_param_condition(self) -> list:
        """Return the ids of conditions that take no parameters."""
        return self.no_parameter_conditions

    @staticmethod
    def check_and_sort(collections, target_type, reply):
        """Sort one target type's collection by id and append it to the reply.

        Logs a warning (and appends nothing) when the collection is empty
        or missing.
        """
        collection = collections.get(target_type)
        if collection:
            collection = sorted(collection, key=lambda x: x.get('id'))
            reply.append({"id": target_type + "_condition_collection", "conditions": collection})
        else:
            logger.warning("Condition collection for %s is None.", target_type)

    def get_all_collections(self, condition_context):
        """Get all registered conditions grouped by supported target type.

        Args:
            condition_context (ConditionContext): Context used to filter out
                unavailable conditions.

        Returns:
            list, one collection dict per non-empty target type, in the fixed
            order activation, gradient, tensor, weight.
        """
        collections = {
            TargetTypeEnum.WEIGHT.value: [], TargetTypeEnum.TENSOR.value: [], TargetTypeEnum.GRADIENT.value: [],
            TargetTypeEnum.ACTIVATION.value: []
        }
        for condition in self.conditions.values():
            if not condition.is_available(condition_context):
                continue
            collections[condition.supported_target_type.value].append({
                "id": condition.id,
                "parameters": self._format_parameters(condition),
                "supported_target_type": condition.supported_target_type.name,
                "abbr": condition.abbr
            })
        reply = []
        self.check_and_sort(collections, TargetTypeEnum.ACTIVATION.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.GRADIENT.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.TENSOR.value, reply)
        self.check_and_sort(collections, TargetTypeEnum.WEIGHT.value, reply)
        return reply
| @@ -0,0 +1,19 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Log module""" | |||||
| from mindinsight.utils.log import setup_logger | |||||
# Shared logger for the conditionmgr sub-module, routed to its own
# "conditionmgr" log file by MindInsight's logging setup.
logger = setup_logger(sub_module="conditionmgr", log_name="conditionmgr")
| @@ -0,0 +1,365 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| Predefined watchpoints. | |||||
| This module predefines recommended watchpoints. | |||||
| """ | |||||
| import queue as Queue | |||||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||||
| from mindinsight.conditionmgr.condition import TargetTypeEnum | |||||
| from mindinsight.conditionmgr.condition import ConditionIdEnum | |||||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||||
| from mindinsight.conditionmgr.log import logger | |||||
| from mindinsight.conf import settings | |||||
# Selection status of a node while merging watch nodes (see _merge_nodes):
UNSELECTED_STATUS = 0       # node not selected yet
HALF_SELECTED_STATUS = 1    # name scope with only some sub-nodes selected
SELECTED_STATUS = 2         # leaf node, or scope whose sub-nodes are all selected
class _WatchPointData:
    """Container for one recommended watchpoint: a condition plus watch nodes."""

    def __init__(self, watch_condition, watch_nodes):
        # Dict with keys "condition" (condition id) and "params"
        # (list of _ConditionParameterValue-like objects).
        self.watch_condition = watch_condition
        # Nodes this watchpoint should be applied to.
        self.watch_nodes = watch_nodes

    def get_watch_condition_dict(self):
        """Return the watch condition formatted as a plain dict."""
        params = []
        for param in self.watch_condition.get("params"):
            params.append({
                "name": param.get_parameter_name(),
                "disable": False,
                "value": param.value
            })
        return {"id": self.watch_condition.get("condition"), "params": params}
class _ConditionParameterValue:
    """Pair of a condition parameter definition and its recommended value."""

    def __init__(self, parameter, value):
        # Parameter definition object; exposes at least a ``name`` attribute.
        self.parameter = parameter
        # Concrete value recommended for this parameter.
        self.value = value

    def get_parameter_name(self):
        """Return the name of the underlying parameter definition."""
        return self.parameter.name
def recommend_watchpoints(condition_mgr: ConditionMgr, graph_stream, condition_context):
    """
    Recommend watchpoints.

    Builds recommended watchpoints for weights, gradients and tensors by
    collecting the matching graph nodes and delegating to one
    ``_recommend_*`` helper per condition. Helpers append to
    ``watch_points`` only when their condition is available in the context.

    Args:
        condition_mgr (ConditionMgr): Condition manager instance.
        graph_stream (GraphHandler): Graph handler instance.
        condition_context (ConditionContext): Context for condition.

    Returns:
        list[WatchPointData], watch points to be created.
    """
    watch_points = []
    if not graph_stream.graph:
        # Nothing to recommend without a loaded graph.
        logger.warning("Given graph is None.")
        return watch_points
    if not settings.ENABLE_RECOMMENDED_WATCHPOINTS:
        # Recommendation is an opt-in feature controlled by settings.
        return watch_points

    # add weight watch points
    merged_info = _get_basic_node_info(TargetTypeEnum.WEIGHT.value, graph_stream)
    _recommend_weight_initialization(merged_info, condition_mgr, watch_points, condition_context)
    _recommend_weight_change_too_large(merged_info, condition_mgr, watch_points, condition_context)

    # Because we cannot identify trainable weights currently, weight_no_change and weight_change_too_small will not be
    # recommended (the empty node list makes the helpers return early).
    trainable_weight_nodes = []
    _recommend_weight_not_changed(condition_mgr, trainable_weight_nodes, watch_points, condition_context)
    _recommend_weight_change_too_small(condition_mgr, trainable_weight_nodes, watch_points, condition_context)

    # add gradient watch points
    merged_info = _get_basic_node_info(TargetTypeEnum.GRADIENT.value, graph_stream)
    _recommend_gradient_vanishing(merged_info, condition_mgr, watch_points, condition_context)

    # add tensor watch points
    merged_info = _get_basic_node_info(TargetTypeEnum.TENSOR.value, graph_stream)
    _recommend_overflow_ascend_chip(merged_info, condition_mgr, watch_points, condition_context)
    _recommend_tensor_overflow(merged_info, condition_mgr, watch_points, condition_context)
    _recommend_tensor_all_zero(merged_info, condition_mgr, watch_points, condition_context)
    return watch_points
def _recommend_tensor_all_zero(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Append a tensor-all-zero watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.TENSOR_ALL_ZERO.value
    if not basic_info_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    zero_ratio = _ConditionParameterValue(
        parameter=condition.get_parameter_definition("zero_percentage_ge"),
        value=100  # set default value to 100
    )
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": [zero_ratio]},
        watch_nodes=basic_info_nodes.copy(),
    ))
def _recommend_tensor_overflow(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Append a general tensor-overflow watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.TENSOR_OVERFLOW.value
    if not basic_info_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    # This condition takes no parameters.
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": []},
        watch_nodes=basic_info_nodes.copy(),
    ))
def _recommend_overflow_ascend_chip(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Append an Ascend-chip overflow watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value
    if not basic_info_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    # This condition takes no parameters.
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": []},
        watch_nodes=basic_info_nodes.copy(),
    ))
def _recommend_gradient_vanishing(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Append a gradient-vanishing watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.GRADIENT_VANISHING.value
    if not basic_info_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    threshold = _ConditionParameterValue(
        parameter=condition.get_parameter_definition("abs_mean_lt"),
        value=1e-9  # set default value to 1e-9
    )
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": [threshold]},
        watch_nodes=basic_info_nodes.copy(),
    ))
def _recommend_weight_change_too_small(condition_mgr, trainable_weight_nodes, watch_points, condition_context):
    """Append a weight-change-too-small watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.WEIGHT_CHANGE_TOO_SMALL.value
    if not trainable_weight_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    threshold = _ConditionParameterValue(
        parameter=condition.get_parameter_definition("abs_update_ratio_mean_lt"),
        value=1.0e-4  # set default value to 1.0e-4
    )
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": [threshold]},
        watch_nodes=trainable_weight_nodes,
    ))
def _recommend_weight_not_changed(condition_mgr, trainable_weight_nodes, watch_points, condition_context):
    """Append a weight-not-changed watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.WEIGHT_NOT_CHANGED.value
    if not trainable_weight_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    params = [
        _ConditionParameterValue(
            parameter=condition.get_parameter_definition("rtol"),
            value=1.0e-5  # set default value to 1.0e-5
        ),
        _ConditionParameterValue(
            parameter=condition.get_parameter_definition("atol"),
            value=1.0e-8  # set default value to 1.0e-8
        ),
    ]
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": params},
        watch_nodes=trainable_weight_nodes,
    ))
def _recommend_weight_change_too_large(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Append a weight-change-too-large watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.WEIGHT_CHANGE_TOO_LARGE.value
    if not basic_info_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    threshold = _ConditionParameterValue(
        parameter=condition.get_parameter_definition("abs_update_ratio_mean_gt"),
        value=0.1  # set default value to 0.1
    )
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": [threshold]},
        watch_nodes=basic_info_nodes.copy(),
    ))
def _recommend_weight_initialization(basic_info_nodes, condition_mgr, watch_points, condition_context):
    """Append a weight-initialization watchpoint recommendation, if applicable."""
    condition_id = ConditionIdEnum.WEIGHT_INITIALIZATION.value
    if not basic_info_nodes or not condition_mgr.has_condition(condition_id, condition_context):
        return
    condition = condition_mgr.get_condition(condition_id=condition_id)
    zero_ratio = _ConditionParameterValue(
        parameter=condition.get_parameter_definition("zero_percentage_ge"),
        value=100  # set default value to 100
    )
    watch_points.append(_WatchPointData(
        watch_condition={"condition": condition.id, "params": [zero_ratio]},
        watch_nodes=basic_info_nodes.copy(),
    ))
def _get_basic_node_info(node_category, graph_stream):
    """Collect basic node info for a category, merged by scope and graph name."""
    basic_nodes = _get_basic_node_info_by_node_category(node_category, graph_stream)
    merged = _merge_nodes(basic_nodes, graph_stream.whole_graph)
    return _add_graph_name(merged, graph_stream)
def _get_basic_node_info_by_node_category(node_category, graph_stream):
    """Get basic node info for every node of the given category.

    Args:
        node_category (str): Node category used as the search pattern.
        graph_stream (GraphHandler): Graph handler instance.

    Returns:
        list[NodeBasicInfo], basic info of every matched node.
    """
    all_graph_nodes = graph_stream.get_searched_nodes(pattern={'node_category': node_category})
    # The single-graph check is loop-invariant; compute it once. With a single
    # graph, node names need no graph-name qualification.
    single_graph = len(all_graph_nodes) == 1
    if single_graph:
        logger.debug("This is a single graph")
    basic_info_nodes = []
    for graph_name, nodes in all_graph_nodes.items():
        if single_graph:
            graph_name = ""
        for node in nodes:
            if graph_name == "":
                basic_node_info = NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type)
            else:
                basic_node_info = graph_stream.construct_node_basic_info(
                    full_name=node.full_name, graph_name=graph_name, node_name=node.name, node_type=node.type)
            basic_info_nodes.append(basic_node_info)
    return basic_info_nodes
def _merge_nodes(leaf_nodes, graph):
    """Merge selected leaf nodes into fully-covered name scopes.

    The node tree containing *leaf_nodes* is flattened breadth-first, then
    walked bottom-up; a name scope is marked SELECTED when all of its
    sub-nodes are selected, otherwise HALF_SELECTED. Only the fully selected
    scopes are returned as merged watch nodes.

    Args:
        leaf_nodes (list[NodeBasicInfo]): Leaf nodes to be watched.
        graph: Whole graph object; must provide ``get_nodes`` and
            ``normal_node_map``.

    Returns:
        list[NodeBasicInfo], one entry per fully selected scope.
    """
    unmerged_tree = graph.get_nodes(leaf_nodes)
    tmp_node_queue = Queue.Queue()
    # watch node list in layer order
    watch_nodes = []
    # Phase 1: BFS flattens the tree into layer order while marking each
    # node's initial selection status.
    for node in unmerged_tree:
        if node["type"] != "name_scope":
            # if node is leaf_node, it is totally chosen
            node["status"] = SELECTED_STATUS
        else:
            # if node is not leaf_node, it is not chosen initially
            node["status"] = UNSELECTED_STATUS
        tmp_node_queue.put(node)
    while not tmp_node_queue.empty():
        cur_node = tmp_node_queue.get()
        watch_nodes.append(cur_node)
        for sub_node in cur_node["nodes"]:
            if sub_node["type"] != "name_scope":
                # if node is leaf_node, it is totally chosen
                sub_node["status"] = SELECTED_STATUS
            else:
                # if node is not leaf_node, it is not chosen initially
                sub_node["status"] = UNSELECTED_STATUS
            tmp_node_queue.put(sub_node)
    # Phase 2: pop from the end of the layered list so children are
    # processed before their enclosing scopes.
    merged_watch_nodes = []
    while watch_nodes:
        cur_node = watch_nodes.pop()
        node_name = cur_node["name"]
        # NOTE(review): assumes node_name is always present in
        # graph.normal_node_map — a miss would raise AttributeError; confirm.
        sub_count = graph.normal_node_map.get(node_name).subnode_count
        # Skip leaves and scopes whose children are not all present here.
        if len(cur_node["nodes"]) < sub_count or not cur_node["nodes"]:
            continue
        is_all_chosen = True
        for sub_node in cur_node["nodes"]:
            if sub_node["status"] != SELECTED_STATUS:
                is_all_chosen = False
                break
        if is_all_chosen:
            cur_node["status"] = SELECTED_STATUS
            merged_watch_nodes.append(cur_node)
        else:
            cur_node["status"] = HALF_SELECTED_STATUS
    logger.debug("merged_watch_nodes: %s", merged_watch_nodes)
    out_nodes = []
    for node_info in merged_watch_nodes:
        # Scope entries use the scope name for both name and full_name.
        node_basic_info = NodeBasicInfo(name=node_info["name"], full_name=node_info["name"], type=node_info["type"])
        out_nodes.append(node_basic_info)
    logger.debug("out_nodes: %s", out_nodes)
    return out_nodes
| def _add_graph_name(nodes, graph_stream): | |||||
| """add graph_name in node.name""" | |||||
| if len(graph_stream.graph) > 1: | |||||
| return nodes | |||||
| graph_name = graph_stream.graph_names[0] | |||||
| output_nodes = [] | |||||
| for node in nodes: | |||||
| node_basic_info = graph_stream.construct_node_basic_info( | |||||
| full_name=node.name, graph_name=graph_name, node_name=node.name, node_type=node.type) | |||||
| output_nodes.append(node_basic_info) | |||||
| return output_nodes | |||||
| @@ -63,3 +63,5 @@ MAX_GRAPH_STEP_SIZE_PER_TAG = 1 | |||||
| MAX_HISTOGRAM_STEP_SIZE_PER_TAG = 50 | MAX_HISTOGRAM_STEP_SIZE_PER_TAG = 50 | ||||
| MAX_TENSOR_STEP_SIZE_PER_TAG = 20 | MAX_TENSOR_STEP_SIZE_PER_TAG = 20 | ||||
| MAX_TENSOR_RESPONSE_DATA_SIZE = 100000 | MAX_TENSOR_RESPONSE_DATA_SIZE = 100000 | ||||
| ENABLE_RECOMMENDED_WATCHPOINTS = False | |||||
| @@ -54,7 +54,7 @@ class MSGraph(Graph): | |||||
| node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto. | node_protos (list[anf_ir_pb2.NodeProto]): Refer to anf_ir_pb2.NodeProto. | ||||
| """ | """ | ||||
| logger.debug("Start to parse op nodes from proto.") | logger.debug("Start to parse op nodes from proto.") | ||||
| for node_proto in node_protos: | |||||
| for topological_index, node_proto in enumerate(node_protos): | |||||
| if not node_proto.name: | if not node_proto.name: | ||||
| logger.warning("Finding a node with an empty name will not save it.") | logger.warning("Finding a node with an empty name will not save it.") | ||||
| continue | continue | ||||
| @@ -69,7 +69,7 @@ class MSGraph(Graph): | |||||
| # The Graphviz plug-in that the UI USES can't handle these special characters. | # The Graphviz plug-in that the UI USES can't handle these special characters. | ||||
| check_invalid_character(node_name) | check_invalid_character(node_name) | ||||
| node = Node(name=node_name, node_id=node_proto.name) | |||||
| node = Node(name=node_name, node_id=node_proto.name, topological_index=topological_index) | |||||
| node.full_name = node_proto.full_name | node.full_name = node_proto.full_name | ||||
| node.type = node_proto.op_type | node.type = node_proto.op_type | ||||
| @@ -35,7 +35,7 @@ class Node: | |||||
| node_id (str): The id of this node, and node id is unique in graph. | node_id (str): The id of this node, and node id is unique in graph. | ||||
| """ | """ | ||||
| def __init__(self, name, node_id): | |||||
| def __init__(self, name, node_id, topological_index=-1): | |||||
| self._node_id = node_id | self._node_id = node_id | ||||
| self.name = name | self.name = name | ||||
| self.type = "" | self.type = "" | ||||
| @@ -53,6 +53,8 @@ class Node: | |||||
| self.output_nums = 0 | self.output_nums = 0 | ||||
| self.elem_types = [] | self.elem_types = [] | ||||
| self.full_name = "" | self.full_name = "" | ||||
| # This value will be used as the priority field. | |||||
| self.topological_index = topological_index | |||||
| def to_dict(self): | def to_dict(self): | ||||
| """Converts the node object to dictionary format.""" | """Converts the node object to dictionary format.""" | ||||
| @@ -16,7 +16,7 @@ | |||||
| This file is used to define the node of graph and associated base types. | This file is used to define the node of graph and associated base types. | ||||
| """ | """ | ||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| class NodeTree: | class NodeTree: | ||||
| @@ -37,6 +37,7 @@ class DebuggerErrors(DebuggerErrorCodes): | |||||
| CONTINUE_ERROR = 3 | _DEBUGGER_RUNNING_ERROR | CONTINUE_ERROR = 3 | _DEBUGGER_RUNNING_ERROR | ||||
| PAUSE_ERROR = 4 | _DEBUGGER_RUNNING_ERROR | PAUSE_ERROR = 4 | _DEBUGGER_RUNNING_ERROR | ||||
| COMPARE_TENSOR_ERROR = 5 | _DEBUGGER_RUNNING_ERROR | COMPARE_TENSOR_ERROR = 5 | _DEBUGGER_RUNNING_ERROR | ||||
| RECHECK_ERROR = 6 | _DEBUGGER_RUNNING_ERROR | |||||
| @unique | @unique | ||||
| @@ -52,3 +53,4 @@ class DebuggerErrorMsg(Enum): | |||||
| DELETE_WATCHPOINT_ERROR = "Delete watchpoint failed. {}" | DELETE_WATCHPOINT_ERROR = "Delete watchpoint failed. {}" | ||||
| CONTINUE_ERROR = "Continue debugging failed. {}" | CONTINUE_ERROR = "Continue debugging failed. {}" | ||||
| PAUSE_ERROR = "Pause debugging failed. {}" | PAUSE_ERROR = "Pause debugging failed. {}" | ||||
| RECHECK_ERROR = "Recheck failed. {}" | |||||
| @@ -72,6 +72,17 @@ class DebuggerDeleteWatchPointError(MindInsightException): | |||||
| ) | ) | ||||
| class DebuggerRecheckError(MindInsightException): | |||||
| """The error about deleting watch point.""" | |||||
| def __init__(self, msg): | |||||
| super(DebuggerRecheckError, self).__init__( | |||||
| error=DebuggerErrors.RECHECK_ERROR, | |||||
| message=DebuggerErrorMsg.RECHECK_ERROR.value.format(msg), | |||||
| http_code=400 | |||||
| ) | |||||
| class DebuggerCompareTensorError(MindInsightException): | class DebuggerCompareTensorError(MindInsightException): | ||||
| """The error about comparing tensors.""" | """The error about comparing tensors.""" | ||||
| @@ -17,4 +17,4 @@ from mindinsight.utils.log import setup_logger | |||||
| LOG_NAME = "debugger" | LOG_NAME = "debugger" | ||||
| LOG_MODULE = "debugger" | LOG_MODULE = "debugger" | ||||
| logger = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME) | |||||
| LOGGER = setup_logger(sub_module=LOG_MODULE, log_name=LOG_NAME) | |||||
| @@ -14,7 +14,6 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| """Define the utils.""" | """Define the utils.""" | ||||
| import enum | import enum | ||||
| from collections import namedtuple | |||||
| import numpy as np | import numpy as np | ||||
| @@ -72,7 +71,12 @@ class Streams(enum.Enum): | |||||
| WATCHPOINT_HIT = 'watchpoint_hit' | WATCHPOINT_HIT = 'watchpoint_hit' | ||||
| NodeBasicInfo = namedtuple('node_basic_info', ['name', 'full_name', 'type']) | |||||
| class RunLevel(enum.Enum): | |||||
| """Run Level enum, it depends on whether the program is executed node by node, | |||||
| step by step, or in recheck phase""" | |||||
| NODE = "node" | |||||
| STEP = "step" | |||||
| RECHECK = "recheck" | |||||
| def get_ack_reply(state=0): | def get_ack_reply(state=0): | ||||
| @@ -140,5 +144,4 @@ def create_view_event_from_tensor_history(tensor_history): | |||||
| def is_scope_type(node_type): | def is_scope_type(node_type): | ||||
| """Judge whether the type is scope type.""" | """Judge whether the type is scope type.""" | ||||
| scope_types = [NodeTypeEnum.NAME_SCOPE.value, NodeTypeEnum.AGGREGATION_SCOPE.value] | |||||
| return node_type in scope_types | |||||
| return node_type.endswith('scope') | |||||
| @@ -15,7 +15,7 @@ | |||||
| """Implement the debugger data cache manager.""" | """Implement the debugger data cache manager.""" | ||||
| import sys | import sys | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import Streams | from mindinsight.debugger.common.utils import Streams | ||||
| from mindinsight.debugger.stream_handler import EventHandler, MetadataHandler, GraphHandler, \ | from mindinsight.debugger.stream_handler import EventHandler, MetadataHandler, GraphHandler, \ | ||||
| TensorHandler, WatchpointHandler, WatchpointHitHandler | TensorHandler, WatchpointHandler, WatchpointHitHandler | ||||
| @@ -15,11 +15,13 @@ | |||||
| """Implement the debugger grpc server.""" | """Implement the debugger grpc server.""" | ||||
| from functools import wraps | from functools import wraps | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| import mindinsight.conditionmgr.recommender | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus, \ | from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus, \ | ||||
| Streams | |||||
| Streams, RunLevel | |||||
| from mindinsight.debugger.proto import debug_grpc_pb2_grpc as grpc_server_base | from mindinsight.debugger.proto import debug_grpc_pb2_grpc as grpc_server_base | ||||
| from mindinsight.debugger.proto.ms_graph_pb2 import GraphProto | from mindinsight.debugger.proto.ms_graph_pb2 import GraphProto | ||||
| from mindinsight.conditionmgr.condition import ConditionContext | |||||
| def debugger_wrap(func): | def debugger_wrap(func): | ||||
| @@ -39,7 +41,7 @@ def debugger_wrap(func): | |||||
| class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | ||||
| """The grpc server used to interactive with grpc client.""" | """The grpc server used to interactive with grpc client.""" | ||||
| def __init__(self, cache_store): | |||||
| def __init__(self, cache_store, condition_mgr): | |||||
| """ | """ | ||||
| Initialize. | Initialize. | ||||
| @@ -48,6 +50,7 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| """ | """ | ||||
| cache_store.initialize() | cache_store.initialize() | ||||
| self._cache_store = cache_store | self._cache_store = cache_store | ||||
| self._condition_mgr = condition_mgr | |||||
| # the next position of command queue to be queried | # the next position of command queue to be queried | ||||
| self._pos = None | self._pos = None | ||||
| # the status of grpc server, the value is in ServerStatus | # the status of grpc server, the value is in ServerStatus | ||||
| @@ -66,7 +69,7 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| self._status = ServerStatus.PENDING | self._status = ServerStatus.PENDING | ||||
| self._old_run_cmd = {} | self._old_run_cmd = {} | ||||
| self._received_view_cmd = {} | self._received_view_cmd = {} | ||||
| self._received_hit = False | |||||
| self._received_hit = [] | |||||
| self._cache_store.clean() | self._cache_store.clean() | ||||
| @debugger_wrap | @debugger_wrap | ||||
| @@ -90,25 +93,46 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| reply = get_ack_reply(1) | reply = get_ack_reply(1) | ||||
| log.warning("Failed to get command event.") | log.warning("Failed to get command event.") | ||||
| else: | else: | ||||
| log.info("Reply to WaitCMD: %s", reply) | |||||
| log.debug("Reply to WaitCMD: %s", reply) | |||||
| return reply | return reply | ||||
| def _add_predefined_watchpoints(self, condition_context): | |||||
| """Add predefined watchpoints.""" | |||||
| log.debug("Add predefined watchpoints.") | |||||
| graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | |||||
| watchpoints = mindinsight.conditionmgr.recommender.recommend_watchpoints(self._condition_mgr, graph_stream, | |||||
| condition_context) | |||||
| watch_point_stream_handler = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | |||||
| for watchpoint in watchpoints: | |||||
| watch_point_stream_handler.create_watchpoint( | |||||
| watch_condition=watchpoint.get_watch_condition_dict(), | |||||
| watch_nodes=watchpoint.watch_nodes, | |||||
| condition_mgr=self._condition_mgr | |||||
| ) | |||||
| def _pre_process(self, request): | def _pre_process(self, request): | ||||
| """Pre-process before dealing with command.""" | """Pre-process before dealing with command.""" | ||||
| metadata_stream = self._cache_store.get_stream_handler(Streams.METADATA) | metadata_stream = self._cache_store.get_stream_handler(Streams.METADATA) | ||||
| watchpoint_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | |||||
| is_new_step = metadata_stream.step < request.cur_step | is_new_step = metadata_stream.step < request.cur_step | ||||
| is_new_node = metadata_stream.full_name != request.cur_node | is_new_node = metadata_stream.full_name != request.cur_node | ||||
| # clean cache data at the beginning of new step | |||||
| # clean cache data at the beginning of new step or node has been changed. | |||||
| if is_new_step or is_new_node: | if is_new_step or is_new_node: | ||||
| self._cache_store.clean_data() | self._cache_store.clean_data() | ||||
| if is_new_step: | if is_new_step: | ||||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||||
| self._cache_store.get_stream_handler(Streams.TENSOR).clean_tensors(request.cur_step) | self._cache_store.get_stream_handler(Streams.TENSOR).clean_tensors(request.cur_step) | ||||
| watchpoint_stream.clean_temp_cached_names() | |||||
| # receive graph at the beginning of the training | # receive graph at the beginning of the training | ||||
| if self._status == ServerStatus.RECEIVE_GRAPH: | if self._status == ServerStatus.RECEIVE_GRAPH: | ||||
| condition_context = ConditionContext(backend=request.backend, debugger_capability=(1, 0)) | |||||
| self._add_predefined_watchpoints(condition_context) | |||||
| self._send_graph_flag(metadata_stream) | self._send_graph_flag(metadata_stream) | ||||
| # receive new metadata | # receive new metadata | ||||
| if is_new_step or is_new_node: | if is_new_step or is_new_node: | ||||
| self._update_metadata(metadata_stream, request) | self._update_metadata(metadata_stream, request) | ||||
| # save the full name of the node which MindSpore has stored the tensor. | |||||
| watchpoint_stream.add_temp_cached_name(request.cur_node) | |||||
| self._send_received_tensor_tag() | self._send_received_tensor_tag() | ||||
| self._send_watchpoint_hit_flag() | self._send_watchpoint_hit_flag() | ||||
| @@ -139,9 +163,14 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| """ | """ | ||||
| # put new metadata into cache | # put new metadata into cache | ||||
| metadata_stream.put(metadata_proto) | metadata_stream.put(metadata_proto) | ||||
| cur_node = self._cache_store.get_stream_handler(Streams.GRAPH).get_node_name_by_full_name( | |||||
| metadata_proto.cur_node) if metadata_proto.cur_node else '' | |||||
| # update current node name and graph name | |||||
| graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | |||||
| full_name = metadata_proto.cur_node | |||||
| graph_name = graph_stream.get_graph_id_by_full_name( | |||||
| full_name) if full_name else metadata_stream.graph_name | |||||
| cur_node = graph_stream.get_node_name_by_full_name(full_name, graph_name) | |||||
| metadata_stream.node_name = cur_node | metadata_stream.node_name = cur_node | ||||
| metadata_stream.graph_name = graph_name | |||||
| metadata = metadata_stream.get() | metadata = metadata_stream.get() | ||||
| self._cache_store.put_data(metadata) | self._cache_store.put_data(metadata) | ||||
| log.debug("Put new metadata into data queue.") | log.debug("Put new metadata into data queue.") | ||||
| @@ -151,7 +180,7 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| node_name = self._received_view_cmd.get('node_name') | node_name = self._received_view_cmd.get('node_name') | ||||
| if not node_name or self._received_view_cmd.get('wait_for_tensor'): | if not node_name or self._received_view_cmd.get('wait_for_tensor'): | ||||
| return | return | ||||
| metadata = self._cache_store.get_stream_handler(Streams.METADATA).get() | |||||
| metadata = self._cache_store.get_stream_handler(Streams.METADATA).get(['step', 'state']) | |||||
| ret = {'receive_tensor': {'node_name': node_name}} | ret = {'receive_tensor': {'node_name': node_name}} | ||||
| ret.update(metadata) | ret.update(metadata) | ||||
| self._cache_store.put_data(ret) | self._cache_store.put_data(ret) | ||||
| @@ -161,9 +190,12 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| def _send_watchpoint_hit_flag(self): | def _send_watchpoint_hit_flag(self): | ||||
| """Send Watchpoint hit flag.""" | """Send Watchpoint hit flag.""" | ||||
| watchpoint_hit_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | watchpoint_hit_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | ||||
| if watchpoint_hit_stream.empty or not self._received_hit: | |||||
| if not self._received_hit: | |||||
| return | return | ||||
| self._received_hit = False | |||||
| watchpoint_hits = self._received_hit | |||||
| self._received_hit = [] | |||||
| for watchpoint_hit in watchpoint_hits: | |||||
| watchpoint_hit_stream.put(watchpoint_hit) | |||||
| watchpoint_hits_info = watchpoint_hit_stream.get() | watchpoint_hits_info = watchpoint_hit_stream.get() | ||||
| self._cache_store.put_data(watchpoint_hits_info) | self._cache_store.put_data(watchpoint_hits_info) | ||||
| log.debug("Send the watchpoint hits to DataQueue.\nSend the reply.") | log.debug("Send the watchpoint hits to DataQueue.\nSend the reply.") | ||||
| @@ -187,7 +219,6 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| event = self._deal_with_left_continue_step(left_step_count) | event = self._deal_with_left_continue_step(left_step_count) | ||||
| else: | else: | ||||
| event = self._deal_with_left_continue_node(node_name) | event = self._deal_with_left_continue_node(node_name) | ||||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||||
| log.debug("Send old RunCMD. Clean watchpoint hit.") | log.debug("Send old RunCMD. Clean watchpoint hit.") | ||||
| return event | return event | ||||
| @@ -260,7 +291,10 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| event = self._deal_with_run_cmd(event) | event = self._deal_with_run_cmd(event) | ||||
| elif event.HasField('exit'): | elif event.HasField('exit'): | ||||
| self._cache_store.clean() | self._cache_store.clean() | ||||
| log.info("Clean cache for exit cmd.") | |||||
| log.debug("Clean cache for exit cmd.") | |||||
| else: | |||||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT).clean_cache_set_cmd(event.set_cmd) | |||||
| log.debug("get set cmd.") | |||||
| return event | return event | ||||
| @@ -294,7 +328,9 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| elif run_cmd.node_name: | elif run_cmd.node_name: | ||||
| self._old_run_cmd['node_name'] = run_cmd.node_name | self._old_run_cmd['node_name'] = run_cmd.node_name | ||||
| run_cmd.node_name = '' | run_cmd.node_name = '' | ||||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||||
| # clean watchpoint hit cache | |||||
| if run_cmd.run_level == RunLevel.RECHECK.value: | |||||
| self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).clean() | |||||
| log.debug("Receive RunCMD. Clean watchpoint hit cache.") | log.debug("Receive RunCMD. Clean watchpoint hit cache.") | ||||
| return event | return event | ||||
| @@ -330,9 +366,34 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| for chunk in request_iterator: | for chunk in request_iterator: | ||||
| serial_graph += chunk.buffer | serial_graph += chunk.buffer | ||||
| graph = GraphProto.FromString(serial_graph) | graph = GraphProto.FromString(serial_graph) | ||||
| log.debug("Deserialize the graph. Receive %s nodes", len(graph.node)) | |||||
| self._cache_store.get_stream_handler(Streams.GRAPH).put(graph) | |||||
| log.debug("Deserialize the graph %s. Receive %s nodes", graph.name, len(graph.node)) | |||||
| graph_dict = {graph.name: graph} | |||||
| self._cache_store.get_stream_handler(Streams.GRAPH).put(graph_dict) | |||||
| self._cache_store.get_stream_handler(Streams.TENSOR).put_const_vals(graph.const_vals) | self._cache_store.get_stream_handler(Streams.TENSOR).put_const_vals(graph.const_vals) | ||||
| self._cache_store.get_stream_handler(Streams.METADATA).graph_name = graph.name | |||||
| self._status = ServerStatus.RECEIVE_GRAPH | |||||
| reply = get_ack_reply() | |||||
| log.debug("Send the reply for graph.") | |||||
| return reply | |||||
| @debugger_wrap | |||||
| def SendMultiGraphs(self, request_iterator, context): | |||||
| """Send graph into DebuggerCache.""" | |||||
| log.info("Received graph.") | |||||
| serial_graph = b"" | |||||
| graph_dict = {} | |||||
| for chunk in request_iterator: | |||||
| serial_graph += chunk.buffer | |||||
| if chunk.finished: | |||||
| sub_graph = GraphProto.FromString(serial_graph) | |||||
| graph_dict[sub_graph.name] = sub_graph | |||||
| log.debug("Deserialize the graph %s. Receive %s nodes", sub_graph.name, | |||||
| len(sub_graph.node)) | |||||
| serial_graph = b"" | |||||
| self._cache_store.get_stream_handler(Streams.TENSOR).put_const_vals( | |||||
| sub_graph.const_vals) | |||||
| self._cache_store.get_stream_handler(Streams.GRAPH).put(graph_dict) | |||||
| self._status = ServerStatus.RECEIVE_GRAPH | self._status = ServerStatus.RECEIVE_GRAPH | ||||
| reply = get_ack_reply() | reply = get_ack_reply() | ||||
| log.debug("Send the reply for graph.") | log.debug("Send the reply for graph.") | ||||
| @@ -365,22 +426,30 @@ class DebuggerGrpcServer(grpc_server_base.EventListenerServicer): | |||||
| """Send watchpoint hits info DebuggerCache.""" | """Send watchpoint hits info DebuggerCache.""" | ||||
| log.info("Received WatchpointHits. Left run cmd %s change to emtpy.", self._old_run_cmd) | log.info("Received WatchpointHits. Left run cmd %s change to emtpy.", self._old_run_cmd) | ||||
| self._old_run_cmd.clear() | self._old_run_cmd.clear() | ||||
| self._received_hit = True | |||||
| watchpoint_hit_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | |||||
| if self._cache_store.get_stream_handler(Streams.METADATA).state == ServerStatus.RUNNING.value: | |||||
| # if the client session is running a script, all the cached command should be cleared | |||||
| # when received watchpoint_hits. | |||||
| self._cache_store.clean_command() | |||||
| # save the watchpoint_hits data | |||||
| watchpoint_hits = [] | |||||
| watchpoint_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | watchpoint_stream = self._cache_store.get_stream_handler(Streams.WATCHPOINT) | ||||
| graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self._cache_store.get_stream_handler(Streams.GRAPH) | ||||
| for watchpoint_hit_proto in request_iterator: | for watchpoint_hit_proto in request_iterator: | ||||
| ui_node_name = graph_stream.get_node_name_by_full_name( | |||||
| watchpoint_hit_proto.tensor.node_name) | |||||
| node_full_name = watchpoint_hit_proto.tensor.node_name | |||||
| graph_name = graph_stream.get_graph_id_by_full_name(node_full_name) | |||||
| ui_node_name = graph_stream.get_node_name_by_full_name(node_full_name, graph_name) | |||||
| log.debug("Receive watch point hit: %s", watchpoint_hit_proto) | log.debug("Receive watch point hit: %s", watchpoint_hit_proto) | ||||
| if not ui_node_name: | if not ui_node_name: | ||||
| log.info("Not support to show %s on graph.", watchpoint_hit_proto.tensor.node_name) | |||||
| log.info("Not support to show %s on graph.", node_full_name) | |||||
| continue | continue | ||||
| watchpoint_hit = { | watchpoint_hit = { | ||||
| 'tensor_proto': watchpoint_hit_proto.tensor, | 'tensor_proto': watchpoint_hit_proto.tensor, | ||||
| 'watchpoint': watchpoint_stream.get_watchpoint_by_id(watchpoint_hit_proto.id), | 'watchpoint': watchpoint_stream.get_watchpoint_by_id(watchpoint_hit_proto.id), | ||||
| 'node_name': ui_node_name | |||||
| 'node_name': ui_node_name, | |||||
| 'graph_name': graph_name | |||||
| } | } | ||||
| watchpoint_hit_stream.put(watchpoint_hit) | |||||
| watchpoint_hits.append(watchpoint_hit) | |||||
| self._received_hit = watchpoint_hits | |||||
| reply = get_ack_reply() | reply = get_ack_reply() | ||||
| return reply | return reply | ||||
| @@ -16,23 +16,26 @@ | |||||
| import signal | import signal | ||||
| from concurrent import futures | from concurrent import futures | ||||
| from threading import Thread | from threading import Thread | ||||
| import grpc | import grpc | ||||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||||
| from mindinsight.conditionmgr.condition import ConditionContext, ConditionIdEnum | |||||
| from mindinsight.conf import settings | from mindinsight.conf import settings | ||||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | ||||
| from mindinsight.datavisual.utils.tools import to_float | from mindinsight.datavisual.utils.tools import to_float | ||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | ||||
| DebuggerParamTypeError, DebuggerCreateWatchPointError, DebuggerUpdateWatchPointError, \ | DebuggerParamTypeError, DebuggerCreateWatchPointError, DebuggerUpdateWatchPointError, \ | ||||
| DebuggerDeleteWatchPointError, DebuggerContinueError, DebuggerPauseError, \ | DebuggerDeleteWatchPointError, DebuggerContinueError, DebuggerPauseError, \ | ||||
| DebuggerCompareTensorError | |||||
| from mindinsight.debugger.common.log import logger as log | |||||
| DebuggerCompareTensorError, DebuggerRecheckError | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus, \ | from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus, \ | ||||
| create_view_event_from_tensor_history, Streams, is_scope_type, NodeBasicInfo | |||||
| create_view_event_from_tensor_history, Streams, is_scope_type, RunLevel | |||||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||||
| from mindinsight.debugger.debugger_cache import DebuggerCache | from mindinsight.debugger.debugger_cache import DebuggerCache | ||||
| from mindinsight.debugger.debugger_grpc_server import DebuggerGrpcServer | from mindinsight.debugger.debugger_grpc_server import DebuggerGrpcServer | ||||
| from mindinsight.debugger.proto import debug_grpc_pb2_grpc as grpc_server_base | from mindinsight.debugger.proto import debug_grpc_pb2_grpc as grpc_server_base | ||||
| from mindinsight.debugger.proto.debug_grpc_pb2 import RunCMD | from mindinsight.debugger.proto.debug_grpc_pb2 import RunCMD | ||||
| from mindinsight.debugger.stream_operator.tensor_detail_info import TensorDetailInfo | |||||
| from mindinsight.utils.exceptions import MindInsightException | from mindinsight.utils.exceptions import MindInsightException | ||||
| from mindinsight.utils.tensor import TensorUtils, MAX_DIMENSIONS_FOR_TENSOR | from mindinsight.utils.tensor import TensorUtils, MAX_DIMENSIONS_FOR_TENSOR | ||||
| @@ -42,11 +45,26 @@ class DebuggerServer: | |||||
| def __init__(self, grpc_port=None): | def __init__(self, grpc_port=None): | ||||
| self.grpc_port = grpc_port | self.grpc_port = grpc_port | ||||
| self.condition_mgr = ConditionMgr() | |||||
| self.cache_store = DebuggerCache() | self.cache_store = DebuggerCache() | ||||
| self.grpc_server = DebuggerGrpcServer(self.cache_store) | |||||
| self.grpc_server = DebuggerGrpcServer(self.cache_store, self.condition_mgr) | |||||
| self.grpc_server_manager = None | self.grpc_server_manager = None | ||||
| self.back_server = None | self.back_server = None | ||||
| def get_conditions(self, train_id): | |||||
| """Get all default conditions""" | |||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||||
| condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) | |||||
| log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) | |||||
| return self.condition_mgr.get_all(condition_context) | |||||
| def get_condition_collections(self, train_id): | |||||
| """Get default condition_collections""" | |||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||||
| condition_context = ConditionContext(metadata_stream.backend, metadata_stream.step, (1, 0)) | |||||
| log.debug("Train_id: %s, backend: %s", train_id, condition_context.backend) | |||||
| return self.condition_mgr.get_all_collections(condition_context) | |||||
| def start(self): | def start(self): | ||||
| """Start server.""" | """Start server.""" | ||||
| grpc_port = self.grpc_port if self.grpc_port else "50051" | grpc_port = self.grpc_port if self.grpc_port else "50051" | ||||
| @@ -97,24 +115,34 @@ class DebuggerServer: | |||||
| return reply | return reply | ||||
| def search(self, name, watch_point_id=0): | |||||
| def search(self, filter_condition): | |||||
| """ | """ | ||||
| Search for single node in graph. | Search for single node in graph. | ||||
| Args: | Args: | ||||
| name (str): The name pattern. | |||||
| watch_point_id (int): The id of watchpoint. Default: 0. | |||||
| filter_condition (dict): Filter condition. | |||||
| - name (str): The name pattern. | |||||
| - graph_name (str): The graph name. | |||||
| - watch_point_id (int): The id of watchpoint. Default: 0. | |||||
| - node_category (str): The node_category. Default: None | |||||
| Returns: | Returns: | ||||
| dict, the searched nodes. | dict, the searched nodes. | ||||
| """ | """ | ||||
| log.info("receive search request for node:%s, in watchpoint:%d", name, watch_point_id) | |||||
| log.info("receive search request with filter_condition: %s", filter_condition) | |||||
| # validate watchpoint id | |||||
| watch_point_id = filter_condition.pop('watch_point_id', 0) | |||||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | ||||
| watchpoint_stream.validate_watchpoint_id(watch_point_id) | watchpoint_stream.validate_watchpoint_id(watch_point_id) | ||||
| # validate and update graph name | |||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | ||||
| graph = graph_stream.search_nodes(name) | |||||
| graph_name = graph_stream.validate_graph_name(filter_condition.get('graph_name')) | |||||
| filter_condition['graph_name'] = graph_name | |||||
| # get searched graph | |||||
| graph = graph_stream.search_nodes(filter_condition) | |||||
| # add watched label to graph | # add watched label to graph | ||||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id) | |||||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id, graph_name) | |||||
| return graph | return graph | ||||
| def tensor_comparisons(self, name, shape, detail='data', tolerance='0'): | def tensor_comparisons(self, name, shape, detail='data', tolerance='0'): | ||||
| @@ -150,7 +178,8 @@ class DebuggerServer: | |||||
| if node_type == NodeTypeEnum.PARAMETER.value: | if node_type == NodeTypeEnum.PARAMETER.value: | ||||
| reply = tensor_stream.get_tensors_diff(tensor_name, parsed_shape, tolerance) | reply = tensor_stream.get_tensors_diff(tensor_name, parsed_shape, tolerance) | ||||
| else: | else: | ||||
| raise DebuggerParamValueError("The node type must be parameter, but got {}.".format(node_type)) | |||||
| raise DebuggerParamValueError( | |||||
| "The node type must be parameter, but got {}.".format(node_type)) | |||||
| return reply | return reply | ||||
| def retrieve(self, mode, filter_condition=None): | def retrieve(self, mode, filter_condition=None): | ||||
| @@ -196,10 +225,13 @@ class DebuggerServer: | |||||
| self.cache_store.clean_data() | self.cache_store.clean_data() | ||||
| log.info("Clean data queue cache when retrieve all request.") | log.info("Clean data queue cache when retrieve all request.") | ||||
| result = {} | result = {} | ||||
| for stream in [Streams.METADATA, Streams.GRAPH, Streams.WATCHPOINT]: | |||||
| for stream in [Streams.METADATA, Streams.GRAPH]: | |||||
| sub_res = self.cache_store.get_stream_handler(stream).get() | sub_res = self.cache_store.get_stream_handler(stream).get() | ||||
| result.update(sub_res) | result.update(sub_res) | ||||
| sub_res = self._hide_parameters_for_ui() | |||||
| result.update(sub_res) | |||||
| return result | return result | ||||
| def _retrieve_node(self, filter_condition): | def _retrieve_node(self, filter_condition): | ||||
| @@ -210,10 +242,9 @@ class DebuggerServer: | |||||
| filter_condition (dict): Filter condition. | filter_condition (dict): Filter condition. | ||||
| - name (str): The name of single node. | - name (str): The name of single node. | ||||
| - graph_name (str): The relative graph_name of the node. | |||||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | - single_node (bool): If False, return the sub-layer of single node. If True, return | ||||
| the node list from root node to single node. | the node list from root node to single node. | ||||
| - watch_point_id (int): The id of watchpoint. | - watch_point_id (int): The id of watchpoint. | ||||
| Returns: | Returns: | ||||
| @@ -222,9 +253,13 @@ class DebuggerServer: | |||||
| log.debug("Retrieve node %s.", filter_condition) | log.debug("Retrieve node %s.", filter_condition) | ||||
| # validate node name | # validate node name | ||||
| node_name = filter_condition.get('name') | node_name = filter_condition.get('name') | ||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||||
| graph_name = graph_stream.validate_graph_name(filter_condition.get('graph_name')) | |||||
| if node_name: | if node_name: | ||||
| self.cache_store.get_stream_handler(Streams.GRAPH).get_node_type(node_name) | |||||
| # validate node name | |||||
| graph_stream.get_node_type(node_name, graph_name) | |||||
| filter_condition['single_node'] = bool(filter_condition.get('single_node')) | filter_condition['single_node'] = bool(filter_condition.get('single_node')) | ||||
| filter_condition['graph_name'] = graph_name | |||||
| reply = self._get_nodes_info(filter_condition) | reply = self._get_nodes_info(filter_condition) | ||||
| return reply | return reply | ||||
| @@ -236,10 +271,9 @@ class DebuggerServer: | |||||
| filter_condition (dict): The filter condition. | filter_condition (dict): The filter condition. | ||||
| - name (str): The node name. | - name (str): The node name. | ||||
| - graph_name (str): The relative graph_name of the node. | |||||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | - single_node (bool): If False, return the sub-layer of single node. If True, return | ||||
| the node list from root node to single node. | the node list from root node to single node. | ||||
| - watch_point_id (int): The id of watchpoint. | - watch_point_id (int): The id of watchpoint. | ||||
| Returns: | Returns: | ||||
| @@ -254,15 +288,16 @@ class DebuggerServer: | |||||
| reply = graph_stream.get(filter_condition) | reply = graph_stream.get(filter_condition) | ||||
| graph = reply.get('graph') | graph = reply.get('graph') | ||||
| # add watched label to graph | # add watched label to graph | ||||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id) | |||||
| watchpoint_stream.set_watch_nodes(graph, graph_stream, watch_point_id, filter_condition.get('graph_name')) | |||||
| return reply | return reply | ||||
| def retrieve_tensor_history(self, node_name): | |||||
| def retrieve_tensor_history(self, node_name, graph_name=None): | |||||
| """ | """ | ||||
| Retrieve tensor history for leaf node. | Retrieve tensor history for leaf node. | ||||
| Args: | Args: | ||||
| node_name (str): The name of leaf node. | node_name (str): The name of leaf node. | ||||
| graph_name (str): The graph name. Default: None. | |||||
| Returns: | Returns: | ||||
| dict, the tensor history and metadata. | dict, the tensor history and metadata. | ||||
| @@ -271,39 +306,31 @@ class DebuggerServer: | |||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | ||||
| if metadata_stream.state == ServerStatus.PENDING.value: | if metadata_stream.state == ServerStatus.PENDING.value: | ||||
| log.info("The backend is in pending status.") | log.info("The backend is in pending status.") | ||||
| return metadata_stream.get() | |||||
| self._validate_leaf_name(node_name) | |||||
| res = self._get_tensor_history(node_name) | |||||
| return metadata_stream.get(['state', 'step']) | |||||
| res = self._get_tensor_history(node_name, graph_name) | |||||
| return res | return res | ||||
| def _validate_leaf_name(self, node_name): | |||||
| """Validate if the node is a leaf node.""" | |||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||||
| node_type = graph_stream.get_node_type(node_name) | |||||
| if is_scope_type(node_type): | |||||
| log.error("Scope type node has no tensor history.") | |||||
| raise DebuggerParamValueError("Invalid leaf node name.") | |||||
| def _get_tensor_history(self, node_name): | |||||
| def _get_tensor_history(self, node_name, graph_name=None): | |||||
| """ | """ | ||||
| Get tensor history for single node. | Get tensor history for single node. | ||||
| Args: | Args: | ||||
| node_name (str): The name of leaf node. | node_name (str): The name of leaf node. | ||||
| graph_name (str): The graph name. Default: None. | |||||
| Returns: | Returns: | ||||
| dict, the tensor history and metadata. | dict, the tensor history and metadata. | ||||
| """ | """ | ||||
| # get basic tensor history | # get basic tensor history | ||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | ||||
| tensor_history = graph_stream.get_tensor_history(node_name) | |||||
| tensor_history = graph_stream.get_tensor_history(node_name, graph_name) | |||||
| # add tensor value for tensor history | # add tensor value for tensor history | ||||
| self._add_tensor_value_for_tensor_history(tensor_history, node_name) | self._add_tensor_value_for_tensor_history(tensor_history, node_name) | ||||
| # add hit label for tensor history | # add hit label for tensor history | ||||
| watchpoint_hit_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | watchpoint_hit_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT_HIT) | ||||
| watchpoint_hit_stream.update_tensor_history(tensor_history) | watchpoint_hit_stream.update_tensor_history(tensor_history) | ||||
| # add metadata | # add metadata | ||||
| metadata = self.cache_store.get_stream_handler(Streams.METADATA).get() | |||||
| metadata = self.cache_store.get_stream_handler(Streams.METADATA).get(['state', 'step']) | |||||
| tensor_history.update(metadata) | tensor_history.update(metadata) | ||||
| return tensor_history | return tensor_history | ||||
| @@ -325,28 +352,30 @@ class DebuggerServer: | |||||
| self.cache_store.put_command({'view_cmd': view_cmd, 'node_name': node_name}) | self.cache_store.put_command({'view_cmd': view_cmd, 'node_name': node_name}) | ||||
| log.debug("Send view cmd.") | log.debug("Send view cmd.") | ||||
| def retrieve_tensor_value(self, name, detail, shape): | |||||
| def retrieve_tensor_value(self, name, detail, shape, graph_name=None, prev=False): | |||||
| """Retrieve the tensor value.""" | """Retrieve the tensor value.""" | ||||
| log.info("Retrieve tensor value: name: %s, detail: %s, shape: %s", name, detail, shape) | log.info("Retrieve tensor value: name: %s, detail: %s, shape: %s", name, detail, shape) | ||||
| self.validate_tensor_param(name, detail) | self.validate_tensor_param(name, detail) | ||||
| # Limit to query max two dimensions for tensor in table view. | # Limit to query max two dimensions for tensor in table view. | ||||
| parsed_shape = TensorUtils.parse_shape(shape, limit=MAX_DIMENSIONS_FOR_TENSOR) | parsed_shape = TensorUtils.parse_shape(shape, limit=MAX_DIMENSIONS_FOR_TENSOR) | ||||
| node_type, tensor_name = self._get_tensor_name_and_type_by_ui_name(name) | |||||
| node_type, tensor_name = self._get_tensor_name_and_type_by_ui_name(name, graph_name) | |||||
| reply = self.cache_store.get_stream_handler(Streams.TENSOR).get( | reply = self.cache_store.get_stream_handler(Streams.TENSOR).get( | ||||
| {'name': tensor_name, | {'name': tensor_name, | ||||
| 'node_type': node_type, | 'node_type': node_type, | ||||
| 'shape': parsed_shape} | |||||
| 'shape': parsed_shape, | |||||
| 'prev': prev} | |||||
| ) | ) | ||||
| reply['tensor_value']['name'] = name | reply['tensor_value']['name'] = name | ||||
| return reply | return reply | ||||
| def _get_tensor_name_and_type_by_ui_name(self, name): | |||||
| def _get_tensor_name_and_type_by_ui_name(self, name, graph_name=None): | |||||
| """ | """ | ||||
| Get inner tensor name and type by UI name. | Get inner tensor name and type by UI name. | ||||
| Args: | Args: | ||||
| name (str): Node name shown in UI. | name (str): Node name shown in UI. | ||||
| graph_name (Union[str, None]): The graph name, default is: None. | |||||
| Returns: | Returns: | ||||
| str, full name of tensor. | str, full name of tensor. | ||||
| @@ -354,8 +383,9 @@ class DebuggerServer: | |||||
| """ | """ | ||||
| node_name, slot = name.rsplit(':', 1) | node_name, slot = name.rsplit(':', 1) | ||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | ||||
| node_type = graph_stream.get_node_type(node_name) | |||||
| full_name = graph_stream.get_full_name(node_name) | |||||
| graph_name = graph_name if graph_name else graph_stream.get_graph_id_by_name(node_name) | |||||
| node_type = graph_stream.get_node_type(node_name, graph_name) | |||||
| full_name = graph_stream.get_full_name(node_name, graph_name) | |||||
| tensor_name = full_name + ':' + slot | tensor_name = full_name + ':' + slot | ||||
| return node_type, tensor_name | return node_type, tensor_name | ||||
| @@ -379,9 +409,7 @@ class DebuggerServer: | |||||
| filter_condition (dict): Filter condition. | filter_condition (dict): Filter condition. | ||||
| - watch_point_id (int): The id of watchpoint. If not given, return all watchpoints. | - watch_point_id (int): The id of watchpoint. If not given, return all watchpoints. | ||||
| - name (str): The name of single node. | - name (str): The name of single node. | ||||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | - single_node (bool): If False, return the sub-layer of single node. If True, return | ||||
| the node list from root node to single node. | the node list from root node to single node. | ||||
| @@ -390,7 +418,7 @@ class DebuggerServer: | |||||
| """ | """ | ||||
| watchpoint_id = filter_condition.get('watch_point_id', 0) | watchpoint_id = filter_condition.get('watch_point_id', 0) | ||||
| if not watchpoint_id: | if not watchpoint_id: | ||||
| reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT).get() | |||||
| reply = self._hide_parameters_for_ui() | |||||
| log.debug("Get condition of watchpoints.") | log.debug("Get condition of watchpoints.") | ||||
| else: | else: | ||||
| reply = self._retrieve_node(filter_condition) | reply = self._retrieve_node(filter_condition) | ||||
| @@ -406,7 +434,6 @@ class DebuggerServer: | |||||
| filter_condition (dict): Filter condition. | filter_condition (dict): Filter condition. | ||||
| - name (str): The name of single node. | - name (str): The name of single node. | ||||
| - single_node (bool): If False, return the sub-layer of single node. If True, return | - single_node (bool): If False, return the sub-layer of single node. If True, return | ||||
| the node list from root node to single node. | the node list from root node to single node. | ||||
| @@ -418,34 +445,48 @@ class DebuggerServer: | |||||
| if node_name is None: | if node_name is None: | ||||
| reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).get() | reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT_HIT).get() | ||||
| return reply | return reply | ||||
| # get tensor history and graph of the hit node. | |||||
| self._validate_leaf_name(node_name) | |||||
| # get tensor history | # get tensor history | ||||
| reply = self._get_tensor_history(node_name) | reply = self._get_tensor_history(node_name) | ||||
| log.debug("Get tensor history for watchpoint hit node.") | log.debug("Get tensor history for watchpoint hit node.") | ||||
| # get single graph | # get single graph | ||||
| if filter_condition.get('single_node'): | if filter_condition.get('single_node'): | ||||
| graph_name = self.cache_store.get_stream_handler(Streams.GRAPH).validate_graph_name( | |||||
| filter_condition.get('graph_name')) | |||||
| filter_condition['graph_name'] = graph_name | |||||
| graph = self._get_nodes_info(filter_condition) | graph = self._get_nodes_info(filter_condition) | ||||
| reply.update(graph) | reply.update(graph) | ||||
| log.debug("Get tensor history for watchpoint hit node.") | log.debug("Get tensor history for watchpoint hit node.") | ||||
| return reply | return reply | ||||
| def create_watchpoint(self, watch_condition, watch_nodes=None, watch_point_id=None): | |||||
| def create_watchpoint(self, watch_condition, watch_nodes=None, watch_point_id=None, search_pattern=None, | |||||
| graph_name=None): | |||||
| """ | """ | ||||
| Create watchpoint. | Create watchpoint. | ||||
| Args: | Args: | ||||
| watch_condition (dict): The watch condition. | |||||
| - condition (str): Accept `INF` or `NAN`. | |||||
| - param (list[float]): Not defined yet. | |||||
| watch_condition (dict): The watch condition. The format is like: | |||||
| { | |||||
| "id": "tensor_too_large", | |||||
| "params": [ | |||||
| { | |||||
| "name": "abs_mean_gt", | |||||
| "disable": false, | |||||
| "value": 1.1 | |||||
| } | |||||
| ] | |||||
| } | |||||
| - id (str): Id of condition. | |||||
| - params (list[dict]): The list of param for this condition. | |||||
| watch_nodes (list[str]): The list of node names. | watch_nodes (list[str]): The list of node names. | ||||
| watch_point_id (int): The id of watchpoint. | watch_point_id (int): The id of watchpoint. | ||||
| search_pattern (dict): The search pattern. Default: None. | |||||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||||
| Returns: | Returns: | ||||
| dict, the id of new watchpoint. | |||||
| dict, the id of new watchpoint and metadata info. | |||||
| """ | """ | ||||
| log.info("Received create watchpoint request. WatchCondition: %s", watch_condition) | log.info("Received create watchpoint request. WatchCondition: %s", watch_condition) | ||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | ||||
| @@ -453,17 +494,28 @@ class DebuggerServer: | |||||
| log.error("Failed to create watchpoint as the MindSpore is not in waiting state.") | log.error("Failed to create watchpoint as the MindSpore is not in waiting state.") | ||||
| raise DebuggerCreateWatchPointError( | raise DebuggerCreateWatchPointError( | ||||
| "Failed to create watchpoint as the MindSpore is not in waiting state.") | "Failed to create watchpoint as the MindSpore is not in waiting state.") | ||||
| if metadata_stream.backend == 'GPU' and watch_condition.get('condition') == 'OVERFLOW': | |||||
| log.error("GPU doesn't support OVERFLOW watch condition.") | |||||
| raise DebuggerParamValueError("GPU doesn't support OVERFLOW watch condition.") | |||||
| if metadata_stream.backend == 'GPU' and watch_condition.get('id') in ( | |||||
| ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value, ConditionIdEnum.OPERATOR_OVERFLOW.value): | |||||
| log.error("GPU doesn't support overflow watch condition.") | |||||
| raise DebuggerParamValueError("GPU doesn't support overflow watch condition.") | |||||
| if metadata_stream.backend == 'Ascend' and watch_condition.get('id') == ConditionIdEnum.NAN.value: | |||||
| log.error("Ascend doesn't support nan watch condition.") | |||||
| raise DebuggerParamValueError("Ascend doesn't support nan watch condition.") | |||||
| watch_nodes = self._get_node_basic_infos(watch_nodes) | |||||
| watch_point_id = self.cache_store.get_stream_handler(Streams.WATCHPOINT).create_watchpoint( | |||||
| watch_condition, watch_nodes, watch_point_id) | |||||
| watch_nodes = self._get_watch_node_with_basic_info( | |||||
| node_names=watch_nodes, search_pattern=search_pattern, graph_name=graph_name) | |||||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | |||||
| watch_point_id = watchpoint_stream.create_watchpoint( | |||||
| self.condition_mgr, watch_condition, watch_nodes, watch_point_id) | |||||
| log.info("Create watchpoint %d", watch_point_id) | log.info("Create watchpoint %d", watch_point_id) | ||||
| return {'id': watch_point_id} | |||||
| def update_watchpoint(self, watch_point_id, watch_nodes, mode, name=None): | |||||
| metadata_stream.enable_recheck = watchpoint_stream.is_recheckable(metadata_stream.backend) | |||||
| res = metadata_stream.get(['state', 'enable_recheck']) | |||||
| res['id'] = watch_point_id | |||||
| return res | |||||
| def update_watchpoint(self, watch_point_id, watch_nodes, mode, search_pattern=None, graph_name=None): | |||||
| """ | """ | ||||
| Update watchpoint. | Update watchpoint. | ||||
| @@ -472,13 +524,14 @@ class DebuggerServer: | |||||
| watch_nodes (list[str]): The list of node names. | watch_nodes (list[str]): The list of node names. | ||||
| mode (int): The update operator on nodes. 0 for remove nodes from watch nodes. | mode (int): The update operator on nodes. 0 for remove nodes from watch nodes. | ||||
| 1 for add nodes to watch nodes. | 1 for add nodes to watch nodes. | ||||
| name (str): The search name. Default: None. | |||||
| search_pattern (dict): The search pattern. Default: None. | |||||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||||
| Returns: | Returns: | ||||
| dict, empty response. | |||||
| dict, the metadata info. | |||||
| """ | """ | ||||
| if self.cache_store.get_stream_handler( | |||||
| Streams.METADATA).state != ServerStatus.WAITING.value: | |||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||||
| if metadata_stream.state != ServerStatus.WAITING.value: | |||||
| log.error("Failed to update watchpoint as the MindSpore is not in waiting state.") | log.error("Failed to update watchpoint as the MindSpore is not in waiting state.") | ||||
| raise DebuggerUpdateWatchPointError( | raise DebuggerUpdateWatchPointError( | ||||
| "Failed to update watchpoint as the MindSpore is not in waiting state." | "Failed to update watchpoint as the MindSpore is not in waiting state." | ||||
| @@ -489,22 +542,40 @@ class DebuggerServer: | |||||
| if not watch_nodes or not watch_point_id: | if not watch_nodes or not watch_point_id: | ||||
| log.error("Invalid parameter for update watchpoint.") | log.error("Invalid parameter for update watchpoint.") | ||||
| raise DebuggerParamValueError("Invalid parameter for update watchpoint.") | raise DebuggerParamValueError("Invalid parameter for update watchpoint.") | ||||
| # update watch node | |||||
| if name is not None: | |||||
| watch_nodes = self._get_watch_nodes_by_search(watch_nodes) | |||||
| elif mode == 1: | |||||
| watch_nodes = self._get_node_basic_infos(watch_nodes) | |||||
| # get node basic info for watch nodes | |||||
| watch_nodes = self._get_watch_node_with_basic_info(watch_nodes, search_pattern, graph_name) | |||||
| watchpoint_stream.update_watchpoint(watch_point_id, watch_nodes, mode) | watchpoint_stream.update_watchpoint(watch_point_id, watch_nodes, mode) | ||||
| metadata_stream.enable_recheck = watchpoint_stream.is_recheckable(metadata_stream.backend) | |||||
| log.info("Update watchpoint with id: %d", watch_point_id) | log.info("Update watchpoint with id: %d", watch_point_id) | ||||
| return {} | |||||
| return metadata_stream.get(['state', 'enable_recheck']) | |||||
| def _get_watch_nodes_by_search(self, watch_nodes): | |||||
| def _get_watch_node_with_basic_info(self, node_names, search_pattern=None, graph_name=None): | |||||
| """ | |||||
| Get watch node with basic info. | |||||
| Args: | |||||
| node_names (list[str]): A list of node names. | |||||
| search_pattern (dict): Get watch node with search pattern. Default: None | |||||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||||
| Returns: | |||||
| list[NodeBasicInfo], a list of node basic infos. | |||||
| """ | |||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||||
| graph_name = graph_stream.validate_graph_name(graph_name) | |||||
| if search_pattern is not None: | |||||
| watch_nodes = self._get_watch_nodes_by_search(node_names, search_pattern, graph_name) | |||||
| else: | |||||
| watch_nodes = self._get_node_basic_infos(node_names, graph_name=graph_name) | |||||
| return watch_nodes | |||||
| def _get_watch_nodes_by_search(self, watch_nodes, search_pattern, graph_name): | |||||
| """Get watched leaf nodes by search name.""" | """Get watched leaf nodes by search name.""" | ||||
| watched_leaf_nodes = [] | watched_leaf_nodes = [] | ||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | ||||
| new_pattern = {'graph_name': graph_name}.update(search_pattern) | |||||
| for search_name in watch_nodes: | for search_name in watch_nodes: | ||||
| search_nodes = graph_stream.get_searched_node_list() | |||||
| search_nodes = graph_stream.get_searched_node_list(new_pattern) | |||||
| search_node_names = [ | search_node_names = [ | ||||
| NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | ||||
| for node in search_nodes | for node in search_nodes | ||||
| @@ -515,41 +586,48 @@ class DebuggerServer: | |||||
| return watched_leaf_nodes | return watched_leaf_nodes | ||||
| def delete_watchpoint(self, watch_point_id): | |||||
| def delete_watchpoint(self, watch_point_id=None): | |||||
| """ | """ | ||||
| Delete watchpoint. | Delete watchpoint. | ||||
| Args: | Args: | ||||
| watch_point_id (int): The id of watchpoint. | |||||
| watch_point_id (Union[None, int]): The id of watchpoint. | |||||
| If None, delete all watchpoints. Default: None. | |||||
| Returns: | Returns: | ||||
| dict, empty response. | |||||
| dict, the metadata info. | |||||
| """ | """ | ||||
| if self.cache_store.get_stream_handler( | |||||
| Streams.METADATA).state != ServerStatus.WAITING.value: | |||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||||
| if metadata_stream.state != ServerStatus.WAITING.value: | |||||
| log.error("Failed to delete watchpoint as the MindSpore is not in waiting state.") | log.error("Failed to delete watchpoint as the MindSpore is not in waiting state.") | ||||
| raise DebuggerDeleteWatchPointError( | raise DebuggerDeleteWatchPointError( | ||||
| "Failed to delete watchpoint as the MindSpore is not in waiting state." | "Failed to delete watchpoint as the MindSpore is not in waiting state." | ||||
| ) | ) | ||||
| self.cache_store.get_stream_handler(Streams.WATCHPOINT).delete_watchpoint(watch_point_id) | |||||
| log.info("Delete watchpoint with id: %d", watch_point_id) | |||||
| return {} | |||||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | |||||
| watchpoint_stream.delete_watchpoint(watch_point_id) | |||||
| metadata_stream.enable_recheck = watchpoint_stream.is_recheckable() | |||||
| log.info("Delete watchpoint with id: %s", watch_point_id) | |||||
| return metadata_stream.get(['state', 'enable_recheck']) | |||||
| def _get_node_basic_infos(self, node_names, graph_name=None): | |||||
| """ | |||||
| Get node info according to node names. | |||||
| def _get_node_basic_infos(self, node_names): | |||||
| """Get node info according to node names.""" | |||||
| Args: | |||||
| node_names (list[str]): A list of node names. | |||||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||||
| Returns: | |||||
| list[NodeBasicInfo], a list of basic node infos. | |||||
| """ | |||||
| if not node_names: | if not node_names: | ||||
| return [] | return [] | ||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | ||||
| node_infos = [] | node_infos = [] | ||||
| for node_name in node_names: | for node_name in node_names: | ||||
| node_type = graph_stream.get_node_type(node_name) | |||||
| if node_type == NodeTypeEnum.AGGREGATION_SCOPE.value: | |||||
| sub_nodes = graph_stream.get_nodes_by_scope(node_name) | |||||
| sub_infos = [NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||||
| for node in sub_nodes] | |||||
| node_infos.extend(sub_infos) | |||||
| full_name = graph_stream.get_full_name(node_name) | |||||
| node_infos.append(NodeBasicInfo(name=node_name, full_name=full_name, type=node_type)) | |||||
| node_info = graph_stream.get_node_basic_info(node_name, graph_name) | |||||
| node_infos.append(node_info) | |||||
| return node_infos | return node_infos | ||||
| def control(self, params=None): | def control(self, params=None): | ||||
| @@ -561,14 +639,12 @@ class DebuggerServer: | |||||
| - mode (str): Acceptable control command, including `continue`, | - mode (str): Acceptable control command, including `continue`, | ||||
| `pause` and `terminate`. | `pause` and `terminate`. | ||||
| - level (str): The control granularity, `node` level or `step` level. | - level (str): The control granularity, `node` level or `step` level. | ||||
| Default: `step`. | Default: `step`. | ||||
| - steps (int): Specify the steps that training should run. | - steps (int): Specify the steps that training should run. | ||||
| Used when `level` is `step`. | Used when `level` is `step`. | ||||
| - name (str): Specify the name of the node. Used when `level` is `node`. | - name (str): Specify the name of the node. Used when `level` is `node`. | ||||
| - graph_name (str): The graph name. | |||||
| Returns: | Returns: | ||||
| dict, the response. | dict, the response. | ||||
| @@ -597,6 +673,9 @@ class DebuggerServer: | |||||
| Args: | Args: | ||||
| metadata_stream (MetadataHandler): The metadata_handler | metadata_stream (MetadataHandler): The metadata_handler | ||||
| params (dict): The control params. | params (dict): The control params. | ||||
| Returns: | |||||
| dict, metadata info. | |||||
| """ | """ | ||||
| if metadata_stream.state != ServerStatus.WAITING.value: | if metadata_stream.state != ServerStatus.WAITING.value: | ||||
| log.error("MindSpore is not ready to run. Current state is: %s", metadata_stream.state) | log.error("MindSpore is not ready to run. Current state is: %s", metadata_stream.state) | ||||
| @@ -604,7 +683,6 @@ class DebuggerServer: | |||||
| "MindSpore is not ready to run or is running currently." | "MindSpore is not ready to run or is running currently." | ||||
| ) | ) | ||||
| metadata_stream.state = ServerStatus.RUNNING.value | metadata_stream.state = ServerStatus.RUNNING.value | ||||
| current_state = ServerStatus.RUNNING.value | |||||
| try: | try: | ||||
| event = self._construct_run_event(params) | event = self._construct_run_event(params) | ||||
| self._send_watchpoints() | self._send_watchpoints() | ||||
| @@ -612,13 +690,12 @@ class DebuggerServer: | |||||
| except MindInsightException as err: | except MindInsightException as err: | ||||
| log.error("Failed to send run event.") | log.error("Failed to send run event.") | ||||
| log.exception(err) | log.exception(err) | ||||
| current_state = ServerStatus.WAITING.value | |||||
| metadata_stream.state = current_state | |||||
| metadata_stream.state = ServerStatus.WAITING.value | |||||
| raise DebuggerContinueError("Failed to send run command.") | raise DebuggerContinueError("Failed to send run command.") | ||||
| else: | else: | ||||
| metadata_stream.enable_recheck = False | |||||
| log.debug("Send the RunCMD to command queue.") | log.debug("Send the RunCMD to command queue.") | ||||
| return {'metadata': {'state': current_state}} | |||||
| return metadata_stream.get(['state', 'enable_recheck']) | |||||
| def _construct_run_event(self, params): | def _construct_run_event(self, params): | ||||
| """ | """ | ||||
| @@ -627,18 +704,22 @@ class DebuggerServer: | |||||
| Args: | Args: | ||||
| params (dict): The control params. | params (dict): The control params. | ||||
| - level (str): The control granularity, `node` level or `step` level. | |||||
| - level (str): The control granularity, `node`, `step` or `recheck` level. | |||||
| Default: `step`. | Default: `step`. | ||||
| - steps (int): Specify the steps that training should run. | - steps (int): Specify the steps that training should run. | ||||
| Used when `level` is `step`. | Used when `level` is `step`. | ||||
| - name (str): Specify the name of the node. Used when `level` is `node`. | - name (str): Specify the name of the node. Used when `level` is `node`. | ||||
| - graph_name (str): The graph name. | |||||
| Returns: | Returns: | ||||
| EventReply, control event with run command. | EventReply, control event with run command. | ||||
| """ | """ | ||||
| level = params.get('level', 'step') | level = params.get('level', 'step') | ||||
| # validate level | |||||
| if level not in [RunLevel.NODE.value, RunLevel.STEP.value, RunLevel.RECHECK.value]: | |||||
| log.error("Invalid Value. `level` should be `step`, `node` or `recheck`. Got %s", level) | |||||
| raise DebuggerParamValueError("level` should be `step`, `node` or `recheck`.") | |||||
| # construct run command events | |||||
| event = get_ack_reply() | event = get_ack_reply() | ||||
| if level == 'step': | if level == 'step': | ||||
| steps = params.get('steps') | steps = params.get('steps') | ||||
| @@ -646,31 +727,37 @@ class DebuggerServer: | |||||
| steps = 1 | steps = 1 | ||||
| run_cmd = RunCMD(run_level='step', run_steps=steps) | run_cmd = RunCMD(run_level='step', run_steps=steps) | ||||
| elif level == 'node': | elif level == 'node': | ||||
| name = params.get('name') | |||||
| name = params.get('name', '') | |||||
| graph_name = params.get('graph_name') | |||||
| if name: | if name: | ||||
| self._validate_leaf_name(name) | |||||
| name = self.cache_store.get_stream_handler(Streams.GRAPH).get_full_name(name) | |||||
| else: | |||||
| name = '' | |||||
| self._validate_leaf_name(name, graph_name) | |||||
| name = self.cache_store.get_stream_handler(Streams.GRAPH).get_full_name(name, graph_name) | |||||
| run_cmd = RunCMD(run_level='node', node_name=name) | run_cmd = RunCMD(run_level='node', node_name=name) | ||||
| else: | else: | ||||
| log.error("Invalid Value. `level` should be `step` or `node`. Got %s", level) | |||||
| raise DebuggerParamValueError("level` should be `step` or `node`") | |||||
| run_cmd = RunCMD(run_level='recheck') | |||||
| event.run_cmd.CopyFrom(run_cmd) | event.run_cmd.CopyFrom(run_cmd) | ||||
| log.debug("Construct run event. %s", event) | log.debug("Construct run event. %s", event) | ||||
| return event | return event | ||||
| def _validate_leaf_name(self, node_name, graph_name): | |||||
| """Validate if the node is a leaf node.""" | |||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | |||||
| node_type = graph_stream.get_node_type(node_name, graph_name) | |||||
| if is_scope_type(node_type): | |||||
| log.error("Scope type node has no tensor history.") | |||||
| raise DebuggerParamValueError("Invalid leaf node name.") | |||||
| def _send_watchpoints(self): | def _send_watchpoints(self): | ||||
| """Set watchpoints.""" | """Set watchpoints.""" | ||||
| watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | watchpoint_stream = self.cache_store.get_stream_handler(Streams.WATCHPOINT) | ||||
| watchpoints = watchpoint_stream.get(filter_condition=True).get('watch_points') | |||||
| if watchpoints: | |||||
| for watchpoint in watchpoints: | |||||
| set_commands = watchpoint_stream.get_pending_commands(self.cache_store.get_stream_handler(Streams.GRAPH)) | |||||
| if set_commands: | |||||
| for set_cmd in set_commands: | |||||
| event = get_ack_reply() | event = get_ack_reply() | ||||
| event.set_cmd.CopyFrom(watchpoint) | |||||
| event.set_cmd.CopyFrom(set_cmd) | |||||
| self.cache_store.put_command(event) | self.cache_store.put_command(event) | ||||
| watchpoint_stream.sync_set_cmd() | |||||
| watchpoint_stream.sync_set_cmd(set_commands) | |||||
| log.debug("Send SetCMD to MindSpore. %s", event) | log.debug("Send SetCMD to MindSpore. %s", event) | ||||
| def _pause(self, metadata_stream): | def _pause(self, metadata_stream): | ||||
| @@ -679,6 +766,9 @@ class DebuggerServer: | |||||
| Args: | Args: | ||||
| metadata_stream (MetadataHandler): The metadata stream handler. | metadata_stream (MetadataHandler): The metadata stream handler. | ||||
| Returns: | |||||
| dict, metadata info. | |||||
| """ | """ | ||||
| if metadata_stream.state != ServerStatus.RUNNING.value: | if metadata_stream.state != ServerStatus.RUNNING.value: | ||||
| log.error("The MindSpore is not running.") | log.error("The MindSpore is not running.") | ||||
| @@ -687,8 +777,9 @@ class DebuggerServer: | |||||
| event = get_ack_reply() | event = get_ack_reply() | ||||
| event.run_cmd.CopyFrom(RunCMD(run_level='step', run_steps=0)) | event.run_cmd.CopyFrom(RunCMD(run_level='step', run_steps=0)) | ||||
| self.cache_store.put_command(event) | self.cache_store.put_command(event) | ||||
| metadata_stream.enable_recheck = False | |||||
| log.debug("Send the Pause command") | log.debug("Send the Pause command") | ||||
| return {'metadata': {'state': 'waiting'}} | |||||
| return metadata_stream.get(['state', 'enable_recheck']) | |||||
| def _terminate(self, metadata_stream): | def _terminate(self, metadata_stream): | ||||
| """ | """ | ||||
| @@ -696,21 +787,27 @@ class DebuggerServer: | |||||
| Args: | Args: | ||||
| metadata_stream (MetadataHandler): The metadata stream handler. | metadata_stream (MetadataHandler): The metadata stream handler. | ||||
| Returns: | |||||
| dict, metadata info. | |||||
| """ | """ | ||||
| metadata_stream.state = 'pending' | metadata_stream.state = 'pending' | ||||
| self.cache_store.clean_data() | self.cache_store.clean_data() | ||||
| self.cache_store.clean_command() | |||||
| event = get_ack_reply() | event = get_ack_reply() | ||||
| event.exit = True | event.exit = True | ||||
| self.cache_store.put_command(event) | self.cache_store.put_command(event) | ||||
| metadata_stream.enable_recheck = False | |||||
| log.debug("Send the ExitCMD.") | log.debug("Send the ExitCMD.") | ||||
| return {'metadata': {'state': 'pending'}} | |||||
| return metadata_stream.get(['state', 'enable_recheck']) | |||||
| def retrieve_node_by_bfs(self, node_name, ascend=False): | |||||
| def retrieve_node_by_bfs(self, node_name, graph_name=None, ascend=False): | |||||
| """ | """ | ||||
| Get the graph of the next node according to node_name. | Get the graph of the next node according to node_name. | ||||
| Args: | Args: | ||||
| node_name (str): The name of current chosen leaf node. | node_name (str): The name of current chosen leaf node. | ||||
| graph_name (str): The graph name. | |||||
| ascend (bool): If True, traverse the input nodes; | ascend (bool): If True, traverse the input nodes; | ||||
| If False, traverse the output nodes. Default is True. | If False, traverse the output nodes. Default is True. | ||||
| @@ -721,6 +818,7 @@ class DebuggerServer: | |||||
| node_name, ascend) | node_name, ascend) | ||||
| reply = {} | reply = {} | ||||
| graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | graph_stream = self.cache_store.get_stream_handler(Streams.GRAPH) | ||||
| graph_name = graph_stream.validate_graph_name(graph_name) | |||||
| next_node_name = graph_stream.get_node_by_bfs_order(node_name, ascend) | next_node_name = graph_stream.get_node_by_bfs_order(node_name, ascend) | ||||
| # no next node | # no next node | ||||
| if next_node_name is None: | if next_node_name is None: | ||||
| @@ -728,6 +826,7 @@ class DebuggerServer: | |||||
| # add graph and tensor history for next node | # add graph and tensor history for next node | ||||
| filter_condition = { | filter_condition = { | ||||
| 'name': next_node_name, | 'name': next_node_name, | ||||
| 'graph_name': graph_name, | |||||
| 'single_node': True | 'single_node': True | ||||
| } | } | ||||
| search_graph = self._get_nodes_info(filter_condition) | search_graph = self._get_nodes_info(filter_condition) | ||||
| @@ -735,3 +834,85 @@ class DebuggerServer: | |||||
| reply.update(search_graph) | reply.update(search_graph) | ||||
| return reply | return reply | ||||
| def recheck(self): | |||||
| """ | |||||
| Recheck all watchpoints. | |||||
| Returns: | |||||
| dict, metadata info. | |||||
| """ | |||||
| metadata_stream = self.cache_store.get_stream_handler(Streams.METADATA) | |||||
| # validate backend status is able to recheck watchpoint | |||||
| if not metadata_stream.enable_recheck: | |||||
| log.error("Recheck is not available.") | |||||
| raise DebuggerRecheckError("Recheck is not available.") | |||||
| metadata_stream.state = ServerStatus.RUNNING.value | |||||
| metadata_stream.enable_recheck = False | |||||
| # send updated watchpoint and recheck command | |||||
| try: | |||||
| event = self._construct_run_event({'level': 'recheck'}) | |||||
| self._send_watchpoints() | |||||
| self.cache_store.put_command(event) | |||||
| except MindInsightException as err: | |||||
| log.error("Failed to send recheck event.") | |||||
| log.exception(err) | |||||
| metadata_stream.state = ServerStatus.WAITING.value | |||||
| metadata_stream.enable_recheck = True | |||||
| raise DebuggerContinueError("Failed to send run command.") | |||||
| else: | |||||
| log.debug("Send the recheck to command queue.") | |||||
| return metadata_stream.get(['state', 'enable_recheck']) | |||||
| def retrieve_tensor_graph(self, tensor_name, graph_name): | |||||
| """ | |||||
| Retrieve tensor graph. | |||||
| Args: | |||||
| tensor_name (str): The tensor name from UI. | |||||
| graph_name (str): The graph name. | |||||
| Returns: | |||||
| dict, tensor graph object. | |||||
| """ | |||||
| log.info("Retrieve tensor graph for %s from %s", tensor_name, graph_name) | |||||
| tensor_graph_ops = TensorDetailInfo(self.cache_store).get_tensor_graph(tensor_name, graph_name) | |||||
| return tensor_graph_ops | |||||
| def retrieve_tensor_hits(self, tensor_name, graph_name): | |||||
| """ | |||||
| Retrieve tensor hit information. | |||||
| Args: | |||||
| tensor_name (str): The tensor name from UI. | |||||
| graph_name (str): The graph name. | |||||
| Returns: | |||||
| dict, tensor hit info. | |||||
| """ | |||||
| log.info("Retrieve tensor hits for %s from %s", tensor_name, graph_name) | |||||
| watch_points = TensorDetailInfo(self.cache_store).get_tensor_watch_points(tensor_name, graph_name) | |||||
| return {'watch_points': watch_points} | |||||
| def _hide_parameters_for_ui(self): | |||||
| """ | |||||
| Hide some parameters on ui. | |||||
| Returns: | |||||
| dict, watch point list. | |||||
| """ | |||||
| reply = self.cache_store.get_stream_handler(Streams.WATCHPOINT).get() | |||||
| watch_points = reply.get('watch_points') | |||||
| for i, watch_point in enumerate(watch_points): | |||||
| watch_condition = watch_point.get('watch_condition') | |||||
| parameters = watch_condition.get('params') | |||||
| watch_condition_id = watch_condition.get('id') | |||||
| mgr_condition = self.condition_mgr.get_condition(watch_condition_id) | |||||
| ui_watch_condition = [] | |||||
| for param in parameters: | |||||
| parameter_definition = mgr_condition.get_parameter_definition(param['name']) | |||||
| if not parameter_definition.visible_on_ui: | |||||
| continue | |||||
| ui_watch_condition.append(param) | |||||
| reply['watch_points'][i]['watch_condition']['params'] = ui_watch_condition | |||||
| return reply | |||||
| @@ -27,6 +27,7 @@ service EventListener { | |||||
| rpc SendGraph (stream Chunk) returns (EventReply) {}; | rpc SendGraph (stream Chunk) returns (EventReply) {}; | ||||
| rpc SendTensors (stream TensorProto) returns (EventReply) {}; | rpc SendTensors (stream TensorProto) returns (EventReply) {}; | ||||
| rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {}; | rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {}; | ||||
| rpc SendMultiGraphs (stream Chunk) returns (EventReply) {}; | |||||
| } | } | ||||
| message Metadata { | message Metadata { | ||||
| @@ -38,11 +39,15 @@ message Metadata { | |||||
| string cur_node = 4; | string cur_node = 4; | ||||
| // check if training is done. | // check if training is done. | ||||
| bool training_done = 5; | bool training_done = 5; | ||||
| // the number of total graphs | |||||
| int32 graph_num = 6; | |||||
| } | } | ||||
| message Chunk { | message Chunk { | ||||
| bytes buffer = 1; | bytes buffer = 1; | ||||
| bool finished = 2; | |||||
| } | } | ||||
| message EventReply { | message EventReply { | ||||
| enum Status { | enum Status { | ||||
| OK = 0; | OK = 0; | ||||
| @@ -61,13 +66,11 @@ message EventReply { | |||||
| } | } | ||||
| message RunCMD { | message RunCMD { | ||||
| // running level. 'step' or 'node' | |||||
| // step level or node level. "step", "node" or "recheck". | |||||
| string run_level = 1; | string run_level = 1; | ||||
| oneof cmd { | oneof cmd { | ||||
| int32 run_steps = 2; | int32 run_steps = 2; | ||||
| // the full name of next node | |||||
| // the next node full name | |||||
| string node_name = 3; | string node_name = 3; | ||||
| } | } | ||||
| } | } | ||||
| @@ -96,9 +99,27 @@ message WatchCondition { | |||||
| max_min_lt = 8; | max_min_lt = 8; | ||||
| mean_gt = 9; | mean_gt = 9; | ||||
| mean_lt = 10; | mean_lt = 10; | ||||
| sd_gt = 11; | |||||
| sd_lt = 12; | |||||
| tensor_general_overflow = 13; | |||||
| tensor_initialization = 14; | |||||
| tensor_too_large = 15; | |||||
| tensor_too_small = 16; | |||||
| tensor_all_zero = 17; | |||||
| tensor_change_too_large = 18; | |||||
| tensor_change_too_small = 19; | |||||
| tensor_not_changed = 20; | |||||
| } | } | ||||
| Condition condition = 1; | Condition condition = 1; | ||||
| float value = 2; // for between condition, there will be two values | |||||
| float value = 2; | |||||
| message Parameter { | |||||
| string name = 1; | |||||
| bool disabled = 2; | |||||
| double value = 3; | |||||
| bool hit = 4; // Whether this parameter is hit when checking tensor. | |||||
| } | |||||
| // The ID 3 has been used on the mindspore side repeated bool include=3, so skip 3 for backward compatibility. | |||||
| repeated Parameter params = 4; | |||||
| } | } | ||||
| message WatchNode { | message WatchNode { | ||||
| @@ -19,7 +19,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( | |||||
| package='debugger', | package='debugger', | ||||
| syntax='proto3', | syntax='proto3', | ||||
| serialized_options=None, | serialized_options=None, | ||||
| serialized_pb=b'\n+mindinsight/debugger/proto/debug_grpc.proto\x12\x08\x64\x65\x62ugger\x1a)mindinsight/debugger/proto/ms_graph.proto\"k\n\x08Metadata\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x10\n\x08\x63ur_step\x18\x02 \x01(\x05\x12\x0f\n\x07\x62\x61\x63kend\x18\x03 \x01(\t\x12\x10\n\x08\x63ur_node\x18\x04 \x01(\t\x12\x15\n\rtraining_done\x18\x05 \x01(\x08\"\x17\n\x05\x43hunk\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\"\xec\x01\n\nEventReply\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.debugger.EventReply.Status\x12\x0e\n\x04\x65xit\x18\x02 \x01(\x08H\x00\x12#\n\x07run_cmd\x18\x03 \x01(\x0b\x32\x10.debugger.RunCMDH\x00\x12#\n\x07set_cmd\x18\x04 \x01(\x0b\x32\x10.debugger.SetCMDH\x00\x12%\n\x08view_cmd\x18\x05 \x01(\x0b\x32\x11.debugger.ViewCMDH\x00\")\n\x06Status\x12\x06\n\x02OK\x10\x00\x12\n\n\x06\x46\x41ILED\x10\x01\x12\x0b\n\x07PENDING\x10\x02\x42\x05\n\x03\x63md\"L\n\x06RunCMD\x12\x11\n\trun_level\x18\x01 \x01(\t\x12\x13\n\trun_steps\x18\x02 \x01(\x05H\x00\x12\x13\n\tnode_name\x18\x03 \x01(\tH\x00\x42\x05\n\x03\x63md\"\x81\x01\n\x06SetCMD\x12(\n\x0bwatch_nodes\x18\x01 \x03(\x0b\x32\x13.debugger.WatchNode\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\x0e\n\x06\x64\x65lete\x18\x03 \x01(\x08\x12\n\n\x02id\x18\x04 \x01(\x05\"1\n\x07ViewCMD\x12&\n\x07tensors\x18\x01 \x03(\x0b\x32\x15.debugger.TensorProto\"\xee\x01\n\x0eWatchCondition\x12\x35\n\tcondition\x18\x01 \x01(\x0e\x32\".debugger.WatchCondition.Condition\x12\r\n\x05value\x18\x02 \x01(\x02\"\x95\x01\n\tCondition\x12\x07\n\x03nan\x10\x00\x12\x07\n\x03inf\x10\x01\x12\x0c\n\x08overflow\x10\x02\x12\n\n\x06max_gt\x10\x03\x12\n\n\x06max_lt\x10\x04\x12\n\n\x06min_gt\x10\x05\x12\n\n\x06min_lt\x10\x06\x12\x0e\n\nmax_min_gt\x10\x07\x12\x0e\n\nmax_min_lt\x10\x08\x12\x0b\n\x07mean_gt\x10\t\x12\x0b\n\x07mean_lt\x10\n\"1\n\tWatchNode\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x11\n\tnode_type\x18\x02 \x01(\t\"u\n\rWatchpointHit\x12%\n\x06tensor\x18\x01 
\x01(\x0b\x32\x15.debugger.TensorProto\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\n\n\x02id\x18\x03 \x01(\x05\x32\xc3\x02\n\rEventListener\x12\x35\n\x07WaitCMD\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12:\n\x0cSendMetadata\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12\x36\n\tSendGraph\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x12>\n\x0bSendTensors\x12\x15.debugger.TensorProto\x1a\x14.debugger.EventReply\"\x00(\x01\x12G\n\x12SendWatchpointHits\x12\x17.debugger.WatchpointHit\x1a\x14.debugger.EventReply\"\x00(\x01\x62\x06proto3' | |||||
| serialized_pb=b'\n+mindinsight/debugger/proto/debug_grpc.proto\x12\x08\x64\x65\x62ugger\x1a)mindinsight/debugger/proto/ms_graph.proto\"~\n\x08Metadata\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x10\n\x08\x63ur_step\x18\x02 \x01(\x05\x12\x0f\n\x07\x62\x61\x63kend\x18\x03 \x01(\t\x12\x10\n\x08\x63ur_node\x18\x04 \x01(\t\x12\x15\n\rtraining_done\x18\x05 \x01(\x08\x12\x11\n\tgraph_num\x18\x06 \x01(\x05\")\n\x05\x43hunk\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\x10\n\x08\x66inished\x18\x02 \x01(\x08\"\xec\x01\n\nEventReply\x12+\n\x06status\x18\x01 \x01(\x0e\x32\x1b.debugger.EventReply.Status\x12\x0e\n\x04\x65xit\x18\x02 \x01(\x08H\x00\x12#\n\x07run_cmd\x18\x03 \x01(\x0b\x32\x10.debugger.RunCMDH\x00\x12#\n\x07set_cmd\x18\x04 \x01(\x0b\x32\x10.debugger.SetCMDH\x00\x12%\n\x08view_cmd\x18\x05 \x01(\x0b\x32\x11.debugger.ViewCMDH\x00\")\n\x06Status\x12\x06\n\x02OK\x10\x00\x12\n\n\x06\x46\x41ILED\x10\x01\x12\x0b\n\x07PENDING\x10\x02\x42\x05\n\x03\x63md\"L\n\x06RunCMD\x12\x11\n\trun_level\x18\x01 \x01(\t\x12\x13\n\trun_steps\x18\x02 \x01(\x05H\x00\x12\x13\n\tnode_name\x18\x03 \x01(\tH\x00\x42\x05\n\x03\x63md\"\x81\x01\n\x06SetCMD\x12(\n\x0bwatch_nodes\x18\x01 \x03(\x0b\x32\x13.debugger.WatchNode\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\x0e\n\x06\x64\x65lete\x18\x03 \x01(\x08\x12\n\n\x02id\x18\x04 \x01(\x05\"1\n\x07ViewCMD\x12&\n\x07tensors\x18\x01 \x03(\x0b\x32\x15.debugger.TensorProto\"\xcc\x04\n\x0eWatchCondition\x12\x35\n\tcondition\x18\x01 \x01(\x0e\x32\".debugger.WatchCondition.Condition\x12\r\n\x05value\x18\x02 \x01(\x02\x12\x32\n\x06params\x18\x04 \x03(\x0b\x32\".debugger.WatchCondition.Parameter\x1aG\n\tParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64isabled\x18\x02 \x01(\x08\x12\r\n\x05value\x18\x03 \x01(\x01\x12\x0b\n\x03hit\x18\x04 
\x01(\x08\"\xf6\x02\n\tCondition\x12\x07\n\x03nan\x10\x00\x12\x07\n\x03inf\x10\x01\x12\x0c\n\x08overflow\x10\x02\x12\n\n\x06max_gt\x10\x03\x12\n\n\x06max_lt\x10\x04\x12\n\n\x06min_gt\x10\x05\x12\n\n\x06min_lt\x10\x06\x12\x0e\n\nmax_min_gt\x10\x07\x12\x0e\n\nmax_min_lt\x10\x08\x12\x0b\n\x07mean_gt\x10\t\x12\x0b\n\x07mean_lt\x10\n\x12\t\n\x05sd_gt\x10\x0b\x12\t\n\x05sd_lt\x10\x0c\x12\x1b\n\x17tensor_general_overflow\x10\r\x12\x19\n\x15tensor_initialization\x10\x0e\x12\x14\n\x10tensor_too_large\x10\x0f\x12\x14\n\x10tensor_too_small\x10\x10\x12\x13\n\x0ftensor_all_zero\x10\x11\x12\x1b\n\x17tensor_change_too_large\x10\x12\x12\x1b\n\x17tensor_change_too_small\x10\x13\x12\x16\n\x12tensor_not_changed\x10\x14\"1\n\tWatchNode\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x11\n\tnode_type\x18\x02 \x01(\t\"u\n\rWatchpointHit\x12%\n\x06tensor\x18\x01 \x01(\x0b\x32\x15.debugger.TensorProto\x12\x31\n\x0fwatch_condition\x18\x02 \x01(\x0b\x32\x18.debugger.WatchCondition\x12\n\n\x02id\x18\x03 \x01(\x05\x32\x81\x03\n\rEventListener\x12\x35\n\x07WaitCMD\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12:\n\x0cSendMetadata\x12\x12.debugger.Metadata\x1a\x14.debugger.EventReply\"\x00\x12\x36\n\tSendGraph\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x12>\n\x0bSendTensors\x12\x15.debugger.TensorProto\x1a\x14.debugger.EventReply\"\x00(\x01\x12G\n\x12SendWatchpointHits\x12\x17.debugger.WatchpointHit\x1a\x14.debugger.EventReply\"\x00(\x01\x12<\n\x0fSendMultiGraphs\x12\x0f.debugger.Chunk\x1a\x14.debugger.EventReply\"\x00(\x01\x62\x06proto3' | |||||
| , | , | ||||
| dependencies=[mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.DESCRIPTOR,]) | dependencies=[mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.DESCRIPTOR,]) | ||||
| @@ -46,8 +46,8 @@ _EVENTREPLY_STATUS = _descriptor.EnumDescriptor( | |||||
| ], | ], | ||||
| containing_type=None, | containing_type=None, | ||||
| serialized_options=None, | serialized_options=None, | ||||
| serialized_start=423, | |||||
| serialized_end=464, | |||||
| serialized_start=460, | |||||
| serialized_end=501, | |||||
| ) | ) | ||||
| _sym_db.RegisterEnumDescriptor(_EVENTREPLY_STATUS) | _sym_db.RegisterEnumDescriptor(_EVENTREPLY_STATUS) | ||||
| @@ -101,11 +101,51 @@ _WATCHCONDITION_CONDITION = _descriptor.EnumDescriptor( | |||||
| name='mean_lt', index=10, number=10, | name='mean_lt', index=10, number=10, | ||||
| serialized_options=None, | serialized_options=None, | ||||
| type=None), | type=None), | ||||
| _descriptor.EnumValueDescriptor( | |||||
| name='sd_gt', index=11, number=11, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='sd_lt', index=12, number=12, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_general_overflow', index=13, number=13, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_initialization', index=14, number=14, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_too_large', index=15, number=15, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_too_small', index=16, number=16, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_all_zero', index=17, number=17, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_change_too_large', index=18, number=18, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_change_too_small', index=19, number=19, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| _descriptor.EnumValueDescriptor( | |||||
| name='tensor_not_changed', index=20, number=20, | |||||
| serialized_options=None, | |||||
| type=None), | |||||
| ], | ], | ||||
| containing_type=None, | containing_type=None, | ||||
| serialized_options=None, | serialized_options=None, | ||||
| serialized_start=824, | |||||
| serialized_end=973, | |||||
| serialized_start=986, | |||||
| serialized_end=1360, | |||||
| ) | ) | ||||
| _sym_db.RegisterEnumDescriptor(_WATCHCONDITION_CONDITION) | _sym_db.RegisterEnumDescriptor(_WATCHCONDITION_CONDITION) | ||||
| @@ -152,6 +192,13 @@ _METADATA = _descriptor.Descriptor( | |||||
| message_type=None, enum_type=None, containing_type=None, | message_type=None, enum_type=None, containing_type=None, | ||||
| is_extension=False, extension_scope=None, | is_extension=False, extension_scope=None, | ||||
| serialized_options=None, file=DESCRIPTOR), | serialized_options=None, file=DESCRIPTOR), | ||||
| _descriptor.FieldDescriptor( | |||||
| name='graph_num', full_name='debugger.Metadata.graph_num', index=5, | |||||
| number=6, type=5, cpp_type=1, label=1, | |||||
| has_default_value=False, default_value=0, | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| ], | ], | ||||
| extensions=[ | extensions=[ | ||||
| ], | ], | ||||
| @@ -165,7 +212,7 @@ _METADATA = _descriptor.Descriptor( | |||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=100, | serialized_start=100, | ||||
| serialized_end=207, | |||||
| serialized_end=226, | |||||
| ) | ) | ||||
| @@ -183,6 +230,13 @@ _CHUNK = _descriptor.Descriptor( | |||||
| message_type=None, enum_type=None, containing_type=None, | message_type=None, enum_type=None, containing_type=None, | ||||
| is_extension=False, extension_scope=None, | is_extension=False, extension_scope=None, | ||||
| serialized_options=None, file=DESCRIPTOR), | serialized_options=None, file=DESCRIPTOR), | ||||
| _descriptor.FieldDescriptor( | |||||
| name='finished', full_name='debugger.Chunk.finished', index=1, | |||||
| number=2, type=8, cpp_type=7, label=1, | |||||
| has_default_value=False, default_value=False, | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| ], | ], | ||||
| extensions=[ | extensions=[ | ||||
| ], | ], | ||||
| @@ -195,8 +249,8 @@ _CHUNK = _descriptor.Descriptor( | |||||
| extension_ranges=[], | extension_ranges=[], | ||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=209, | |||||
| serialized_end=232, | |||||
| serialized_start=228, | |||||
| serialized_end=269, | |||||
| ) | ) | ||||
| @@ -258,8 +312,8 @@ _EVENTREPLY = _descriptor.Descriptor( | |||||
| name='cmd', full_name='debugger.EventReply.cmd', | name='cmd', full_name='debugger.EventReply.cmd', | ||||
| index=0, containing_type=None, fields=[]), | index=0, containing_type=None, fields=[]), | ||||
| ], | ], | ||||
| serialized_start=235, | |||||
| serialized_end=471, | |||||
| serialized_start=272, | |||||
| serialized_end=508, | |||||
| ) | ) | ||||
| @@ -306,8 +360,8 @@ _RUNCMD = _descriptor.Descriptor( | |||||
| name='cmd', full_name='debugger.RunCMD.cmd', | name='cmd', full_name='debugger.RunCMD.cmd', | ||||
| index=0, containing_type=None, fields=[]), | index=0, containing_type=None, fields=[]), | ||||
| ], | ], | ||||
| serialized_start=473, | |||||
| serialized_end=549, | |||||
| serialized_start=510, | |||||
| serialized_end=586, | |||||
| ) | ) | ||||
| @@ -358,8 +412,8 @@ _SETCMD = _descriptor.Descriptor( | |||||
| extension_ranges=[], | extension_ranges=[], | ||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=552, | |||||
| serialized_end=681, | |||||
| serialized_start=589, | |||||
| serialized_end=718, | |||||
| ) | ) | ||||
| @@ -389,11 +443,62 @@ _VIEWCMD = _descriptor.Descriptor( | |||||
| extension_ranges=[], | extension_ranges=[], | ||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=683, | |||||
| serialized_end=732, | |||||
| serialized_start=720, | |||||
| serialized_end=769, | |||||
| ) | ) | ||||
| _WATCHCONDITION_PARAMETER = _descriptor.Descriptor( | |||||
| name='Parameter', | |||||
| full_name='debugger.WatchCondition.Parameter', | |||||
| filename=None, | |||||
| file=DESCRIPTOR, | |||||
| containing_type=None, | |||||
| fields=[ | |||||
| _descriptor.FieldDescriptor( | |||||
| name='name', full_name='debugger.WatchCondition.Parameter.name', index=0, | |||||
| number=1, type=9, cpp_type=9, label=1, | |||||
| has_default_value=False, default_value=b"".decode('utf-8'), | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| _descriptor.FieldDescriptor( | |||||
| name='disabled', full_name='debugger.WatchCondition.Parameter.disabled', index=1, | |||||
| number=2, type=8, cpp_type=7, label=1, | |||||
| has_default_value=False, default_value=False, | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| _descriptor.FieldDescriptor( | |||||
| name='value', full_name='debugger.WatchCondition.Parameter.value', index=2, | |||||
| number=3, type=1, cpp_type=5, label=1, | |||||
| has_default_value=False, default_value=float(0), | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| _descriptor.FieldDescriptor( | |||||
| name='hit', full_name='debugger.WatchCondition.Parameter.hit', index=3, | |||||
| number=4, type=8, cpp_type=7, label=1, | |||||
| has_default_value=False, default_value=False, | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| ], | |||||
| extensions=[ | |||||
| ], | |||||
| nested_types=[], | |||||
| enum_types=[ | |||||
| ], | |||||
| serialized_options=None, | |||||
| is_extendable=False, | |||||
| syntax='proto3', | |||||
| extension_ranges=[], | |||||
| oneofs=[ | |||||
| ], | |||||
| serialized_start=912, | |||||
| serialized_end=983, | |||||
| ) | |||||
| _WATCHCONDITION = _descriptor.Descriptor( | _WATCHCONDITION = _descriptor.Descriptor( | ||||
| name='WatchCondition', | name='WatchCondition', | ||||
| full_name='debugger.WatchCondition', | full_name='debugger.WatchCondition', | ||||
| @@ -415,10 +520,17 @@ _WATCHCONDITION = _descriptor.Descriptor( | |||||
| message_type=None, enum_type=None, containing_type=None, | message_type=None, enum_type=None, containing_type=None, | ||||
| is_extension=False, extension_scope=None, | is_extension=False, extension_scope=None, | ||||
| serialized_options=None, file=DESCRIPTOR), | serialized_options=None, file=DESCRIPTOR), | ||||
| _descriptor.FieldDescriptor( | |||||
| name='params', full_name='debugger.WatchCondition.params', index=2, | |||||
| number=4, type=11, cpp_type=10, label=3, | |||||
| has_default_value=False, default_value=[], | |||||
| message_type=None, enum_type=None, containing_type=None, | |||||
| is_extension=False, extension_scope=None, | |||||
| serialized_options=None, file=DESCRIPTOR), | |||||
| ], | ], | ||||
| extensions=[ | extensions=[ | ||||
| ], | ], | ||||
| nested_types=[], | |||||
| nested_types=[_WATCHCONDITION_PARAMETER, ], | |||||
| enum_types=[ | enum_types=[ | ||||
| _WATCHCONDITION_CONDITION, | _WATCHCONDITION_CONDITION, | ||||
| ], | ], | ||||
| @@ -428,8 +540,8 @@ _WATCHCONDITION = _descriptor.Descriptor( | |||||
| extension_ranges=[], | extension_ranges=[], | ||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=735, | |||||
| serialized_end=973, | |||||
| serialized_start=772, | |||||
| serialized_end=1360, | |||||
| ) | ) | ||||
| @@ -466,8 +578,8 @@ _WATCHNODE = _descriptor.Descriptor( | |||||
| extension_ranges=[], | extension_ranges=[], | ||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=975, | |||||
| serialized_end=1024, | |||||
| serialized_start=1362, | |||||
| serialized_end=1411, | |||||
| ) | ) | ||||
| @@ -511,8 +623,8 @@ _WATCHPOINTHIT = _descriptor.Descriptor( | |||||
| extension_ranges=[], | extension_ranges=[], | ||||
| oneofs=[ | oneofs=[ | ||||
| ], | ], | ||||
| serialized_start=1026, | |||||
| serialized_end=1143, | |||||
| serialized_start=1413, | |||||
| serialized_end=1530, | |||||
| ) | ) | ||||
| _EVENTREPLY.fields_by_name['status'].enum_type = _EVENTREPLY_STATUS | _EVENTREPLY.fields_by_name['status'].enum_type = _EVENTREPLY_STATUS | ||||
| @@ -541,7 +653,9 @@ _RUNCMD.fields_by_name['node_name'].containing_oneof = _RUNCMD.oneofs_by_name['c | |||||
| _SETCMD.fields_by_name['watch_nodes'].message_type = _WATCHNODE | _SETCMD.fields_by_name['watch_nodes'].message_type = _WATCHNODE | ||||
| _SETCMD.fields_by_name['watch_condition'].message_type = _WATCHCONDITION | _SETCMD.fields_by_name['watch_condition'].message_type = _WATCHCONDITION | ||||
| _VIEWCMD.fields_by_name['tensors'].message_type = mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2._TENSORPROTO | _VIEWCMD.fields_by_name['tensors'].message_type = mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2._TENSORPROTO | ||||
| _WATCHCONDITION_PARAMETER.containing_type = _WATCHCONDITION | |||||
| _WATCHCONDITION.fields_by_name['condition'].enum_type = _WATCHCONDITION_CONDITION | _WATCHCONDITION.fields_by_name['condition'].enum_type = _WATCHCONDITION_CONDITION | ||||
| _WATCHCONDITION.fields_by_name['params'].message_type = _WATCHCONDITION_PARAMETER | |||||
| _WATCHCONDITION_CONDITION.containing_type = _WATCHCONDITION | _WATCHCONDITION_CONDITION.containing_type = _WATCHCONDITION | ||||
| _WATCHPOINTHIT.fields_by_name['tensor'].message_type = mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2._TENSORPROTO | _WATCHPOINTHIT.fields_by_name['tensor'].message_type = mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2._TENSORPROTO | ||||
| _WATCHPOINTHIT.fields_by_name['watch_condition'].message_type = _WATCHCONDITION | _WATCHPOINTHIT.fields_by_name['watch_condition'].message_type = _WATCHCONDITION | ||||
| @@ -599,11 +713,19 @@ ViewCMD = _reflection.GeneratedProtocolMessageType('ViewCMD', (_message.Message, | |||||
| _sym_db.RegisterMessage(ViewCMD) | _sym_db.RegisterMessage(ViewCMD) | ||||
| WatchCondition = _reflection.GeneratedProtocolMessageType('WatchCondition', (_message.Message,), { | WatchCondition = _reflection.GeneratedProtocolMessageType('WatchCondition', (_message.Message,), { | ||||
| 'Parameter' : _reflection.GeneratedProtocolMessageType('Parameter', (_message.Message,), { | |||||
| 'DESCRIPTOR' : _WATCHCONDITION_PARAMETER, | |||||
| '__module__' : 'mindinsight.debugger.proto.debug_grpc_pb2' | |||||
| # @@protoc_insertion_point(class_scope:debugger.WatchCondition.Parameter) | |||||
| }) | |||||
| , | |||||
| 'DESCRIPTOR' : _WATCHCONDITION, | 'DESCRIPTOR' : _WATCHCONDITION, | ||||
| '__module__' : 'mindinsight.debugger.proto.debug_grpc_pb2' | '__module__' : 'mindinsight.debugger.proto.debug_grpc_pb2' | ||||
| # @@protoc_insertion_point(class_scope:debugger.WatchCondition) | # @@protoc_insertion_point(class_scope:debugger.WatchCondition) | ||||
| }) | }) | ||||
| _sym_db.RegisterMessage(WatchCondition) | _sym_db.RegisterMessage(WatchCondition) | ||||
| _sym_db.RegisterMessage(WatchCondition.Parameter) | |||||
| WatchNode = _reflection.GeneratedProtocolMessageType('WatchNode', (_message.Message,), { | WatchNode = _reflection.GeneratedProtocolMessageType('WatchNode', (_message.Message,), { | ||||
| 'DESCRIPTOR' : _WATCHNODE, | 'DESCRIPTOR' : _WATCHNODE, | ||||
| @@ -627,8 +749,8 @@ _EVENTLISTENER = _descriptor.ServiceDescriptor( | |||||
| file=DESCRIPTOR, | file=DESCRIPTOR, | ||||
| index=0, | index=0, | ||||
| serialized_options=None, | serialized_options=None, | ||||
| serialized_start=1146, | |||||
| serialized_end=1469, | |||||
| serialized_start=1533, | |||||
| serialized_end=1918, | |||||
| methods=[ | methods=[ | ||||
| _descriptor.MethodDescriptor( | _descriptor.MethodDescriptor( | ||||
| name='WaitCMD', | name='WaitCMD', | ||||
| @@ -675,6 +797,15 @@ _EVENTLISTENER = _descriptor.ServiceDescriptor( | |||||
| output_type=_EVENTREPLY, | output_type=_EVENTREPLY, | ||||
| serialized_options=None, | serialized_options=None, | ||||
| ), | ), | ||||
| _descriptor.MethodDescriptor( | |||||
| name='SendMultiGraphs', | |||||
| full_name='debugger.EventListener.SendMultiGraphs', | |||||
| index=5, | |||||
| containing_service=None, | |||||
| input_type=_CHUNK, | |||||
| output_type=_EVENTREPLY, | |||||
| serialized_options=None, | |||||
| ), | |||||
| ]) | ]) | ||||
| _sym_db.RegisterServiceDescriptor(_EVENTLISTENER) | _sym_db.RegisterServiceDescriptor(_EVENTLISTENER) | ||||
| @@ -1,4 +1,5 @@ | |||||
| # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! | ||||
| """Client and server classes corresponding to protobuf-defined services.""" | |||||
| import grpc | import grpc | ||||
| from mindinsight.debugger.proto import debug_grpc_pb2 as mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2 | from mindinsight.debugger.proto import debug_grpc_pb2 as mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2 | ||||
| @@ -6,7 +7,7 @@ from mindinsight.debugger.proto import ms_graph_pb2 as mindinsight_dot_debugger_ | |||||
| class EventListenerStub(object): | class EventListenerStub(object): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| def __init__(self, channel): | def __init__(self, channel): | ||||
| """Constructor. | """Constructor. | ||||
| @@ -39,37 +40,48 @@ class EventListenerStub(object): | |||||
| request_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.SerializeToString, | request_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.SerializeToString, | ||||
| response_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | response_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | ||||
| ) | ) | ||||
| self.SendMultiGraphs = channel.stream_unary( | |||||
| '/debugger.EventListener/SendMultiGraphs', | |||||
| request_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | |||||
| response_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||||
| ) | |||||
| class EventListenerServicer(object): | class EventListenerServicer(object): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| def WaitCMD(self, request, context): | def WaitCMD(self, request, context): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | context.set_code(grpc.StatusCode.UNIMPLEMENTED) | ||||
| context.set_details('Method not implemented!') | context.set_details('Method not implemented!') | ||||
| raise NotImplementedError('Method not implemented!') | raise NotImplementedError('Method not implemented!') | ||||
| def SendMetadata(self, request, context): | def SendMetadata(self, request, context): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | context.set_code(grpc.StatusCode.UNIMPLEMENTED) | ||||
| context.set_details('Method not implemented!') | context.set_details('Method not implemented!') | ||||
| raise NotImplementedError('Method not implemented!') | raise NotImplementedError('Method not implemented!') | ||||
| def SendGraph(self, request_iterator, context): | def SendGraph(self, request_iterator, context): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | context.set_code(grpc.StatusCode.UNIMPLEMENTED) | ||||
| context.set_details('Method not implemented!') | context.set_details('Method not implemented!') | ||||
| raise NotImplementedError('Method not implemented!') | raise NotImplementedError('Method not implemented!') | ||||
| def SendTensors(self, request_iterator, context): | def SendTensors(self, request_iterator, context): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | context.set_code(grpc.StatusCode.UNIMPLEMENTED) | ||||
| context.set_details('Method not implemented!') | context.set_details('Method not implemented!') | ||||
| raise NotImplementedError('Method not implemented!') | raise NotImplementedError('Method not implemented!') | ||||
| def SendWatchpointHits(self, request_iterator, context): | def SendWatchpointHits(self, request_iterator, context): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | |||||
| context.set_details('Method not implemented!') | |||||
| raise NotImplementedError('Method not implemented!') | |||||
| def SendMultiGraphs(self, request_iterator, context): | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| context.set_code(grpc.StatusCode.UNIMPLEMENTED) | context.set_code(grpc.StatusCode.UNIMPLEMENTED) | ||||
| context.set_details('Method not implemented!') | context.set_details('Method not implemented!') | ||||
| raise NotImplementedError('Method not implemented!') | raise NotImplementedError('Method not implemented!') | ||||
| @@ -102,6 +114,11 @@ def add_EventListenerServicer_to_server(servicer, server): | |||||
| request_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.FromString, | request_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.FromString, | ||||
| response_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.SerializeToString, | response_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.SerializeToString, | ||||
| ), | ), | ||||
| 'SendMultiGraphs': grpc.stream_unary_rpc_method_handler( | |||||
| servicer.SendMultiGraphs, | |||||
| request_deserializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.FromString, | |||||
| response_serializer=mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.SerializeToString, | |||||
| ), | |||||
| } | } | ||||
| generic_handler = grpc.method_handlers_generic_handler( | generic_handler = grpc.method_handlers_generic_handler( | ||||
| 'debugger.EventListener', rpc_method_handlers) | 'debugger.EventListener', rpc_method_handlers) | ||||
| @@ -110,7 +127,7 @@ def add_EventListenerServicer_to_server(servicer, server): | |||||
| # This class is part of an EXPERIMENTAL API. | # This class is part of an EXPERIMENTAL API. | ||||
| class EventListener(object): | class EventListener(object): | ||||
| """Missing associated documentation comment in .proto file""" | |||||
| """Missing associated documentation comment in .proto file.""" | |||||
| @staticmethod | @staticmethod | ||||
| def WaitCMD(request, | def WaitCMD(request, | ||||
| @@ -118,6 +135,7 @@ class EventListener(object): | |||||
| options=(), | options=(), | ||||
| channel_credentials=None, | channel_credentials=None, | ||||
| call_credentials=None, | call_credentials=None, | ||||
| insecure=False, | |||||
| compression=None, | compression=None, | ||||
| wait_for_ready=None, | wait_for_ready=None, | ||||
| timeout=None, | timeout=None, | ||||
| @@ -126,7 +144,7 @@ class EventListener(object): | |||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Metadata.SerializeToString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Metadata.SerializeToString, | ||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | ||||
| options, channel_credentials, | options, channel_credentials, | ||||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| @staticmethod | @staticmethod | ||||
| def SendMetadata(request, | def SendMetadata(request, | ||||
| @@ -134,6 +152,7 @@ class EventListener(object): | |||||
| options=(), | options=(), | ||||
| channel_credentials=None, | channel_credentials=None, | ||||
| call_credentials=None, | call_credentials=None, | ||||
| insecure=False, | |||||
| compression=None, | compression=None, | ||||
| wait_for_ready=None, | wait_for_ready=None, | ||||
| timeout=None, | timeout=None, | ||||
| @@ -142,7 +161,7 @@ class EventListener(object): | |||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Metadata.SerializeToString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Metadata.SerializeToString, | ||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | ||||
| options, channel_credentials, | options, channel_credentials, | ||||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| @staticmethod | @staticmethod | ||||
| def SendGraph(request_iterator, | def SendGraph(request_iterator, | ||||
| @@ -150,6 +169,7 @@ class EventListener(object): | |||||
| options=(), | options=(), | ||||
| channel_credentials=None, | channel_credentials=None, | ||||
| call_credentials=None, | call_credentials=None, | ||||
| insecure=False, | |||||
| compression=None, | compression=None, | ||||
| wait_for_ready=None, | wait_for_ready=None, | ||||
| timeout=None, | timeout=None, | ||||
| @@ -158,7 +178,7 @@ class EventListener(object): | |||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | ||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | ||||
| options, channel_credentials, | options, channel_credentials, | ||||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| @staticmethod | @staticmethod | ||||
| def SendTensors(request_iterator, | def SendTensors(request_iterator, | ||||
| @@ -166,6 +186,7 @@ class EventListener(object): | |||||
| options=(), | options=(), | ||||
| channel_credentials=None, | channel_credentials=None, | ||||
| call_credentials=None, | call_credentials=None, | ||||
| insecure=False, | |||||
| compression=None, | compression=None, | ||||
| wait_for_ready=None, | wait_for_ready=None, | ||||
| timeout=None, | timeout=None, | ||||
| @@ -174,7 +195,7 @@ class EventListener(object): | |||||
| mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.TensorProto.SerializeToString, | mindinsight_dot_debugger_dot_proto_dot_ms__graph__pb2.TensorProto.SerializeToString, | ||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | ||||
| options, channel_credentials, | options, channel_credentials, | ||||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| @staticmethod | @staticmethod | ||||
| def SendWatchpointHits(request_iterator, | def SendWatchpointHits(request_iterator, | ||||
| @@ -182,6 +203,7 @@ class EventListener(object): | |||||
| options=(), | options=(), | ||||
| channel_credentials=None, | channel_credentials=None, | ||||
| call_credentials=None, | call_credentials=None, | ||||
| insecure=False, | |||||
| compression=None, | compression=None, | ||||
| wait_for_ready=None, | wait_for_ready=None, | ||||
| timeout=None, | timeout=None, | ||||
| @@ -190,4 +212,21 @@ class EventListener(object): | |||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.SerializeToString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.WatchpointHit.SerializeToString, | ||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | ||||
| options, channel_credentials, | options, channel_credentials, | ||||
| call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| @staticmethod | |||||
| def SendMultiGraphs(request_iterator, | |||||
| target, | |||||
| options=(), | |||||
| channel_credentials=None, | |||||
| call_credentials=None, | |||||
| insecure=False, | |||||
| compression=None, | |||||
| wait_for_ready=None, | |||||
| timeout=None, | |||||
| metadata=None): | |||||
| return grpc.experimental.stream_unary(request_iterator, target, '/debugger.EventListener/SendMultiGraphs', | |||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.Chunk.SerializeToString, | |||||
| mindinsight_dot_debugger_dot_proto_dot_debug__grpc__pb2.EventReply.FromString, | |||||
| options, channel_credentials, | |||||
| insecure, call_credentials, compression, wait_for_ready, timeout, metadata) | |||||
| @@ -14,16 +14,66 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| """This file is used to define the basic graph.""" | """This file is used to define the basic graph.""" | ||||
| from collections import deque | from collections import deque | ||||
| from copy import deepcopy | |||||
| from mindinsight.datavisual.data_transform.graph.msgraph import MSGraph | from mindinsight.datavisual.data_transform.graph.msgraph import MSGraph | ||||
| from mindinsight.debugger.common.exceptions.exceptions import \ | from mindinsight.debugger.common.exceptions.exceptions import \ | ||||
| DebuggerNodeNotInGraphError, DebuggerParamValueError | DebuggerNodeNotInGraphError, DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from .node_type_identifier import NodeTypeIdentifier | |||||
| def _is_match(identifier, node, condition): | |||||
| """Check if the node is matched to the identifier. | |||||
| Args: | |||||
| identifier (NodeTypeIdentifier): The debug name of the node. | |||||
| node (Node obj): The number of layers the user wants to trace. Default is 0. | |||||
| Returns: | |||||
| list, a list of the traced tensors' name and node type, | |||||
| arranged in order from leaf node to root node. | |||||
| int, the number of output tensors. | |||||
| """ | |||||
| if condition: | |||||
| matched = identifier.is_match(node, condition) | |||||
| else: | |||||
| matched = identifier.is_match(node) | |||||
| return matched | |||||
| class DebuggerGraph(MSGraph): | class DebuggerGraph(MSGraph): | ||||
| """The `DebuggerGraph` object provides interfaces to describe a debugger graph.""" | """The `DebuggerGraph` object provides interfaces to describe a debugger graph.""" | ||||
    @property
    def leaf_nodes(self):
        """Return the leaf nodes (mapping from node name to node object)."""
        return self._leaf_nodes

    @property
    def normal_node_map(self):
        """Return the normal node map (mapping from node name to node object)."""
        return self._normal_node_map

    @property
    def node_id_map_name(self):
        """Return the node_id_map_name."""
        return self._node_id_map_name

    @property
    def const_node_temp_cache(self):
        """Return the const_node_temp_cache."""
        return self._const_node_temp_cache

    @property
    def parameter_node_temp_cache(self):
        """Return the parameter_node_temp_cache."""
        return self._parameter_node_temp_cache

    @property
    def full_name_map_name(self):
        """Return the mapping from node full name to node name."""
        return self._full_name_map_name
| def get_node_name_by_full_name(self, full_name): | def get_node_name_by_full_name(self, full_name): | ||||
| """Get node name by full names.""" | """Get node name by full names.""" | ||||
| inner_name = self._full_name_map_name.get(full_name, '') | inner_name = self._full_name_map_name.get(full_name, '') | ||||
| @@ -33,12 +83,15 @@ class DebuggerGraph(MSGraph): | |||||
| return inner_name | return inner_name | ||||
    def get_full_name_by_node_name(self, node_name):
        """
        Get full name by node name.

        Args:
            node_name (str): The node name in the graph. An empty value is
                tolerated and yields an empty string.

        Returns:
            str, the full name of the node, or '' when `node_name` is empty.

        Raises:
            DebuggerNodeNotInGraphError: If the node is not in the graph.
        """
        if not node_name:
            return ''
        node = self._normal_node_map.get(node_name)
        if not node:
            log.error("Node <%s> is not in graph.", node_name)
            raise DebuggerNodeNotInGraphError(node_name=node_name)
        return node.full_name
    def get_node_type(self, node_name):
        """
        Get the type of the node.

        Args:
            node_name (str): The full name of the node with its scope.

        Returns:
            str, node type or name_scope.

        Raises:
            DebuggerNodeNotInGraphError: If the node is not in the graph.
        """
        if not node_name:
            # An empty name is treated as the (virtual) root name scope.
            return 'name_scope'
        node = self._normal_node_map.get(node_name)
        if not node:
            log.error("Node <%s> is not in graph.", node_name)
            raise DebuggerNodeNotInGraphError(node_name=node_name)
        return node.type
| def search_nodes_by_category(self, node_category, condition=None): | |||||
| """ | |||||
| Search nodes by type. | |||||
| Args: | |||||
| node_category (TargetTypeEnum): The node type supported in | |||||
| mindinsight.conditionmgr.condition.TargetTypeEnum. | |||||
| condition (dict): Search condition. Default: None. | |||||
| - activation_func (Union[str, list[str]): The target functions. Used when node_type | |||||
| is TargetTypeEnum.ACTIVATION. | |||||
| - search_range (list[Node]): The list of nodes to be searched from. | |||||
| Returns: | |||||
| list[Node], list of nodes. | |||||
| """ | |||||
| identifier = NodeTypeIdentifier(node_category.value) | |||||
| # get search range | |||||
| condition = {} if condition is None else condition | |||||
| search_range = condition.pop('search_range', None) | |||||
| if not search_range: | |||||
| search_range = self._leaf_nodes.values() | |||||
| # search match nodes | |||||
| matched_nodes = [] | |||||
| for node in search_range: | |||||
| matched = _is_match(identifier, node, condition) | |||||
| if matched: | |||||
| matched_nodes.append(node) | |||||
| return matched_nodes | |||||
| def get_tensor_history(self, node_name, depth=0): | def get_tensor_history(self, node_name, depth=0): | ||||
| """ | """ | ||||
| Get the tensor history of a specified node. | Get the tensor history of a specified node. | ||||
| @@ -188,3 +275,86 @@ class DebuggerGraph(MSGraph): | |||||
| raise DebuggerParamValueError(msg) | raise DebuggerParamValueError(msg) | ||||
| return default_root | return default_root | ||||
| def get_tensor_graph(self, node_name): | |||||
| """ | |||||
| Get graph relative to a node. | |||||
| Args: | |||||
| node_name (str): Node name. | |||||
| Returns: | |||||
| dict, tensor graph, format is: | |||||
| {'nodes': [ | |||||
| {'name': <node name>, | |||||
| 'full_name': <node full name>, | |||||
| 'type': <node type> | |||||
| 'input': <input objects>, | |||||
| 'output': <output objects>, | |||||
| 'slot': {'id': <slot id>} | |||||
| } | |||||
| ]} | |||||
| """ | |||||
| graph_nodes = [] | |||||
| cur_node = self._leaf_nodes.get(node_name) | |||||
| node_detail_info = cur_node.to_dict() | |||||
| cur_node_info = self._get_node_info_for_tensor_graph(cur_node) | |||||
| cur_node_info['input'] = deepcopy(node_detail_info.get('input')) | |||||
| cur_node_info['output'] = deepcopy(node_detail_info.get('output')) | |||||
| self._add_input_node_info(cur_node_info=cur_node_info, graph_nodes=graph_nodes) | |||||
| self._add_output_node_info(cur_node=cur_node, cur_node_info=cur_node_info, graph_nodes=graph_nodes) | |||||
| graph_nodes.append(cur_node_info) | |||||
| return {'nodes': graph_nodes} | |||||
| @staticmethod | |||||
| def _get_node_info_for_tensor_graph(node): | |||||
| """Get node infos for tensor graph.""" | |||||
| node_info = { | |||||
| 'name': node.name, | |||||
| 'full_name': node.full_name, | |||||
| 'type': node.type, | |||||
| 'input': {}, | |||||
| 'output': {}, | |||||
| 'slots': [{'slot': str(slot)} for slot in range(node.output_nums)] | |||||
| } | |||||
| return node_info | |||||
    def _add_output_node_info(self, cur_node, cur_node_info, graph_nodes):
        """
        Add output node info into cur_node_info and node list.

        Args:
            cur_node (Node): The current node object.
            cur_node_info (dict): Current node info.
            graph_nodes (list[<Node info>]): The nodes in tensor graph.
        """
        # Every outgoing edge of `cur_node` shares the same slot mapping
        # (derived from `cur_node`'s output slots), so compute it once.
        output_slot_mapping = self._get_slot_mapping(cur_node)
        for node_name, edge_info in cur_node_info.get('output').items():
            edge_info['slot_mapping'] = output_slot_mapping
            # Mirror the edge on the downstream node: record `cur_node` as an
            # input of the output node and add it to the graph node list.
            # NOTE(review): assumes every output name refers to a leaf node;
            # a non-leaf name would make `output_node` None — confirm.
            output_node = self._leaf_nodes.get(node_name)
            output_node_info = self._get_node_info_for_tensor_graph(output_node)
            output_node_info['input'][cur_node.name] = edge_info
            graph_nodes.append(output_node_info)
    def _add_input_node_info(self, cur_node_info, graph_nodes):
        """
        Add input node info into cur_node_info and node list.

        Args:
            cur_node_info (dict): Current node info.
            graph_nodes (list[<Node info>]): The nodes in tensor graph.
        """
        cur_node_name = cur_node_info.get('name')
        for node_name, edge_info in cur_node_info.get('input').items():
            # Each incoming edge takes the slot mapping of its source node.
            # NOTE(review): assumes every input name refers to a leaf node;
            # a non-leaf name would make `input_node` None — confirm.
            input_node = self._leaf_nodes.get(node_name)
            edge_info['slot_mapping'] = self._get_slot_mapping(input_node)
            # Mirror the edge on the upstream node: record the current node
            # as an output of the input node and add it to the node list.
            input_node_info = self._get_node_info_for_tensor_graph(input_node)
            input_node_info['output'][cur_node_name] = edge_info
            graph_nodes.append(input_node_info)
| @staticmethod | |||||
| def _get_slot_mapping(input_node): | |||||
| """Get slot mapping between nodes.""" | |||||
| return [[str(slot), ''] for slot in range(input_node.output_nums)] | |||||
| @@ -0,0 +1,81 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """This file is used to define the basic graph.""" | |||||
| import copy | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.datavisual.data_transform.graph.node import Node, NodeTypeEnum | |||||
| from .debugger_graph import DebuggerGraph | |||||
class DebuggerMultiGraph(DebuggerGraph):
    """The `DebuggerMultiGraph` object provides interfaces to describe a debugger multigraph."""

    def add_graph(self, graph_dict):
        """
        Add graphs to DebuggerMultiGraph.

        For a single graph its internal structures are adopted as-is; for
        multiple graphs every node is re-rooted under a name scope named
        after its graph so node names stay unique in the merged graph.

        Args:
            graph_dict (dict): The <graph_name, graph_object> dict.
        """
        if len(graph_dict) == 1:
            # Single graph: reuse its internal structures directly.
            graph = next(iter(graph_dict.values()))
            self._normal_node_map = graph.normal_node_map
            self._node_id_map_name = graph.node_id_map_name
            self._const_node_temp_cache = graph.const_node_temp_cache
            self._parameter_node_temp_cache = graph.parameter_node_temp_cache
            self._leaf_nodes = graph.leaf_nodes
            self._full_name_map_name = graph.full_name_map_name
        else:
            for graph_name, graph in graph_dict.items():
                log.debug("add graph %s into whole graph.", graph_name)
                # Loop-invariant scope prefix, hoisted out of the node loop.
                pre_scope = graph_name + "/"
                # Deep-copy so the source graph's nodes are left untouched.
                normal_nodes = copy.deepcopy(graph.normal_node_map)
                for node_obj in normal_nodes.values():
                    node_obj.name = pre_scope + node_obj.name
                    node_obj.full_name = pre_scope + node_obj.full_name
                    node_obj.scope = pre_scope + node_obj.scope if node_obj.scope else graph_name
                    # Rename edge endpoints to the re-rooted names.
                    old_inputs = copy.deepcopy(node_obj.inputs)
                    for src_name, input_attr in old_inputs.items():
                        node_obj.add_inputs(pre_scope + src_name, input_attr)
                        node_obj.delete_inputs(src_name)
                    old_outputs = copy.deepcopy(node_obj.outputs)
                    for dst_name, output_attr in old_outputs.items():
                        node_obj.add_outputs(pre_scope + dst_name, output_attr)
                        node_obj.delete_outputs(dst_name)
                    self._cache_node(node_obj)
                # Add a name-scope node representing the whole sub-graph.
                scope_node = Node(name=graph_name, node_id=graph_name)
                scope_node.type = NodeTypeEnum.NAME_SCOPE.value
                scope_node.subnode_count = len(graph.list_node_by_scope())
                self._cache_node(scope_node)
            self._leaf_nodes = self._get_leaf_nodes()
            self._full_name_map_name = self._get_leaf_node_full_name_map()

        log.info(
            "Build multi_graph end, all node count: %s, const count: %s, parameter count: %s.",
            self.normal_node_count, len(self._const_node_temp_cache),
            len(self._parameter_node_temp_cache))
| @@ -0,0 +1,143 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """This file is used to identify the type of the node.""" | |||||
| import sys | |||||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||||
# Operator type names treated as activation functions when no explicit
# `activation_func` filter is supplied (see `is_activation_node`).
_ACTIVATIONS = [
    'Softmax',
    'LogSoftmax',
    'ReLU',
    'ReLU6',
    'Tanh',
    'GELU',
    'ELU',
    'Sigmoid',
    'PReLU',
    'LeakyReLU',
    'HSwish',
    'HSigmoid',
    'LogSigmoid'
]
class NodeTypeIdentifier:
    """Identify whether a node belongs to a given node category."""

    def __init__(self, node_type):
        """
        Initialize the identifier.

        Args:
            node_type (str): The node category name, e.g. 'weight'.
        """
        self.identify_func = self.get_identify_func(node_type)

    @staticmethod
    def get_identify_func(node_type):
        """
        Get the identify function in this module.

        The identify function for a type must be a module-level function
        named 'is_<node_type>_node'.

        Args:
            node_type (str): The node type.

        Returns:
            function, the identify function.

        Raises:
            DebuggerParamValueError: If no identify function exists for `node_type`.
        """
        target_name = 'is_' + node_type + '_node'
        cur_module = sys.modules[__name__]
        # Direct attribute lookup; the previous version linearly scanned dir().
        identify_func = getattr(cur_module, target_name, None)
        if identify_func is None:
            raise DebuggerParamValueError("Invalid identify type.")
        return identify_func

    def is_match(self, *args, **kwargs):
        """Check if the input matches the identify function."""
        return self.identify_func(*args, **kwargs)
def is_weight_node(node):
    """
    Check if the node is weight type.

    Args:
        node (Node): The node object.

    Returns:
        bool, if the node is weight type.
    """
    # Only parameter nodes can be weights.
    if node.type != NodeTypeEnum.PARAMETER.value:
        return False
    lowered_name = node.name.lower()
    looks_like_weight = lowered_name.endswith(('.weight', '.bias'))
    # Optimizer state and gradient parameters are excluded.
    excluded = 'optimizer-' in lowered_name or lowered_name.startswith('gradients/')
    return looks_like_weight and not excluded
def is_activation_node(node, condition=None):
    """
    Check if the node is activation type.

    Args:
        node (Node): The node object.
        condition (dict): Filter condition.

            - activation_func (Union[str, list[str]): The target functions.

    Returns:
        bool, if the node is activation type.
    """
    activation_funcs = condition.get('activation_func') if condition else None
    if not activation_funcs:
        # Fall back to the full activation list when no filter is given.
        activation_funcs = _ACTIVATIONS
    if not isinstance(activation_funcs, list):
        activation_funcs = [activation_funcs]
    # Gradient nodes are never reported as activations.
    if is_gradient_node(node):
        return False
    return node.type in activation_funcs
def is_gradient_node(node):
    """
    Check if the node is gradient type.

    Args:
        node (Node): The node object.

    Returns:
        bool, if the node is gradient type.
    """
    # Gradient nodes live under the 'Gradients/' scope; parameters inside
    # that scope are still parameters, not gradients.
    in_gradient_scope = node.name.startswith('Gradients/')
    not_parameter = node.type != NodeTypeEnum.PARAMETER.value
    return in_gradient_scope and not_parameter
def is_tensor_node(node):
    """
    Check if the node is tensor type.

    Args:
        node (Node): The node object.

    Returns:
        bool, if the node is tensor type.
    """
    # Any existing node is considered a tensor node.
    return node is not None
| @@ -19,7 +19,7 @@ import numpy as np | |||||
| from mindinsight.utils.tensor import TensorUtils | from mindinsight.utils.tensor import TensorUtils | ||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import NUMPY_TYPE_MAP | from mindinsight.debugger.common.utils import NUMPY_TYPE_MAP | ||||
| from mindinsight.debugger.proto.ms_graph_pb2 import DataType | from mindinsight.debugger.proto.ms_graph_pb2 import DataType | ||||
| @@ -177,6 +177,18 @@ class OpTensor(BaseTensor): | |||||
| return res | return res | ||||
| def get_tensor_statistics(self): | |||||
| """ | |||||
| Get Tensor statistics. | |||||
| Returns: | |||||
| dict, overall statistics. | |||||
| """ | |||||
| if not self._stats: | |||||
| self._stats = TensorUtils.get_statistics_from_tensor(self.value) | |||||
| statistics = TensorUtils.get_overall_statistic_dict(self._stats) | |||||
| return statistics | |||||
| def update_tensor_comparisons(self, tensor_comparison): | def update_tensor_comparisons(self, tensor_comparison): | ||||
| """ | """ | ||||
| Update tensor comparison for tensor. | Update tensor comparison for tensor. | ||||
| @@ -13,23 +13,45 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """Define the watchpoint stream.""" | """Define the watchpoint stream.""" | ||||
| from mindinsight.datavisual.data_transform.graph.node import NodeTypeEnum | |||||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import is_scope_type | |||||
| from mindinsight.debugger.proto.debug_grpc_pb2 import SetCMD, WatchCondition | from mindinsight.debugger.proto.debug_grpc_pb2 import SetCMD, WatchCondition | ||||
| from mindinsight.conditionmgr.condition import ConditionIdEnum | |||||
| WATCHPOINT_CONDITION_MAPPING = { | WATCHPOINT_CONDITION_MAPPING = { | ||||
| 'INF': WatchCondition.Condition.inf, | |||||
| 'NAN': WatchCondition.Condition.nan, | |||||
| 'OVERFLOW': WatchCondition.Condition.overflow, | |||||
| 'MAX_GT': WatchCondition.Condition.max_gt, | |||||
| 'MAX_LT': WatchCondition.Condition.max_lt, | |||||
| 'MIN_GT': WatchCondition.Condition.min_gt, | |||||
| 'MIN_LT': WatchCondition.Condition.min_lt, | |||||
| 'MAX_MIN_GT': WatchCondition.Condition.max_min_gt, | |||||
| 'MAX_MIN_LT': WatchCondition.Condition.max_min_lt, | |||||
| 'MEAN_GT': WatchCondition.Condition.mean_gt, | |||||
| 'MEAN_LT': WatchCondition.Condition.mean_lt | |||||
| ConditionIdEnum.NAN.value: WatchCondition.Condition.nan, | |||||
| ConditionIdEnum.INF.value: WatchCondition.Condition.inf, | |||||
| ConditionIdEnum.OVERFLOW_ASCEND_CHIP.value: WatchCondition.Condition.overflow, | |||||
| ConditionIdEnum.MAX_GT.value: WatchCondition.Condition.max_gt, | |||||
| ConditionIdEnum.MAX_LT.value: WatchCondition.Condition.max_lt, | |||||
| ConditionIdEnum.MIN_GT.value: WatchCondition.Condition.min_gt, | |||||
| ConditionIdEnum.MIN_LT.value: WatchCondition.Condition.min_lt, | |||||
| ConditionIdEnum.MAX_MIN_GT.value: WatchCondition.Condition.max_min_gt, | |||||
| ConditionIdEnum.MAX_MIN_LT.value: WatchCondition.Condition.max_min_lt, | |||||
| ConditionIdEnum.MEAN_GT.value: WatchCondition.Condition.mean_gt, | |||||
| ConditionIdEnum.MEAN_LT.value: WatchCondition.Condition.mean_lt, | |||||
| ConditionIdEnum.TENSOR_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow, | |||||
| ConditionIdEnum.WEIGHT_OVERFLOW.value: WatchCondition.Condition.tensor_general_overflow, | |||||
| ConditionIdEnum.OPERATOR_OVERFLOW.value: WatchCondition.Condition.overflow, | |||||
| ConditionIdEnum.TENSOR_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization, | |||||
| ConditionIdEnum.WEIGHT_INITIALIZATION.value: WatchCondition.Condition.tensor_initialization, | |||||
| ConditionIdEnum.TENSOR_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, | |||||
| ConditionIdEnum.WEIGHT_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, | |||||
| ConditionIdEnum.GRADIENT_TOO_LARGE.value: WatchCondition.Condition.tensor_too_large, | |||||
| ConditionIdEnum.GRADIENT_EXPLODING.value: WatchCondition.Condition.tensor_general_overflow, | |||||
| ConditionIdEnum.TENSOR_TOO_SMALL.value: WatchCondition.Condition.tensor_too_small, | |||||
| ConditionIdEnum.WEIGHT_TOO_SMALL.value: WatchCondition.Condition.tensor_too_small, | |||||
| ConditionIdEnum.GRADIENT_VANISHING.value: WatchCondition.Condition.tensor_too_small, | |||||
| ConditionIdEnum.TENSOR_ALL_ZERO.value: WatchCondition.Condition.tensor_all_zero, | |||||
| ConditionIdEnum.TENSOR_CHANGE_TOO_LARGE.value: WatchCondition.Condition.tensor_change_too_large, | |||||
| ConditionIdEnum.WEIGHT_CHANGE_TOO_LARGE.value: WatchCondition.Condition.tensor_change_too_large, | |||||
| ConditionIdEnum.TENSOR_CHANGE_TOO_SMALL.value: WatchCondition.Condition.tensor_change_too_small, | |||||
| ConditionIdEnum.WEIGHT_CHANGE_TOO_SMALL.value: WatchCondition.Condition.tensor_change_too_small, | |||||
| ConditionIdEnum.TENSOR_NOT_CHANGED.value: WatchCondition.Condition.tensor_not_changed, | |||||
| ConditionIdEnum.WEIGHT_NOT_CHANGED.value: WatchCondition.Condition.tensor_not_changed | |||||
| } | } | ||||
| @@ -81,10 +103,8 @@ class WatchNodeTree: | |||||
| def _translate_node_type(node_type): | def _translate_node_type(node_type): | ||||
| """Translate node type to watch node type.""" | """Translate node type to watch node type.""" | ||||
| flag = node_type | flag = node_type | ||||
| if not node_type or node_type == NodeTypeEnum.NAME_SCOPE.value: | |||||
| if not node_type or is_scope_type(node_type): | |||||
| flag = 'scope' | flag = 'scope' | ||||
| elif node_type != NodeTypeEnum.AGGREGATION_SCOPE.value: | |||||
| flag = 'leaf' | |||||
| return flag | return flag | ||||
| def get(self, sub_name): | def get(self, sub_name): | ||||
| @@ -191,7 +211,7 @@ class Watchpoint: | |||||
| self._watch_node = other_watchpoint.nodes | self._watch_node = other_watchpoint.nodes | ||||
| def add_nodes(self, nodes): | def add_nodes(self, nodes): | ||||
| """Add node into watchcpoint.""" | |||||
| """Add node into watchpoint.""" | |||||
| if not nodes: | if not nodes: | ||||
| log.warning("Add empty nodes.") | log.warning("Add empty nodes.") | ||||
| return | return | ||||
| @@ -208,8 +228,7 @@ class Watchpoint: | |||||
| if not isinstance(nodes, list): | if not isinstance(nodes, list): | ||||
| nodes = [nodes] | nodes = [nodes] | ||||
| for node in nodes: | for node in nodes: | ||||
| node_name = node.split(':')[0] | |||||
| self._watch_node.remove_node(node_name) | |||||
| self._watch_node.remove_node(node.name) | |||||
| def get_node_status(self, node_name, node_type, full_name): | def get_node_status(self, node_name, node_type, full_name): | ||||
| """Judge if the node is in watch nodes.""" | """Judge if the node is in watch nodes.""" | ||||
| @@ -229,40 +248,56 @@ class Watchpoint: | |||||
| return status | return status | ||||
| def get_watch_node(self, cur_watch_node, watch_node_list): | |||||
| def _get_watch_node(self, cur_watch_node, watch_node_list): | |||||
| """ | """ | ||||
| Traverse the watch nodes and add total watched node list to `watch_node_list`. | Traverse the watch nodes and add total watched node list to `watch_node_list`. | ||||
| Args: | Args: | ||||
| cur_watch_node (WatchNodeTree): The current watch node. | cur_watch_node (WatchNodeTree): The current watch node. | ||||
| watch_node_list (list[WatchNodeTree]): The list of total watched node. | |||||
| watch_node_list (list[NodeBasicInfo]): The list of watch node basic infos. | |||||
| """ | """ | ||||
| if cur_watch_node.watch_status == WatchNodeTree.TOTAL_WATCH and \ | |||||
| cur_watch_node.node_type != NodeTypeEnum.AGGREGATION_SCOPE.value: | |||||
| watch_node_list.append(cur_watch_node) | |||||
| if cur_watch_node.watch_status == WatchNodeTree.TOTAL_WATCH: | |||||
| node_info = NodeBasicInfo(name=cur_watch_node.node_name, | |||||
| full_name=cur_watch_node.full_name, | |||||
| type=cur_watch_node.node_type) | |||||
| watch_node_list.append(node_info) | |||||
| return | return | ||||
| for _, watch_node in cur_watch_node.get_children(): | for _, watch_node in cur_watch_node.get_children(): | ||||
| self.get_watch_node(watch_node, watch_node_list) | |||||
| self._get_watch_node(watch_node, watch_node_list) | |||||
| def get_set_cmd(self): | |||||
| """Return the watchpoint in proto format.""" | |||||
| # get watch nodes. | |||||
| def get_watch_nodes(self): | |||||
| """ | |||||
| Get the name of all total watched nodes. | |||||
| Returns: | |||||
| list[NodeBasicInfo], the list of watch node basic infos. | |||||
| """ | |||||
| watch_nodes = [] | watch_nodes = [] | ||||
| self.get_watch_node(self._watch_node, watch_nodes) | |||||
| self._get_watch_node(self._watch_node, watch_nodes) | |||||
| return watch_nodes | |||||
| def get_pending_cmd(self, watch_nodes): | |||||
| """Return the watchpoint in proto format.""" | |||||
| # construct SetCMD | # construct SetCMD | ||||
| set_cmd = SetCMD() | set_cmd = SetCMD() | ||||
| set_cmd.id = self._id | set_cmd.id = self._id | ||||
| set_cmd.delete = False | set_cmd.delete = False | ||||
| set_cmd.watch_condition.condition = WATCHPOINT_CONDITION_MAPPING.get( | set_cmd.watch_condition.condition = WATCHPOINT_CONDITION_MAPPING.get( | ||||
| self._condition.get('condition')) | |||||
| if self._condition.get('param'): | |||||
| self._condition.get('id')) | |||||
| for param in self._condition.get('params'): | |||||
| # at most one param is provided | # at most one param is provided | ||||
| set_cmd.watch_condition.value = self._condition.get('param') | |||||
| param_proto = set_cmd.watch_condition.params.add() | |||||
| param_proto.name = param.get('name') | |||||
| param_proto.value = param.get('value') | |||||
| param_proto.disabled = param.get('disable') | |||||
| # Only one parameter of condition in current version. | |||||
| set_cmd.watch_condition.value = param.get('value') | |||||
| for watch_node in watch_nodes: | for watch_node in watch_nodes: | ||||
| event_node = set_cmd.watch_nodes.add() | event_node = set_cmd.watch_nodes.add() | ||||
| event_node.node_name = watch_node.full_name | event_node.node_name = watch_node.full_name | ||||
| event_node.node_type = watch_node.node_type | |||||
| event_node.node_type = watch_node.type | |||||
| return set_cmd | return set_cmd | ||||
| def get_watch_condition_info(self): | def get_watch_condition_info(self): | ||||
| @@ -277,22 +312,17 @@ class Watchpoint: | |||||
| class WatchpointHit: | class WatchpointHit: | ||||
| """The watchpoint hit structure.""" | """The watchpoint hit structure.""" | ||||
| def __init__(self, tensor_proto, watchpoint, node_name): | |||||
| self._node_name = node_name | |||||
| def __init__(self, tensor_proto, watchpoint, node_name, graph_name): | |||||
| self._full_name = tensor_proto.node_name | self._full_name = tensor_proto.node_name | ||||
| self._slot = tensor_proto.slot | |||||
| self._watchpoint = watchpoint | self._watchpoint = watchpoint | ||||
| self.node_name = node_name | |||||
| self.slot = tensor_proto.slot | |||||
| self.graph_name = graph_name | |||||
| @property | @property | ||||
| def tensor_full_name(self): | def tensor_full_name(self): | ||||
| """The property of tensor full name.""" | """The property of tensor full name.""" | ||||
| tensor_name = ':'.join([self._full_name, self._slot]) | |||||
| return tensor_name | |||||
| @property | |||||
| def tensor_name(self): | |||||
| """The property of tensor ui name.""" | |||||
| tensor_name = ':'.join([self._node_name, self._slot]) | |||||
| tensor_name = ':'.join([self._full_name, self.slot]) | |||||
| return tensor_name | return tensor_name | ||||
| @property | @property | ||||
| @@ -303,5 +333,7 @@ class WatchpointHit: | |||||
| def __eq__(self, other): | def __eq__(self, other): | ||||
| """Define the equal condition.""" | """Define the equal condition.""" | ||||
| flag = self.tensor_full_name == other.tensor_full_name and self.watchpoint == other.watchpoint | |||||
| flag = self.tensor_full_name == other.tensor_full_name \ | |||||
| and self.watchpoint == other.watchpoint \ | |||||
| and self.graph_name == other.graph_name | |||||
| return flag | return flag | ||||
| @@ -18,7 +18,7 @@ from queue import Queue, Empty | |||||
| from threading import Lock | from threading import Lock | ||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | ||||
| @@ -13,10 +13,14 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """Define the graph stream handler.""" | """Define the graph stream handler.""" | ||||
| from mindinsight.conditionmgr.common.utils import NodeBasicInfo | |||||
| from mindinsight.conditionmgr.condition import TargetTypeEnum as CategoryTypeEnum | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | ||||
| DebuggerNodeNotInGraphError, DebuggerGraphNotExistError | DebuggerNodeNotInGraphError, DebuggerGraphNotExistError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import is_scope_type | |||||
| from mindinsight.debugger.stream_cache.debugger_graph import DebuggerGraph | from mindinsight.debugger.stream_cache.debugger_graph import DebuggerGraph | ||||
| from mindinsight.debugger.stream_cache.debugger_multigraph import DebuggerMultiGraph | |||||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | ||||
| @@ -24,16 +28,41 @@ class GraphHandler(StreamHandlerBase): | |||||
| """Metadata Handler.""" | """Metadata Handler.""" | ||||
| def __init__(self): | def __init__(self): | ||||
| self._graph_proto = None | |||||
| self._graph = None | |||||
| self._searched_node_list = [] | |||||
| # dict of <graph_name, GraphProto object> | |||||
| self._graph_proto = {} | |||||
| # dict of <graph_name, DebuggerGraph object> | |||||
| self._graph = {} | |||||
| self._searched_node_list = {} | |||||
| # list of node names in bfs order | |||||
| self.bfs_order = [] | self.bfs_order = [] | ||||
| # dict of <node full name, graph_name> | |||||
| self.graph_node_map = {} | |||||
| # dict of <node ui name, Node object> for all graphs | |||||
| self._all_leaf_nodes = {} | |||||
| # the whole graph | |||||
| self._whole_graph = None | |||||
| @property | |||||
| def whole_graph(self): | |||||
| """The property of whole_graph.""" | |||||
| return self._whole_graph | |||||
| @property | @property | ||||
| def graph(self): | def graph(self): | ||||
| """The property of graph.""" | """The property of graph.""" | ||||
| return self._graph_proto | return self._graph_proto | ||||
| @property | |||||
| def graph_names(self): | |||||
| """The property of graph names.""" | |||||
| return list(self._graph) | |||||
| @property | |||||
| def debugger_graph_obj(self): | |||||
| """The property of graph object.""" | |||||
| return self._graph | |||||
| def put(self, value): | def put(self, value): | ||||
| """ | """ | ||||
| Put value into graph cache. Called by grpc server. | Put value into graph cache. Called by grpc server. | ||||
| @@ -41,14 +70,23 @@ class GraphHandler(StreamHandlerBase): | |||||
| Args: | Args: | ||||
| value (GraphProto): The Graph proto message. | value (GraphProto): The Graph proto message. | ||||
| """ | """ | ||||
| self._graph_proto = value | |||||
| log.info("Put graph into cache.") | log.info("Put graph into cache.") | ||||
| # build graph | |||||
| graph = DebuggerGraph() | |||||
| graph.build_graph(value) | |||||
| self._graph = graph | |||||
| self.bfs_order = self._graph.get_bfs_order() | |||||
| for graph_name, graph_value in value.items(): | |||||
| self._graph_proto[graph_name] = graph_value | |||||
| # build sub graph | |||||
| graph = DebuggerGraph() | |||||
| graph.build_graph(graph_value) | |||||
| self._graph[graph_name] = graph | |||||
| self.bfs_order.extend(graph.get_bfs_order()) | |||||
| leaf_nodes = graph.leaf_nodes | |||||
| self._all_leaf_nodes.update(leaf_nodes) | |||||
| for _, node in leaf_nodes.items(): | |||||
| self.graph_node_map[node.full_name] = graph_name | |||||
| # build whole graph | |||||
| graph = DebuggerMultiGraph() | |||||
| graph.add_graph(self._graph) | |||||
| self._whole_graph = graph | |||||
| def get(self, filter_condition=None): | def get(self, filter_condition=None): | ||||
| """ | """ | ||||
| @@ -58,7 +96,7 @@ class GraphHandler(StreamHandlerBase): | |||||
| filter_condition (dict): | filter_condition (dict): | ||||
| - name (str): The full debug node name. | - name (str): The full debug node name. | ||||
| - graph_name (str): The relative graph_name of the node. | |||||
| - single_node (bool): If True, return the graph from root | - single_node (bool): If True, return the graph from root | ||||
| to the specific node; else, return the sublayer of the | to the specific node; else, return the sublayer of the | ||||
| graph. Default: False. | graph. Default: False. | ||||
| @@ -73,47 +111,121 @@ class GraphHandler(StreamHandlerBase): | |||||
| 'please start the training script first.') | 'please start the training script first.') | ||||
| return {'graph': {}} | return {'graph': {}} | ||||
| graph = {} | |||||
| if filter_condition is None: | if filter_condition is None: | ||||
| filter_condition = {} | filter_condition = {} | ||||
| graph = {'graph_names': self.graph_names} | |||||
| single_node = filter_condition.get('single_node', False) | single_node = filter_condition.get('single_node', False) | ||||
| name = filter_condition.get('name') | name = filter_condition.get('name') | ||||
| graph = {} | |||||
| graph_name = filter_condition.get('graph_name') | |||||
| if single_node is True: | if single_node is True: | ||||
| nodes = self.get_single_node(name) | |||||
| nodes = self._get_single_node(name, graph_name) | |||||
| else: | else: | ||||
| nodes = self.list_nodes(name) | |||||
| nodes = self._list_nodes(name, graph_name) | |||||
| graph.update(nodes) | graph.update(nodes) | ||||
| return {'graph': graph} | return {'graph': graph} | ||||
| def get_tensor_history(self, node_name, depth=0): | |||||
    def _get_single_node(self, name, graph_name=None):
        """
        Search node, and return every layer nodes until this node.

        Args:
            name (str): The name of node.
            graph_name (str): The graph name. If None, search in the whole
                multi-graph. Default: None.

        Returns:
            dict, every layer nodes until this node.
        """
        if graph_name:
            graph = self._get_graph(graph_name=graph_name)
            searched_graph = graph.search_single_node(name)
        else:
            searched_graph = self._whole_graph.search_single_node(name)
        return searched_graph
| def _list_nodes(self, scope, graph_name): | |||||
| """ | |||||
| Get the nodes of every layer in graph. | |||||
| Args: | |||||
| scope (str): The name of a scope. | |||||
| graph_name(str): The graph name. | |||||
| Returns: | |||||
| TypedDict{'nodes': ['Node_1', ...], 'graph_names': ['graph_name_1', ...]}, | |||||
| format is {'nodes': [<NodeObject>], 'graph_names': [<str>]}. | |||||
| example: | |||||
| { | |||||
| "nodes" : [ | |||||
| { | |||||
| "attr" : | |||||
| { | |||||
| "index" : "i: 0\n" | |||||
| }, | |||||
| "input" : {}, | |||||
| "name" : "input_tensor", | |||||
| "output" : | |||||
| { | |||||
| "Default/TensorAdd-op17" : | |||||
| { | |||||
| "edge_type" : "data", | |||||
| "scope" : "name_scope", | |||||
| "shape" : [1, 16, 128, 128] | |||||
| } | |||||
| }, | |||||
| "output_i" : -1, | |||||
| "proxy_input" : {}, | |||||
| "proxy_output" : {}, | |||||
| "independent_layout" : False, | |||||
| "subnode_count" : 0, | |||||
| "type" : "Data" | |||||
| } | |||||
| ] | |||||
| } | |||||
| """ | |||||
| if graph_name: | |||||
| graph = self._get_graph(graph_name, scope) | |||||
| nodes = graph.list_node_by_scope(scope=scope) | |||||
| res = {'nodes': nodes} | |||||
| else: | |||||
| nodes = self._whole_graph.list_node_by_scope(scope=scope) | |||||
| res = {'nodes': nodes} | |||||
| return res | |||||
| def get_tensor_history(self, node_name, graph_name=None, depth=0): | |||||
| """ | """ | ||||
| Get the tensor history of a specified node. | Get the tensor history of a specified node. | ||||
| Args: | Args: | ||||
| node_name (str): The debug name of the node. | node_name (str): The debug name of the node. | ||||
| graph_name (str): The graph_name. Default: None. | |||||
| depth (int): The number of layers the user | depth (int): The number of layers the user | ||||
| wants to trace. Default is 0. | wants to trace. Default is 0. | ||||
| Returns: | Returns: | ||||
| dict, basic tensor history, only including tensor name and tensor type and node type. | dict, basic tensor history, only including tensor name and tensor type and node type. | ||||
| """ | """ | ||||
| self._graph_exists() | |||||
| if not self._graph.exist_node(node_name): | |||||
| raise DebuggerNodeNotInGraphError(node_name) | |||||
| tensor_history, cur_outputs_nums = self._graph.get_tensor_history( | |||||
| node_name, depth | |||||
| ) | |||||
| graph_name, node_name = self._parse_node_name(node_name, graph_name) | |||||
| graph = self._get_graph(graph_name=graph_name, node_name=node_name) | |||||
| # validate node type, scope node has no tensor history | |||||
| node_type = graph.get_node_type(node_name) | |||||
| if is_scope_type(node_type): | |||||
| log.error("Scope type node has no tensor history.") | |||||
| raise DebuggerParamValueError("Invalid leaf node name.") | |||||
| # get tensor history | |||||
| tensor_history, cur_outputs_nums = graph.get_tensor_history(node_name, depth) | |||||
| # add the tensor type for tensor history | # add the tensor type for tensor history | ||||
| self._update_tensor_history(tensor_history[0:cur_outputs_nums], 'output') | |||||
| self._update_tensor_history(tensor_history[cur_outputs_nums:], 'input') | |||||
| self._update_tensor_history(tensor_history[0:cur_outputs_nums], 'output', graph_name) | |||||
| self._update_tensor_history(tensor_history[cur_outputs_nums:], 'input', graph_name) | |||||
| log.debug("Get %d tensors in tensor history for node <%s>.", len(tensor_history), node_name) | log.debug("Get %d tensors in tensor history for node <%s>.", len(tensor_history), node_name) | ||||
| return {'tensor_history': tensor_history} | return {'tensor_history': tensor_history} | ||||
| @staticmethod | @staticmethod | ||||
| def _update_tensor_history(tensor_history, tensor_type): | |||||
| def _update_tensor_history(tensor_history, tensor_type, graph_name): | |||||
| """ | """ | ||||
| Add tensor source type for tensor history. | Add tensor source type for tensor history. | ||||
| @@ -122,115 +234,285 @@ class GraphHandler(StreamHandlerBase): | |||||
| keys: `node_type` and `name`. `node_type` refers to the type of the node which | keys: `node_type` and `name`. `node_type` refers to the type of the node which | ||||
| the tensor come from. `name` refers to the tensor name. | the tensor come from. `name` refers to the tensor name. | ||||
| tensor_type (str): The source type of the tensor. `input` or `output`. | tensor_type (str): The source type of the tensor. `input` or `output`. | ||||
| graph_name (str): The graph name. | |||||
| """ | """ | ||||
| for single_tensor_info in tensor_history: | for single_tensor_info in tensor_history: | ||||
| single_tensor_info['type'] = tensor_type | single_tensor_info['type'] = tensor_type | ||||
| single_tensor_info['graph_name'] = graph_name | |||||
| def search_nodes(self, pattern): | def search_nodes(self, pattern): | ||||
| """ | """ | ||||
| Search nodes by given pattern. | Search nodes by given pattern. | ||||
| Args: | Args: | ||||
| pattern (Union[str, None]): The pattern of the node to search, | |||||
| if None, return all node names. | |||||
| pattern (dict): Filter condition. | |||||
| - name (str): The name pattern. | |||||
| - graph_name (str): The graph name. | |||||
| - node_category (str): The node_category. Default: None | |||||
| - condition (dict): The additional filter condition. | |||||
| Returns: | Returns: | ||||
| dict, the searched node. | dict, the searched node. | ||||
| """ | """ | ||||
| self._graph_exists() | |||||
| self._searched_node_list = self._graph.search_nodes_by_pattern(pattern) | |||||
| nodes = self._graph.get_nodes(self._searched_node_list) | |||||
| graph_name = pattern.pop('graph_name', None) | |||||
| search_nodes = self.get_searched_nodes(pattern, graph_name) | |||||
| # construct to search tree | |||||
| if not self._has_graph_scope(graph_name): | |||||
| for graph_name, searched_node_list in search_nodes.items(): | |||||
| graph = self._get_graph(graph_name=graph_name) | |||||
| format_nodes = graph.get_nodes(searched_node_list) | |||||
| return {'nodes': format_nodes} | |||||
| # deal with graph_name is None | |||||
| res = [] | |||||
| for graph_name, graph in self._graph.items(): | |||||
| format_nodes = graph.get_nodes(search_nodes.get(graph_name, [])) | |||||
| if not format_nodes: | |||||
| continue | |||||
| self._add_graph_scope_for_nodes(format_nodes, graph_name) | |||||
| search_graph = { | |||||
| 'name': graph_name, | |||||
| 'type': 'name_scope', | |||||
| 'nodes': format_nodes | |||||
| } | |||||
| res.append(search_graph) | |||||
| return {'nodes': res} | |||||
    def get_searched_node_list(self, pattern, graph_name):
        """
        Get searched node list in single graph.

        Args:
            pattern (dict): Filter condition, same format as accepted
                by `get_searched_nodes`.
            graph_name (str): The graph name.

        Returns:
            list, the nodes of `graph_name` that match `pattern`.
        """
        searched_nodes = self.get_searched_nodes(pattern, graph_name)
        return searched_nodes.get(graph_name, [])
| def get_searched_nodes(self, pattern, graph_name=None): | |||||
| """ | |||||
| Search nodes by given pattern. | |||||
| Args: | |||||
| pattern (dict): Filter condition. | |||||
| - name (str): The name pattern. | |||||
| - node_category (str): The node_category. Default: None | |||||
| - condition (dict): The additional filter condition. | |||||
| graph_name (str): The graph name. If not given, search in all sub graphs. Default: None. | |||||
| Returns: | |||||
| dict, the searched nodes. The format is dict of <graph_name, list[Node]>. | |||||
| """ | |||||
| if not graph_name: | |||||
| graph_names = self.graph_names | |||||
| else: | |||||
| graph_names = [graph_name] | |||||
| search_nodes = {} | |||||
| for sub_graph_name in graph_names: | |||||
| search_nodes[sub_graph_name] = self._search_in_single_graph(pattern, sub_graph_name) | |||||
| return search_nodes | |||||
| return {'nodes': nodes} | |||||
| def _search_in_single_graph(self, pattern, graph_name=None): | |||||
| """ | |||||
| Search nodes by given pattern. | |||||
| def get_nodes_by_scope(self, scope_name): | |||||
| Args: | |||||
| pattern (dict): Filter condition. | |||||
| - name (str): The name pattern. | |||||
| - node_category (str): The node_category. Default: None. | |||||
| - condition (dict): The additional filter condition. | |||||
| graph_name (str): The graph name. | |||||
| Returns: | |||||
| list, the searched node list. | |||||
| """ | |||||
| temp_node_list = [] | |||||
| node_category = pattern.get('node_category') | |||||
| if graph_name: | |||||
| graph = self._get_graph(graph_name=graph_name) | |||||
| else: | |||||
| graph = self._whole_graph | |||||
| # filter nodes by name | |||||
| if pattern.get('name'): | |||||
| if node_category: | |||||
| # get leaf nodes for forward filter | |||||
| temp_node_list = graph.search_leaf_nodes_by_pattern(pattern.get('name')) | |||||
| else: | |||||
| # optimize search nodes | |||||
| temp_node_list = graph.search_nodes_by_pattern(pattern.get('name')) | |||||
| if not temp_node_list: | |||||
| log.debug("No node named %s", pattern.get('name')) | |||||
| return [] | |||||
| # filter nodes by category | |||||
| if node_category: | |||||
| node_category = self._get_inner_node_category(node_category) | |||||
| condition = pattern['condition'].copy() if pattern.get('condition') else {} | |||||
| condition['search_range'] = temp_node_list | |||||
| temp_node_list = graph.search_nodes_by_category(node_category, condition=condition) | |||||
| return temp_node_list | |||||
| @staticmethod | |||||
| def _get_inner_node_category(node_category): | |||||
| """ | |||||
| Get inner node category. | |||||
| Args: | |||||
| node_category (str): The node category supported in | |||||
| mindinsight.conditionmgr.condition.TargetTypeEnum. | |||||
| Returns: | |||||
| CategoryTypeEnum, the translated value. | |||||
| """ | |||||
| try: | |||||
| res = CategoryTypeEnum(node_category) | |||||
| except ValueError as err: | |||||
| log.error("Invalid node category. %s", err) | |||||
| raise DebuggerParamValueError("Invalid node_category.") | |||||
| return res | |||||
| def get_nodes_by_scope(self, scope_name, graph_name): | |||||
| """ | """ | ||||
| Get node by a given scope name. | Get node by a given scope name. | ||||
| Args: | Args: | ||||
| scope_name (str): The name of scope. | scope_name (str): The name of scope. | ||||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||||
| Returns: | Returns: | ||||
| list[Node], a list of node. | list[Node], a list of node. | ||||
| """ | """ | ||||
| return self._graph.search_leaf_nodes_by_pattern(scope_name) | |||||
| if graph_name: | |||||
| graph = self._get_graph(graph_name) | |||||
| else: | |||||
| graph = self._whole_graph | |||||
| return graph.search_leaf_nodes_by_pattern(scope_name) | |||||
| def get_graph_id_by_name(self, node_name): | |||||
| """ | |||||
| Get graph id by full name. | |||||
| Args: | |||||
| node_name (str): The name of the node. | |||||
| Returns: | |||||
| str, the graph name of the node. | |||||
| Raises: | |||||
| DebuggerNodeNotInGraphError: If can not find the node in all graphs. | |||||
| """ | |||||
| if node_name: | |||||
| for graph_name, sub_graph in self._graph.items(): | |||||
| if sub_graph.exist_node(name=node_name): | |||||
| return graph_name | |||||
| log.error('Failed to find node %s in graph. Please make sure the graph has been sent and ' | |||||
| 'the node name is correct, and try again.', node_name) | |||||
| raise DebuggerGraphNotExistError | |||||
| def get_graph_id_by_full_name(self, node_name): | |||||
| """ | |||||
| Get graph id by full name. | |||||
| def get_searched_node_list(self): | |||||
| """Get searched node list.""" | |||||
| return self._searched_node_list | |||||
| Args: | |||||
| node_name (str): The full name of the node. | |||||
| Returns: | |||||
| str, the graph name of the node. | |||||
| Raises: | |||||
| DebuggerNodeNotInGraphError: If can not find the node in all graphs. | |||||
| """ | |||||
| graph_id = self.graph_node_map.get(node_name) if node_name else None | |||||
| if not graph_id: | |||||
| log.error("Failed to get graph id by full name: %s", node_name) | |||||
| raise DebuggerNodeNotInGraphError(node_name) | |||||
| return graph_id | |||||
| def get_node_type(self, node_name): | |||||
| def get_node_type(self, node_name, graph_name=None): | |||||
| """ | """ | ||||
| Get the type of the specified node. | Get the type of the specified node. | ||||
| Args: | Args: | ||||
| node_name (str): The debug name of the node. | node_name (str): The debug name of the node. | ||||
| graph_name (str): The relative graph_name of the node. Default: None. | |||||
| Returns: | Returns: | ||||
| A string of the node type, name_scope or leaf. | A string of the node type, name_scope or leaf. | ||||
| """ | """ | ||||
| self._graph_exists() | |||||
| node_type = self._graph.get_node_type(node_name) | |||||
| if graph_name: | |||||
| graph = self._get_graph(node_name=node_name, graph_name=graph_name) | |||||
| else: | |||||
| graph = self._whole_graph | |||||
| node_type = graph.get_node_type(node_name) | |||||
| return node_type | return node_type | ||||
| def get_full_name(self, node_name): | |||||
| def get_full_name(self, node_name, graph_name=None): | |||||
| """Get full name according to ui node name.""" | """Get full name according to ui node name.""" | ||||
| full_name = self._graph.get_full_name_by_node_name(node_name) if node_name else '' | |||||
| full_name = '' | |||||
| if node_name: | |||||
| if graph_name: | |||||
| graph = self._get_graph(node_name=node_name, graph_name=graph_name) | |||||
| else: | |||||
| graph = self._whole_graph | |||||
| full_name = graph.get_full_name_by_node_name(node_name) | |||||
| return full_name | return full_name | ||||
| def get_node_name_by_full_name(self, full_name): | |||||
| """Get UI node name by full name.""" | |||||
| if self._graph: | |||||
| node_name = self._graph.get_node_name_by_full_name(full_name) | |||||
| else: | |||||
| node_name = '' | |||||
| log.info("No graph received yet.") | |||||
| return node_name | |||||
| def get_node_basic_info(self, node_name, graph_name): | |||||
| """Get node basic info with graph scope.""" | |||||
| graph_name, node_name = self._parse_node_name(node_name=node_name, graph_name=graph_name) | |||||
| graph = self._get_graph(graph_name, node_name) | |||||
| full_name = graph.get_full_name_by_node_name(node_name) | |||||
| node_type = graph.get_node_type(node_name) | |||||
| return self.construct_node_basic_info(full_name, graph_name, node_name, node_type) | |||||
| def list_nodes(self, scope): | |||||
| def get_tensor_graph(self, tensor_name, graph_name): | |||||
| """ | """ | ||||
| Get the nodes of every layer in graph. | |||||
| Get tensor graph according to node name. | |||||
| Args: | Args: | ||||
| scope (str): The name of a scope. | |||||
| tensor_name (str): Tensor name, format is "node_name:<node_value>". | |||||
| graph_name (str): The relative graph_name of the node. Default: None. | |||||
| Returns: | Returns: | ||||
| TypedDict('Nodes', {'nodes': list[Node]}), format is {'nodes': [<Node object>]}. | |||||
| example: | |||||
| { | |||||
| "nodes" : [ | |||||
| { | |||||
| "attr" : | |||||
| { | |||||
| "index" : "i: 0\n" | |||||
| }, | |||||
| "input" : {}, | |||||
| "name" : "input_tensor", | |||||
| "output" : | |||||
| { | |||||
| "Default/TensorAdd-op17" : | |||||
| { | |||||
| "edge_type" : "data", | |||||
| "scope" : "name_scope", | |||||
| "shape" : [1, 16, 128, 128] | |||||
| } | |||||
| }, | |||||
| "output_i" : -1, | |||||
| "proxy_input" : {}, | |||||
| "proxy_output" : {}, | |||||
| "independent_layout" : False, | |||||
| "subnode_count" : 0, | |||||
| "type" : "Data" | |||||
| } | |||||
| ] | |||||
| } | |||||
| dict, relative node. | |||||
| """ | |||||
| node_name, _ = tensor_name.rsplit(':', 1) | |||||
| graph = self._get_graph(graph_name=graph_name, node_name=node_name) | |||||
| tensor_graph = graph.get_tensor_graph(node_name) | |||||
| return {'graph': tensor_graph} | |||||
| @staticmethod | |||||
| def construct_node_basic_info(full_name, graph_name, node_name, node_type): | |||||
| """Construct node basic info.""" | |||||
| node_name_with_graph_scope = '/'.join([graph_name, node_name]) if node_name else graph_name | |||||
| return NodeBasicInfo(name=node_name_with_graph_scope, full_name=full_name, type=node_type) | |||||
| def get_node_basic_info_by_scope(self, scope_name, graph_name): | |||||
| """ | """ | ||||
| if scope and not self._graph.exist_node(scope): | |||||
| raise DebuggerNodeNotInGraphError(node_name=scope) | |||||
| Get node by a given scope name. | |||||
| nodes = self._graph.list_node_by_scope(scope=scope) | |||||
| return {'nodes': nodes} | |||||
| Args: | |||||
| scope_name (str): The name of scope. | |||||
| graph_name (str): The relative graph_name of the watched node. Default: None. | |||||
| Returns: | |||||
| list[NodeBasicInfo], a list of node. | |||||
| """ | |||||
| graph_name, node_name = self._parse_node_name(scope_name, graph_name) | |||||
| graph = self._get_graph(graph_name) | |||||
| nodes = graph.search_leaf_nodes_by_pattern(node_name) | |||||
| res = [self.construct_node_basic_info(full_name=node.full_name, | |||||
| graph_name=graph_name, | |||||
| node_name=node.name, | |||||
| node_type=node.type) for node in nodes] | |||||
| return res | |||||
| def get_node_name_by_full_name(self, full_name, graph_name): | |||||
| """Get UI node name by full name and graph name.""" | |||||
| if graph_name and full_name: | |||||
| graph = self._get_graph(graph_name) | |||||
| node_name = graph.get_node_name_by_full_name(full_name) | |||||
| else: | |||||
| node_name = '' | |||||
| log.debug("Get empty full name.") | |||||
| return node_name | |||||
| def get_node_by_bfs_order(self, node_name=None, ascend=True): | def get_node_by_bfs_order(self, node_name=None, ascend=True): | ||||
| """ | """ | ||||
| @@ -240,11 +522,9 @@ class GraphHandler(StreamHandlerBase): | |||||
| node_name (str): The name of current chosen leaf node. | node_name (str): The name of current chosen leaf node. | ||||
| ascend (bool): If True, traverse the input nodes; | ascend (bool): If True, traverse the input nodes; | ||||
| If False, traverse the output nodes. Default is True. | If False, traverse the output nodes. Default is True. | ||||
| Returns: | Returns: | ||||
| Union[None, dict], the next node object in dict type or None. | Union[None, dict], the next node object in dict type or None. | ||||
| """ | """ | ||||
| self._graph_exists() | |||||
| bfs_order = self.bfs_order | bfs_order = self.bfs_order | ||||
| length = len(bfs_order) | length = len(bfs_order) | ||||
| @@ -269,11 +549,11 @@ class GraphHandler(StreamHandlerBase): | |||||
| f'Please check the node name {err}.' | f'Please check the node name {err}.' | ||||
| raise DebuggerParamValueError(msg) | raise DebuggerParamValueError(msg) | ||||
| next_node = self.get_next_node_in_bfs(index, length, ascend) | |||||
| next_node = self._get_next_node_in_bfs(index, length, ascend) | |||||
| return next_node | return next_node | ||||
| def get_next_node_in_bfs(self, index, length, ascend): | |||||
| def _get_next_node_in_bfs(self, index, length, ascend): | |||||
| """ | """ | ||||
| Get the next node in bfs order. | Get the next node in bfs order. | ||||
| @@ -294,28 +574,116 @@ class GraphHandler(StreamHandlerBase): | |||||
| return next_node | return next_node | ||||
| def get_single_node(self, name): | |||||
| def _graph_exists(self): | |||||
| """ | """ | ||||
| Search node, and return every layer nodes until this node. | |||||
| Check if the graph has been loaded in the debugger cache. | |||||
| Raises: | |||||
| DebuggerGraphNotExistError: If the graph does not exist. | |||||
| """ | |||||
| if not self._graph: | |||||
| log.error('The graph does not exist. Please start the ' | |||||
| 'training script and try again.') | |||||
| raise DebuggerGraphNotExistError | |||||
| def _get_graph(self, graph_name=None, node_name=None): | |||||
| """ | |||||
| Get the graph object according to graph name and node name. | |||||
| Args: | Args: | ||||
| name (str): The name of node. | |||||
| graph_name (str): The graph name. | |||||
| node_name (str): The node name. | |||||
| Returns: | Returns: | ||||
| dict, every layer nodes until this node. | |||||
| DebuggerGraph, the graph object. | |||||
| Raises: | |||||
| DebuggerGraphNotExistError: If the graph does not exist. | |||||
| """ | """ | ||||
| nodes = self._graph.search_single_node(name) | |||||
| if not graph_name and not node_name and len(self._graph) == 1: | |||||
| # get the graph if there is only one graph | |||||
| return list(self._graph.values())[0] | |||||
| graph_name = graph_name if graph_name else self.get_graph_id_by_name(node_name) | |||||
| graph = self._graph.get(graph_name) if graph_name else None | |||||
| # get graph according to graph name and check the node | |||||
| if graph and (not node_name or graph.exist_node(name=node_name)): | |||||
| return graph | |||||
| log.error('The graph %s does not exist node %s.', graph_name, node_name) | |||||
| raise DebuggerGraphNotExistError | |||||
| def _has_graph_scope(self, graph_name): | |||||
| """Check if query with graph_scope.""" | |||||
| return bool(graph_name is None and len(self._graph) > 1) | |||||
| def validate_graph_name(self, graph_name): | |||||
| """Validate graph_name.""" | |||||
| if graph_name and self._graph.get(graph_name) is None: | |||||
| log.error("No graph named %s in debugger cache.", graph_name) | |||||
| raise DebuggerGraphNotExistError | |||||
| if not graph_name and len(self._graph) == 1: | |||||
| graph_name = self.graph_names[0] | |||||
| return graph_name | |||||
| return nodes | |||||
| def _graph_exists(self): | |||||
| def _add_graph_scope_for_nodes(self, nodes, graph_name): | |||||
| """ | """ | ||||
| Check if the graph has been loaded in the debugger cache. | |||||
| Add graph scope for nodes. | |||||
| Args: | |||||
| nodes (list[Node]): List of nodes object. | |||||
| graph_name (str): The graph name. | |||||
| """ | |||||
| def _get_updated_node_info(cur_node, node_type): | |||||
| """Add graph scope in key.""" | |||||
| old_node = cur_node.get(node_type) | |||||
| if not old_node: | |||||
| return | |||||
| new_values = {} | |||||
| for old_name, node_info in old_node.items(): | |||||
| new_name = '/'.join([graph_name, old_name]) if old_name else graph_name | |||||
| new_values[new_name] = node_info | |||||
| cur_node[node_type] = new_values | |||||
| for node in nodes: | |||||
| node['name'] = '/'.join([graph_name, node['name']]) if node['name'] else graph_name | |||||
| _get_updated_node_info(node, 'input') | |||||
| _get_updated_node_info(node, 'output') | |||||
| if node.get('nodes'): | |||||
| self._add_graph_scope_for_nodes(node.get('nodes'), graph_name) | |||||
| def _parse_node_name(self, node_name, graph_name): | |||||
| """ | |||||
| Check if the node name should have graph scope. | |||||
| Args: | |||||
| node_name (str): The ui node name. | |||||
| graph_name (str): The graph name. | |||||
| Returns: | |||||
| str, parsed graph name. | |||||
| str, parsed node name. | |||||
| """ | |||||
| node_name = '' if node_name is None else node_name | |||||
| if self._has_graph_scope(graph_name): | |||||
| names = node_name.split("/", 1) | |||||
| graph_name = names[0] | |||||
| node_name = names[1] if len(names) == 2 else '' | |||||
| if graph_name is None and len(self._graph) == 1: | |||||
| graph_name = self.graph_names[0] | |||||
| return graph_name, node_name | |||||
| def validate_node_name(self, node_name, graph_name): | |||||
| """ | |||||
| Validate the graph exist the specified node. | |||||
| Args: | |||||
| node_name (str): The ui node name. | |||||
| graph_name (str): The graph name. | |||||
| Raises: | Raises: | ||||
| DebuggerGraphNotExistError: If the graph does not exist. | |||||
| DebuggerNodeNotInGraphError: If can not find the node in all graphs. | |||||
| """ | """ | ||||
| if self._graph is None: | |||||
| log.error('The graph does not exist. Please start the ' | |||||
| 'training script and try again.') | |||||
| raise DebuggerGraphNotExistError | |||||
| graph = self._get_graph(graph_name=graph_name) | |||||
| if not graph.exist_node(name=node_name): | |||||
| log.error("graph %s doesn't find node: %s.", graph_name, node_name) | |||||
| raise DebuggerNodeNotInGraphError(node_name) | |||||
| @@ -13,7 +13,7 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """Define the metadata stream handler.""" | """Define the metadata stream handler.""" | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import ServerStatus | from mindinsight.debugger.common.utils import ServerStatus | ||||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | ||||
| @@ -29,6 +29,8 @@ class MetadataHandler(StreamHandlerBase): | |||||
| self._cur_node_name = "" | self._cur_node_name = "" | ||||
| self._cur_full_name = "" | self._cur_full_name = "" | ||||
| self._backend = "" | self._backend = "" | ||||
| self._enable_recheck = False | |||||
| self._cur_graph_name = "" | |||||
| @property | @property | ||||
| def device_name(self): | def device_name(self): | ||||
| @@ -50,6 +52,16 @@ class MetadataHandler(StreamHandlerBase): | |||||
| """The property of current node name.""" | """The property of current node name.""" | ||||
| self._cur_node_name = node_name | self._cur_node_name = node_name | ||||
| @property | |||||
| def graph_name(self): | |||||
| """The property of current node name.""" | |||||
| return self._cur_graph_name | |||||
| @graph_name.setter | |||||
| def graph_name(self, graph_name): | |||||
| """The property of current node name.""" | |||||
| self._cur_graph_name = graph_name if graph_name else '' | |||||
| @property | @property | ||||
| def full_name(self): | def full_name(self): | ||||
| """The property of current node name.""" | """The property of current node name.""" | ||||
| @@ -90,6 +102,21 @@ class MetadataHandler(StreamHandlerBase): | |||||
| """ | """ | ||||
| self._client_ip = str(value) | self._client_ip = str(value) | ||||
| @property | |||||
| def enable_recheck(self): | |||||
| """The property of enable_recheck.""" | |||||
| return self._enable_recheck and self._state == ServerStatus.WAITING and self._step > 0 | |||||
| @enable_recheck.setter | |||||
| def enable_recheck(self, value): | |||||
| """ | |||||
| Set the property of enable_recheck. | |||||
| Args: | |||||
| value (bool): The new ip. | |||||
| """ | |||||
| self._enable_recheck = bool(value) | |||||
| def put(self, value): | def put(self, value): | ||||
| """ | """ | ||||
| Put value into metadata cache. Called by grpc server. | Put value into metadata cache. Called by grpc server. | ||||
| @@ -108,7 +135,7 @@ class MetadataHandler(StreamHandlerBase): | |||||
| Get updated value. Called by main server. | Get updated value. Called by main server. | ||||
| Args: | Args: | ||||
| filter_condition (str): The filter property. | |||||
| filter_condition (Union[str, list[str]]): The filter property. | |||||
| Returns: | Returns: | ||||
| dict, the metadata. | dict, the metadata. | ||||
| @@ -122,10 +149,15 @@ class MetadataHandler(StreamHandlerBase): | |||||
| 'pos': '0', | 'pos': '0', | ||||
| 'ip': self.client_ip, | 'ip': self.client_ip, | ||||
| 'node_name': self.node_name, | 'node_name': self.node_name, | ||||
| 'backend': self.backend | |||||
| 'backend': self.backend, | |||||
| 'enable_recheck': self.enable_recheck, | |||||
| 'graph_name': self.graph_name | |||||
| } | } | ||||
| else: | else: | ||||
| metadata[filter_condition] = getattr(self, filter_condition) if \ | |||||
| hasattr(self, filter_condition) else '' | |||||
| if not isinstance(filter_condition, list): | |||||
| filter_condition = [filter_condition] | |||||
| for field in filter_condition: | |||||
| metadata[field] = getattr(self, field) if \ | |||||
| hasattr(self, field) else None | |||||
| return {'metadata': metadata} | return {'metadata': metadata} | ||||
| @@ -17,7 +17,7 @@ import numpy as np | |||||
| from mindinsight.datavisual.data_transform.graph.node import NodeTypeEnum | from mindinsight.datavisual.data_transform.graph.node import NodeTypeEnum | ||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.proto.ms_graph_pb2 import DataType | from mindinsight.debugger.proto.ms_graph_pb2 import DataType | ||||
| from mindinsight.debugger.stream_cache.tensor import OpTensor, ConstTensor | from mindinsight.debugger.stream_cache.tensor import OpTensor, ConstTensor | ||||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | ||||
| @@ -32,6 +32,16 @@ class TensorHandler(StreamHandlerBase): | |||||
| self._tensors = {} | self._tensors = {} | ||||
| self._cur_step = 0 | self._cur_step = 0 | ||||
| @property | |||||
| def cur_step(self): | |||||
| """The property of current step.""" | |||||
| return self._cur_step | |||||
| @property | |||||
| def prev_step(self): | |||||
| """The property of previous step.""" | |||||
| return self._cur_step - 1 | |||||
| def put(self, value): | def put(self, value): | ||||
| """ | """ | ||||
| Put value into tensor cache. Called by grpc server. | Put value into tensor cache. Called by grpc server. | ||||
| @@ -98,7 +108,7 @@ class TensorHandler(StreamHandlerBase): | |||||
| self._tensors[tensor.name] = cache_tensor | self._tensors[tensor.name] = cache_tensor | ||||
| old_tensor = cache_tensor.get(step) | old_tensor = cache_tensor.get(step) | ||||
| if old_tensor and not self.is_value_diff(old_tensor.value, tensor.value): | |||||
| if old_tensor and not self._is_value_diff(old_tensor.value, tensor.value): | |||||
| log.debug("Tensor %s of step %s has no change. Ignore it.", tensor.name, step) | log.debug("Tensor %s of step %s has no change. Ignore it.", tensor.name, step) | ||||
| return False | return False | ||||
| cache_tensor[step] = tensor | cache_tensor[step] = tensor | ||||
| @@ -106,7 +116,7 @@ class TensorHandler(StreamHandlerBase): | |||||
| return True | return True | ||||
| @staticmethod | @staticmethod | ||||
| def is_value_diff(old_value, new_value): | |||||
| def _is_value_diff(old_value, new_value): | |||||
| """Check tensor value if there are equal.""" | """Check tensor value if there are equal.""" | ||||
| log.debug("old value type: %s, new_value type: %s", type(old_value), type(new_value)) | log.debug("old value type: %s, new_value type: %s", type(old_value), type(new_value)) | ||||
| if old_value is None and new_value is None: | if old_value is None and new_value is None: | ||||
| @@ -142,22 +152,28 @@ class TensorHandler(StreamHandlerBase): | |||||
| Args: | Args: | ||||
| filter_condition (dict): Filter condition. | filter_condition (dict): Filter condition. | ||||
| - name (str): The name of tensor. | |||||
| - name (str): The full name of tensor. | |||||
| - node_type (str): The type of the node. | - node_type (str): The type of the node. | ||||
| - prev (bool): Whether to get previous tensor. | |||||
| Returns: | Returns: | ||||
| dict, the tensor_value. | dict, the tensor_value. | ||||
| """ | """ | ||||
| name = filter_condition.get('name') | name = filter_condition.get('name') | ||||
| node_type = filter_condition.get('node_type') | node_type = filter_condition.get('node_type') | ||||
| shape = filter_condition.get('shape') | shape = filter_condition.get('shape') | ||||
| tensor = self._get_tensor(name, node_type) | |||||
| if filter_condition.get('prev'): | |||||
| step = self.prev_step | |||||
| else: | |||||
| step = self.cur_step | |||||
| tensor = self._get_tensor(name, node_type, step) | |||||
| if not tensor: | if not tensor: | ||||
| log.error("No tensor named %s", name) | |||||
| log.error("No tensor named %s at the step %s", name, step) | |||||
| raise DebuggerParamValueError("No tensor named {}".format(name)) | raise DebuggerParamValueError("No tensor named {}".format(name)) | ||||
| tensor_info = tensor.get_full_info(shape) | tensor_info = tensor.get_full_info(shape) | ||||
| self._update_has_prev_step_field(tensor_info, name, node_type) | |||||
| self._update_has_prev_step_field(tensor_info, name, node_type, step) | |||||
| return {'tensor_value': tensor_info} | return {'tensor_value': tensor_info} | ||||
| def _get_tensor(self, tensor_name, node_type=None, step=None): | def _get_tensor(self, tensor_name, node_type=None, step=None): | ||||
| @@ -167,7 +183,7 @@ class TensorHandler(StreamHandlerBase): | |||||
| Args: | Args: | ||||
| tensor_name (str): Tensor name, format like `node_name:slot`. | tensor_name (str): Tensor name, format like `node_name:slot`. | ||||
| node_type (str): Node type. | node_type (str): Node type. | ||||
| step (int): The step of tensor info. Default: None. Noe | |||||
| step (int): The step of tensor info. Default: None. | |||||
| Returns: | Returns: | ||||
| Union[OPTensor, ConstTensor], the tensor object. | Union[OPTensor, ConstTensor], the tensor object. | ||||
| @@ -178,7 +194,8 @@ class TensorHandler(StreamHandlerBase): | |||||
| if not tensor and node_type == NodeTypeEnum.CONST.value: | if not tensor and node_type == NodeTypeEnum.CONST.value: | ||||
| const_name = tensor_name.rsplit('/', 1)[-1] | const_name = tensor_name.rsplit('/', 1)[-1] | ||||
| tensor = self._const_vals.get(const_name) | tensor = self._const_vals.get(const_name) | ||||
| self._tensors[tensor_name] = {step: tensor} | |||||
| if tensor: | |||||
| self._tensors[tensor_name] = {step: tensor} | |||||
| return tensor | return tensor | ||||
| @@ -205,7 +222,7 @@ class TensorHandler(StreamHandlerBase): | |||||
| tensor_name = tensor_info.get('full_name') | tensor_name = tensor_info.get('full_name') | ||||
| node_type = tensor_info.get('node_type') | node_type = tensor_info.get('node_type') | ||||
| basic_info = self._get_basic_info(tensor_name, node_type) | basic_info = self._get_basic_info(tensor_name, node_type) | ||||
| flag = self._update_has_prev_step_field(basic_info, tensor_name, node_type) | |||||
| flag = self._update_has_prev_step_field(basic_info, tensor_name, node_type, self.cur_step) | |||||
| if flag is False: | if flag is False: | ||||
| missed_tensor = tensor_info.copy() | missed_tensor = tensor_info.copy() | ||||
| missed_tensor['iter'] = 'prev' | missed_tensor['iter'] = 'prev' | ||||
| @@ -223,22 +240,23 @@ class TensorHandler(StreamHandlerBase): | |||||
| return missed_tensors | return missed_tensors | ||||
| def _update_has_prev_step_field(self, tensor_info, tensor_name, node_type): | |||||
| def _update_has_prev_step_field(self, tensor_info, tensor_name, node_type, step): | |||||
| """Update has_prev_step field in tensor info.""" | """Update has_prev_step field in tensor info.""" | ||||
| flag = None | flag = None | ||||
| cur_tensor_value = bool(tensor_info and tensor_info.get('value') is not None) | cur_tensor_value = bool(tensor_info and tensor_info.get('value') is not None) | ||||
| if node_type == NodeTypeEnum.PARAMETER.value: | if node_type == NodeTypeEnum.PARAMETER.value: | ||||
| flag = self._get_prev_tensor_value_status(tensor_name) | |||||
| flag = self._get_prev_tensor_value_status(tensor_name, step) | |||||
| if flag and cur_tensor_value: | if flag and cur_tensor_value: | ||||
| tensor_info['has_prev_step'] = True | tensor_info['has_prev_step'] = True | ||||
| return flag | return flag | ||||
| def _get_prev_tensor_value_status(self, tensor_name): | |||||
| def _get_prev_tensor_value_status(self, tensor_name, step): | |||||
| """ | """ | ||||
| Get the status of tensor value of previous step. | Get the status of tensor value of previous step. | ||||
| Args: | Args: | ||||
| tensor_name (str): Tensor name. | tensor_name (str): Tensor name. | ||||
| step (int): The step of the tensor. | |||||
| Returns: | Returns: | ||||
| Union[None, bool], the status of previous tensor value. If True, there is valid previous | Union[None, bool], the status of previous tensor value. If True, there is valid previous | ||||
| @@ -247,7 +265,7 @@ class TensorHandler(StreamHandlerBase): | |||||
| """ | """ | ||||
| flag = None | flag = None | ||||
| # check if the tensor has previous step value. | # check if the tensor has previous step value. | ||||
| prev_step = self._cur_step - 1 | |||||
| prev_step = step - 1 | |||||
| if prev_step < 0: | if prev_step < 0: | ||||
| return flag | return flag | ||||
| tensor = self._get_tensor(tensor_name, step=prev_step) | tensor = self._get_tensor(tensor_name, step=prev_step) | ||||
| @@ -314,6 +332,8 @@ class TensorHandler(StreamHandlerBase): | |||||
| tensor_comparison = curr_tensor.tensor_comparison | tensor_comparison = curr_tensor.tensor_comparison | ||||
| if not tensor_comparison or tensor_comparison.tolerance != tolerance: | if not tensor_comparison or tensor_comparison.tolerance != tolerance: | ||||
| if isinstance(curr_tensor.value, np.ndarray) and isinstance(prev_tensor.value, np.ndarray): | if isinstance(curr_tensor.value, np.ndarray) and isinstance(prev_tensor.value, np.ndarray): | ||||
| if curr_tensor.value.shape != prev_tensor.value.shape: | |||||
| raise DebuggerParamValueError("The shape of these two step tensors is not the same.") | |||||
| tensor_diff = TensorUtils.calc_diff_between_two_tensor(curr_tensor.value, prev_tensor.value, tolerance) | tensor_diff = TensorUtils.calc_diff_between_two_tensor(curr_tensor.value, prev_tensor.value, tolerance) | ||||
| if not tensor_comparison: | if not tensor_comparison: | ||||
| stats = TensorUtils.get_statistics_from_tensor(tensor_diff) | stats = TensorUtils.get_statistics_from_tensor(tensor_diff) | ||||
| @@ -333,9 +353,34 @@ class TensorHandler(StreamHandlerBase): | |||||
| result = np.stack([prev_tensor_slice, curr_tensor_slice, tensor_diff_slice], axis=-1) | result = np.stack([prev_tensor_slice, curr_tensor_slice, tensor_diff_slice], axis=-1) | ||||
| tensor_info['diff'] = result.tolist() | tensor_info['diff'] = result.tolist() | ||||
| stats = TensorUtils.get_statistics_from_tensor(tensor_diff_slice) | stats = TensorUtils.get_statistics_from_tensor(tensor_diff_slice) | ||||
| curr_tensor_stats = TensorUtils.get_statistics_from_tensor(curr_tensor.value) | |||||
| curr_tensor_slice_stats = TensorUtils.get_statistics_from_tensor(curr_tensor_slice) | |||||
| prev_tensor_stats = TensorUtils.get_statistics_from_tensor(prev_tensor.value) | |||||
| prev_tensor_slice_stats = TensorUtils.get_statistics_from_tensor(prev_tensor_slice) | |||||
| tensor_info['curr_step_statistics'] = TensorUtils.get_statistics_dict(stats=curr_tensor_slice_stats, | |||||
| overall_stats=curr_tensor_stats) | |||||
| tensor_info['prev_step_statistics'] = TensorUtils.get_statistics_dict(stats=prev_tensor_slice_stats, | |||||
| overall_stats=prev_tensor_stats) | |||||
| tensor_info['statistics'] = TensorUtils.get_statistics_dict(stats=stats, | tensor_info['statistics'] = TensorUtils.get_statistics_dict(stats=stats, | ||||
| overall_stats=tensor_comparison.stats) | overall_stats=tensor_comparison.stats) | ||||
| elif isinstance(curr_tensor_slice, str): | elif isinstance(curr_tensor_slice, str): | ||||
| tensor_info['diff'] = curr_tensor_slice | tensor_info['diff'] = curr_tensor_slice | ||||
| reply = {'tensor_value': tensor_info} | reply = {'tensor_value': tensor_info} | ||||
| return reply | return reply | ||||
| def get_tensor_statistics(self, tensor_name, node_type): | |||||
| """ | |||||
| Get Tensor statistics. | |||||
| Args: | |||||
| tensor_name (str): Tensor name, format like `node_name:slot`. | |||||
| node_type (str): Node type. | |||||
| Returns: | |||||
| dict, overall statistics. | |||||
| """ | |||||
| res = {} | |||||
| tensor = self._get_tensor(tensor_name, node_type) | |||||
| if tensor: | |||||
| res = tensor.get_tensor_statistics() | |||||
| return res | |||||
| @@ -13,25 +13,37 @@ | |||||
| # limitations under the License. | # limitations under the License. | ||||
| # ============================================================================ | # ============================================================================ | ||||
| """Define the watchpoint stream handler.""" | """Define the watchpoint stream handler.""" | ||||
| import numpy as np | |||||
| from mindinsight.conditionmgr.condition import ValueTypeEnum | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | ||||
| DebuggerParamTypeError | DebuggerParamTypeError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import is_scope_type | |||||
| from mindinsight.debugger.proto.debug_grpc_pb2 import SetCMD | from mindinsight.debugger.proto.debug_grpc_pb2 import SetCMD | ||||
| from mindinsight.debugger.stream_cache.watchpoint import Watchpoint, WatchpointHit, \ | from mindinsight.debugger.stream_cache.watchpoint import Watchpoint, WatchpointHit, \ | ||||
| WATCHPOINT_CONDITION_MAPPING | |||||
| WatchNodeTree | |||||
| from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | from mindinsight.debugger.stream_handler.base_handler import StreamHandlerBase | ||||
| class WatchpointHandler(StreamHandlerBase): | class WatchpointHandler(StreamHandlerBase): | ||||
| """watchpoint Handler.""" | |||||
| """Watchpoint Handler.""" | |||||
| def __init__(self): | def __init__(self): | ||||
| self._watchpoints = {} | self._watchpoints = {} | ||||
| # list of ids of new created watchpoints | |||||
| self._created_watchpoints = [] | |||||
| # list of SetCMD of watchpoints to be deleted | |||||
| self._deleted_watchpoints = [] | self._deleted_watchpoints = [] | ||||
| # dict of <id, SetCMD> of watchpoint to be updated | |||||
| self._updated_watchpoints = {} | self._updated_watchpoints = {} | ||||
| # the collection of watched node full names, which have been sent to MindSpore | |||||
| self._all_watched_node_full_names = set() | |||||
| # the collection of new watched node full names, which have not been sent to MindSpore | |||||
| self._new_watched_node_full_names = set() | |||||
| # record the temp stored nodes in MS, which could be set as watch node for recheck on GPU | |||||
| # should be clean at the beginning of each step | |||||
| self._temp_cached_node_full_names = set() | |||||
| self._latest_id = 0 | self._latest_id = 0 | ||||
| self._cache_set_cmd = {} | |||||
| def put(self, value): | def put(self, value): | ||||
| """ | """ | ||||
| @@ -42,34 +54,50 @@ class WatchpointHandler(StreamHandlerBase): | |||||
| """ | """ | ||||
| new_id = value.watchpoint_id | new_id = value.watchpoint_id | ||||
| self._watchpoints[new_id] = value | self._watchpoints[new_id] = value | ||||
| self._created_watchpoints.append(new_id) | |||||
| self._updated_watchpoints[new_id] = value | self._updated_watchpoints[new_id] = value | ||||
| self._latest_id = new_id | self._latest_id = new_id | ||||
| log.debug("Put watchpoint %d into cache.", new_id) | log.debug("Put watchpoint %d into cache.", new_id) | ||||
| def sync_set_cmd(self): | |||||
| def clean_temp_cached_names(self): | |||||
| """Clean temp cached node.""" | |||||
| self._temp_cached_node_full_names.clear() | |||||
| def add_temp_cached_name(self, node_full_name): | |||||
| """Add temp stored node in cache.""" | |||||
| if node_full_name: | |||||
| self._temp_cached_node_full_names.add(node_full_name) | |||||
| def sync_set_cmd(self, set_cmds): | |||||
| """Clean temp watchpoints.""" | """Clean temp watchpoints.""" | ||||
| self._new_watched_node_full_names = set() | |||||
| self._created_watchpoints = [] | |||||
| self._deleted_watchpoints = [] | self._deleted_watchpoints = [] | ||||
| self._updated_watchpoints = {} | self._updated_watchpoints = {} | ||||
| for set_cmd in set_cmds: | |||||
| self._cache_set_cmd[set_cmd.id] = set_cmd | |||||
| def clean_cache_set_cmd(self, set_cmd): | |||||
| """Clean cache set command.""" | |||||
| self._cache_set_cmd.pop(set_cmd.id, None) | |||||
| def get_watchpoint_by_id(self, watchpoint_id): | def get_watchpoint_by_id(self, watchpoint_id): | ||||
| """Get watchpoint by watchpoint id.""" | """Get watchpoint by watchpoint id.""" | ||||
| watchpoint = self._watchpoints.get(watchpoint_id) | |||||
| if not watchpoint: | |||||
| log.error("Invalid watchpoint id %d", watchpoint_id) | |||||
| raise DebuggerParamValueError("Invalid watchpoint id {}".format(watchpoint_id)) | |||||
| res = self.get(watchpoint_id) | |||||
| watchpoint = res.get('watch_points')[0] | |||||
| return watchpoint | return watchpoint | ||||
| def get(self, filter_condition=False): | |||||
| def get(self, filter_condition=None): | |||||
| """ | """ | ||||
| Get the watchpoints. | Get the watchpoints. | ||||
| Args: | Args: | ||||
| filter_condition (bool): If True, get all watchpoints without nodes. If False, | |||||
| get updated watchpoints in SetCMD proto format. Default: False. | |||||
| filter_condition (Union[None, int]): The filter conditions. Get watchpoint by | |||||
| id. If None, return all watchpoint. Default: None. | |||||
| Returns: | Returns: | ||||
| dict, the watchpoints. | |||||
| dict, the watchpoint list. | |||||
| """ | """ | ||||
| reply = [] | reply = [] | ||||
| if not filter_condition: | if not filter_condition: | ||||
| @@ -78,17 +106,85 @@ class WatchpointHandler(StreamHandlerBase): | |||||
| watchpoint_info = watchpoint.get_watch_condition_info() | watchpoint_info = watchpoint.get_watch_condition_info() | ||||
| reply.append(watchpoint_info) | reply.append(watchpoint_info) | ||||
| else: | else: | ||||
| # get updated watchpoint list | |||||
| for _, watchpoint in self._updated_watchpoints.items(): | |||||
| set_cmd = watchpoint.get_set_cmd() | |||||
| reply.append(set_cmd) | |||||
| reply.extend(self._deleted_watchpoints) | |||||
| self.validate_watchpoint_id(filter_condition) | |||||
| reply = [self._watchpoints.get(filter_condition)] | |||||
| log.debug("get the watch points with filter_condition:%s", filter_condition) | log.debug("get the watch points with filter_condition:%s", filter_condition) | ||||
| return {'watch_points': reply} | return {'watch_points': reply} | ||||
| def set_watch_nodes(self, graph, graph_stream, watch_point_id): | |||||
| def get_pending_commands(self, graph_stream): | |||||
| """ | |||||
| Get all watchpoint in SetCMD proto format. | |||||
| Args: | |||||
| graph_stream (GraphHandler): Graph handler. | |||||
| Returns: | |||||
| list[SetCMD], updated watchpoint to be sent to MindSpore. | |||||
| """ | |||||
| res = [] | |||||
| new_watched_nodes = set() | |||||
| self._all_watched_node_full_names.clear() | |||||
| for _, watchpoint in self._updated_watchpoints.items(): | |||||
| # construct set command with leaf nodes | |||||
| watch_nodes = watchpoint.get_watch_nodes() | |||||
| leaf_watch_nodes = self._expand_to_leaf_nodes(graph_stream, watch_nodes) | |||||
| res.append(watchpoint.get_pending_cmd(leaf_watch_nodes)) | |||||
| # update all watched node names | |||||
| watch_node_names = [watch_node.full_name for watch_node in [*watch_nodes, *leaf_watch_nodes]] | |||||
| new_watched_nodes.update(watch_node_names) | |||||
| res.extend(self._deleted_watchpoints) | |||||
| for _, set_cmd in self._cache_set_cmd.items(): | |||||
| res.append(set_cmd) | |||||
| self._all_watched_node_full_names = new_watched_nodes | |||||
| return res | |||||
| @staticmethod | |||||
| def _expand_to_leaf_nodes(graph_stream, watch_nodes): | |||||
| """ | |||||
| Get all leaf node basic info according to watch nodes. | |||||
| Args: | |||||
| graph_stream (GraphHandler): Graph handler. | |||||
| watch_nodes (list[NodeBasicInfo]): The list of watch node basic infos. | |||||
| Returns: | |||||
| list[NodeBasicInfo], expanded leaf basic node infos. | |||||
| """ | |||||
| leaf_watch_nodes = [] | |||||
| for node in watch_nodes: | |||||
| if is_scope_type(node.type): | |||||
| pure_node_name = None | |||||
| if len(node.name.split('/')) > 1: | |||||
| graph_name, pure_node_name = node.name.split('/', 1) | |||||
| else: | |||||
| graph_name = node.name | |||||
| search_node_infos = graph_stream.get_node_basic_info_by_scope(pure_node_name, graph_name=graph_name) | |||||
| leaf_watch_nodes.extend(search_node_infos) | |||||
| else: | |||||
| leaf_watch_nodes.append(node) | |||||
| return leaf_watch_nodes | |||||
| def is_recheckable(self, backend=None): | |||||
| """ | |||||
| Check if current status is able to recheck. | |||||
| Args: | |||||
| backend (str): The backend info. 'Ascend' or 'GPU'. Default: None. | |||||
| Returns: | |||||
| bool, if enable to recheck. | |||||
| """ | |||||
| enable_recheck = bool(self._updated_watchpoints or self._deleted_watchpoints) | |||||
| if backend == 'GPU' and enable_recheck: | |||||
| # on GPU, disable to recheck if there are new watched node of which the tensor | |||||
| # has not been stored on MindSpore | |||||
| diff_set = self._new_watched_node_full_names - self._all_watched_node_full_names | |||||
| enable_recheck = not diff_set or diff_set.issubset(self._temp_cached_node_full_names) | |||||
| return enable_recheck | |||||
| def set_watch_nodes(self, graph, graph_stream, watch_point_id, graph_name=None): | |||||
| """ | """ | ||||
| set watch nodes for graph. | set watch nodes for graph. | ||||
| @@ -96,54 +192,88 @@ class WatchpointHandler(StreamHandlerBase): | |||||
| graph (dict): The graph with list of nodes. | graph (dict): The graph with list of nodes. | ||||
| graph_stream (GraphHandler): The graph handler. | graph_stream (GraphHandler): The graph handler. | ||||
| watch_point_id (int): The id of watchpoint. | watch_point_id (int): The id of watchpoint. | ||||
| graph_name (str): The graph name. | |||||
| """ | """ | ||||
| if not (watch_point_id and graph): | if not (watch_point_id and graph): | ||||
| return | return | ||||
| log.debug("add watch flags") | log.debug("add watch flags") | ||||
| watchpoint = self._watchpoints.get(watch_point_id) | watchpoint = self._watchpoints.get(watch_point_id) | ||||
| self._set_watch_status_recursively(graph, graph_stream, watchpoint) | |||||
| self._set_watch_status_recursively(graph, graph_stream, watchpoint, graph_name) | |||||
| def _set_watch_status_recursively(self, graph, graph_stream, watchpoint): | |||||
| def _set_watch_status_recursively(self, graph, graph_stream, watchpoint, graph_name=None): | |||||
| """Set watch status to graph.""" | """Set watch status to graph.""" | ||||
| if not isinstance(graph, dict): | |||||
| log.warning("The graph is not dict.") | |||||
| return | |||||
| if graph.get('children'): | if graph.get('children'): | ||||
| self._set_watch_status_recursively(graph.get('children'), graph_stream, watchpoint) | |||||
| self._set_watch_status_recursively( | |||||
| graph.get('children'), graph_stream, watchpoint, graph_name) | |||||
| for node in graph.get('nodes', []): | |||||
| if not isinstance(node, dict): | |||||
| log.warning("The node is not dict.") | |||||
| return | |||||
| if graph.get('nodes'): | |||||
| _ = self._set_watch_state_for_nodes(graph['nodes'], graph_stream, watchpoint, graph_name) | |||||
| def _set_watch_state_for_nodes(self, nodes, graph_stream, watchpoint, graph_name): | |||||
| """ | |||||
| Set watch state for nodes. | |||||
| Args: | |||||
| nodes (list[Node]): List of node info. | |||||
| Returns: | |||||
| int, the number of all watched nodes. | |||||
| """ | |||||
| all_watched_num = 0 | |||||
| for node in nodes: | |||||
| node_name = node.get('name') | node_name = node.get('name') | ||||
| if not node_name: | |||||
| continue | |||||
| full_name = graph_stream.get_full_name(node_name) | |||||
| flag = watchpoint.get_node_status(node_name, node.get('type'), full_name) | |||||
| node['watched'] = flag | |||||
| # search result could have `nodes` in nodes object | |||||
| if node.get('nodes'): | if node.get('nodes'): | ||||
| self._set_watch_status_recursively(node, graph_stream, watchpoint) | |||||
| flag = self._set_watch_state_for_nodes(node.get('nodes'), graph_stream, watchpoint, graph_name) | |||||
| else: | |||||
| full_name = graph_stream.get_full_name(node_name, graph_name) | |||||
| new_node_name = node_name if graph_name is None else '/'.join([graph_name, node_name]) | |||||
| flag = watchpoint.get_node_status(new_node_name, node.get('type'), full_name) | |||||
| node['watched'] = flag | |||||
| if flag == WatchNodeTree.TOTAL_WATCH: | |||||
| all_watched_num += 1 | |||||
| # calculate the state of current node. | |||||
| if not all_watched_num: | |||||
| state = WatchNodeTree.NOT_WATCH | |||||
| elif all_watched_num == len(nodes): | |||||
| state = WatchNodeTree.TOTAL_WATCH | |||||
| else: | |||||
| state = WatchNodeTree.PARTIAL_WATCH | |||||
| return state | |||||
| def create_watchpoint(self, watch_condition, watch_nodes=None, watch_point_id=None): | |||||
| def create_watchpoint(self, condition_mgr, watch_condition, watch_nodes=None, watch_point_id=None): | |||||
| """ | """ | ||||
| Create watchpoint. | Create watchpoint. | ||||
| Args: | Args: | ||||
| condition_mgr (ConditionMgr): Instance of ConditionMgr. | |||||
| watch_condition (dict): The watch condition. | watch_condition (dict): The watch condition. | ||||
| - condition (str): Accept `INF` or `NAN`. | |||||
| - param (list[float]): Not defined yet. | |||||
| "condition": { | |||||
| id: "tensor_too_large", | |||||
| "params": [ | |||||
| { | |||||
| "name": "abs_mean_gt", | |||||
| "disable": false, | |||||
| "value": 1.1 | |||||
| } | |||||
| ] | |||||
| } | |||||
| - id (str): Id of condition. | |||||
| - param (list[dict]): The list of param for this condition. | |||||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | watch_nodes (list[NodeBasicInfo]): The list of node basic info. | ||||
| watch_point_id (int): The id of watchpoint. | watch_point_id (int): The id of watchpoint. | ||||
| Returns: | Returns: | ||||
| int, the new id of watchpoint. | int, the new id of watchpoint. | ||||
| """ | """ | ||||
| validate_watch_condition(watch_condition) | |||||
| validate_watch_condition(condition_mgr, watch_condition) | |||||
| watch_condition = set_default_param(condition_mgr, watch_condition) | |||||
| new_id = self._latest_id + 1 | new_id = self._latest_id + 1 | ||||
| watchpoint = Watchpoint(new_id, watch_condition) | watchpoint = Watchpoint(new_id, watch_condition) | ||||
| if watch_nodes: | if watch_nodes: | ||||
| watchpoint.add_nodes(watch_nodes) | watchpoint.add_nodes(watch_nodes) | ||||
| self._add_watch_node_in_cache(watch_nodes) | |||||
| elif watch_point_id: | elif watch_point_id: | ||||
| self.validate_watchpoint_id(watch_point_id) | self.validate_watchpoint_id(watch_point_id) | ||||
| watchpoint.copy_nodes_from(self._watchpoints.get(watch_point_id)) | watchpoint.copy_nodes_from(self._watchpoints.get(watch_point_id)) | ||||
| @@ -157,34 +287,51 @@ class WatchpointHandler(StreamHandlerBase): | |||||
| Args: | Args: | ||||
| watch_point_id (int): The id of watchpoint. | watch_point_id (int): The id of watchpoint. | ||||
| watch_nodes (list[str]): The list of node names. | |||||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||||
| watched (bool): The update operator on nodes. If False, remove nodes from watch nodes. | watched (bool): The update operator on nodes. If False, remove nodes from watch nodes. | ||||
| If True, add nodes to watch nodes. Default: False. | If True, add nodes to watch nodes. Default: False. | ||||
| Returns: | |||||
| dict, empty response. | |||||
| """ | """ | ||||
| self.validate_watchpoint_id(watch_point_id) | self.validate_watchpoint_id(watch_point_id) | ||||
| watchpoint = self._watchpoints.get(watch_point_id) | watchpoint = self._watchpoints.get(watch_point_id) | ||||
| if watched: | if watched: | ||||
| watchpoint.add_nodes(watch_nodes) | watchpoint.add_nodes(watch_nodes) | ||||
| self._add_watch_node_in_cache(watch_nodes) | |||||
| else: | else: | ||||
| watchpoint.remove_nodes(watch_nodes) | watchpoint.remove_nodes(watch_nodes) | ||||
| self._remove_watch_node_from_cache(watch_nodes) | |||||
| self._updated_watchpoints[watch_point_id] = watchpoint | self._updated_watchpoints[watch_point_id] = watchpoint | ||||
| log.debug("Update watchpoint %d in cache.", watch_point_id) | log.debug("Update watchpoint %d in cache.", watch_point_id) | ||||
| def delete_watchpoint(self, watch_point_id): | |||||
| def delete_watchpoint(self, watch_point_id=None): | |||||
| """ | """ | ||||
| Delete watchpoint. | Delete watchpoint. | ||||
| Args: | Args: | ||||
| watch_point_id (int): The id of watchpoint. | |||||
| watch_point_id (Union[None, int]): The id of watchpoint. | |||||
| If None, delete all watchpoints. Default: None. | |||||
| """ | |||||
| if watch_point_id is None: | |||||
| watch_point_ids = [sub_id for sub_id, _ in self._watchpoints.items()] | |||||
| else: | |||||
| self.validate_watchpoint_id(watch_point_id) | |||||
| watch_point_ids = [watch_point_id] | |||||
| for single_id in watch_point_ids: | |||||
| self._delete_single_watchpoint(single_id) | |||||
| Returns: | |||||
| dict, empty response. | |||||
| def _delete_single_watchpoint(self, watch_point_id): | |||||
| """ | |||||
| Delete single watchpoint. | |||||
| Args: | |||||
| watch_point_id (int): The id of watchpoint. | |||||
| """ | """ | ||||
| self.validate_watchpoint_id(watch_point_id) | |||||
| self._watchpoints.pop(watch_point_id) | self._watchpoints.pop(watch_point_id) | ||||
| # if the watchpoint has not been created by MindSpore, clean the relative cache directly | |||||
| if watch_point_id in self._created_watchpoints: | |||||
| self._created_watchpoints.remove(watch_point_id) | |||||
| self._updated_watchpoints.pop(watch_point_id) | |||||
| log.debug("Cancel create watchpoint %d in cache.", watch_point_id) | |||||
| return | |||||
| set_cmd = SetCMD() | set_cmd = SetCMD() | ||||
| set_cmd.id = watch_point_id | set_cmd.id = watch_point_id | ||||
| set_cmd.delete = True | set_cmd.delete = True | ||||
| @@ -200,11 +347,33 @@ class WatchpointHandler(StreamHandlerBase): | |||||
| log.error("Invalid watchpoint id: %d.", watch_point_id) | log.error("Invalid watchpoint id: %d.", watch_point_id) | ||||
| raise DebuggerParamValueError("Invalid watchpoint id: {}".format(watch_point_id)) | raise DebuggerParamValueError("Invalid watchpoint id: {}".format(watch_point_id)) | ||||
| def _add_watch_node_in_cache(self, watch_nodes): | |||||
| """ | |||||
| Add watch nodes in cache. | |||||
| Args: | |||||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||||
| """ | |||||
| node_full_names = [node.full_name for node in watch_nodes] | |||||
| self._new_watched_node_full_names.update(node_full_names) | |||||
| def _remove_watch_node_from_cache(self, watch_nodes): | |||||
| """ | |||||
| Remove watch nodes from cache. | |||||
| Args: | |||||
| watch_nodes (list[NodeBasicInfo]): The list of node basic info. | |||||
| """ | |||||
| for node in watch_nodes: | |||||
| if node.full_name in self._new_watched_node_full_names: | |||||
| self._new_watched_node_full_names.remove(node.full_name) | |||||
| class WatchpointHitHandler(StreamHandlerBase): | class WatchpointHitHandler(StreamHandlerBase): | ||||
| """Watchpoint hit handler.""" | """Watchpoint hit handler.""" | ||||
| def __init__(self): | def __init__(self): | ||||
| # dict of <ui node_name, dict of <slot, WatchpointHit>>, | |||||
| self._hits = {} | self._hits = {} | ||||
| @property | @property | ||||
| @@ -224,20 +393,41 @@ class WatchpointHitHandler(StreamHandlerBase): | |||||
| - watchpoint (Watchpoint): The Watchpoint that a node hit. | - watchpoint (Watchpoint): The Watchpoint that a node hit. | ||||
| - node_name (str): The UI node name. | - node_name (str): The UI node name. | ||||
| - graph_name (str): The graph name. | |||||
| """ | """ | ||||
| watchpoint_hit = WatchpointHit( | watchpoint_hit = WatchpointHit( | ||||
| tensor_proto=value.get('tensor_proto'), | tensor_proto=value.get('tensor_proto'), | ||||
| watchpoint=value.get('watchpoint'), | watchpoint=value.get('watchpoint'), | ||||
| node_name=value.get('node_name') | |||||
| node_name=value.get('node_name'), | |||||
| graph_name=value.get('graph_name') | |||||
| ) | ) | ||||
| # get all hit watchpoints according to node name ans tensor slot | |||||
| watchpoint_hits = self._get_watchpoints_by_tensor_name(watchpoint_hit.node_name, | |||||
| watchpoint_hit.slot) | |||||
| if watchpoint_hit not in watchpoint_hits: | |||||
| watchpoint_hits.append(watchpoint_hit) | |||||
| def _get_watchpoints_by_tensor_name(self, node_name, slot): | |||||
| """ | |||||
| Get hit tensors according to ui node name and slot. | |||||
| node_name = value.get('node_name') | |||||
| hit_tensors = self._hits.get(node_name) | |||||
| Args: | |||||
| node_name (str): The node name. | |||||
| slot (str): The tensor slot. | |||||
| Returns: | |||||
| list, list of watchpoints. | |||||
| """ | |||||
| hit_node = self._hits.get(node_name) | |||||
| if hit_node is None: | |||||
| hit_node = {} | |||||
| self._hits[node_name] = hit_node | |||||
| hit_tensors = hit_node.get(slot) | |||||
| if hit_tensors is None: | if hit_tensors is None: | ||||
| hit_tensors = [] | hit_tensors = [] | ||||
| self._hits[node_name] = hit_tensors | |||||
| if watchpoint_hit not in hit_tensors: | |||||
| hit_tensors.append(watchpoint_hit) | |||||
| hit_node[slot] = hit_tensors | |||||
| return hit_tensors | |||||
| def get(self, filter_condition=None): | def get(self, filter_condition=None): | ||||
| """ | """ | ||||
| @@ -263,34 +453,55 @@ class WatchpointHitHandler(StreamHandlerBase): | |||||
| """Return the list of watchpoint hits.""" | """Return the list of watchpoint hits.""" | ||||
| watch_point_hits = [] | watch_point_hits = [] | ||||
| for node_name, watchpoint_hits in self._hits.items(): | for node_name, watchpoint_hits in self._hits.items(): | ||||
| watch_points = [watchpoint_hit.watchpoint for watchpoint_hit in watchpoint_hits] | |||||
| tensors = [] | |||||
| graph_name = None | |||||
| for slot, tensor_hits in watchpoint_hits.items(): | |||||
| if graph_name is None: | |||||
| graph_name = tensor_hits[0].graph_name | |||||
| tensor_info = self._get_tensor_hit_info(slot, tensor_hits) | |||||
| tensors.append(tensor_info) | |||||
| watch_point_hits.append({ | watch_point_hits.append({ | ||||
| 'node_name': node_name, | 'node_name': node_name, | ||||
| 'watch_points': watch_points | |||||
| 'tensors': tensors, | |||||
| 'graph_name': graph_name | |||||
| }) | }) | ||||
| return {'watch_point_hits': watch_point_hits} | return {'watch_point_hits': watch_point_hits} | ||||
| @staticmethod | |||||
| def _get_tensor_hit_info(slot, tensor_hits): | |||||
| """ | |||||
| Get watchpoint hit info of specified tensor. | |||||
| Args: | |||||
| slot (str): Slot id. | |||||
| tensor_hits (list): A list of watchpoint hit objects that the tensor hit. | |||||
| Returns: | |||||
| dict, tensor hit info. | |||||
| """ | |||||
| res = {} | |||||
| watch_points = [tensor_hit.watchpoint for tensor_hit in tensor_hits] | |||||
| if watch_points: | |||||
| res = { | |||||
| 'slot': slot, | |||||
| 'watch_points': watch_points | |||||
| } | |||||
| return res | |||||
| def _is_tensor_hit(self, tensor_name): | def _is_tensor_hit(self, tensor_name): | ||||
| """ | """ | ||||
| Check if the tensor is record in hit cache. | Check if the tensor is record in hit cache. | ||||
| Args: | Args: | ||||
| tensor_name (str): The name of full tensor name. | |||||
| tensor_name (str): The name of ui tensor name. | |||||
| Returns: | Returns: | ||||
| bool, if the tensor is hit. | bool, if the tensor is hit. | ||||
| """ | """ | ||||
| node_name = tensor_name.split(':')[0] | |||||
| watchpoint_hits = self.get(node_name) | |||||
| if watchpoint_hits is None: | |||||
| return False | |||||
| for watchpoint_hit in watchpoint_hits: | |||||
| if tensor_name == watchpoint_hit.tensor_name: | |||||
| return True | |||||
| return False | |||||
| node_name, slot = tensor_name.rsplit(':', 1) | |||||
| watchpoint_hits = self._hits.get(node_name, {}).get(slot) | |||||
| return bool(watchpoint_hits) | |||||
| def update_tensor_history(self, tensor_history): | def update_tensor_history(self, tensor_history): | ||||
| """ | """ | ||||
| @@ -308,45 +519,109 @@ class WatchpointHitHandler(StreamHandlerBase): | |||||
| hit_flag = self._is_tensor_hit(tensor_name) | hit_flag = self._is_tensor_hit(tensor_name) | ||||
| tensor_info['is_hit'] = hit_flag | tensor_info['is_hit'] = hit_flag | ||||
| def get_tensor_hit_infos(self, tensor_name): | |||||
| """ | |||||
| Get all hit information of a tensor. | |||||
| Args: | |||||
| tensor_name (str): Tensor name showed on UI. | |||||
| def validate_watch_condition(watch_condition): | |||||
| Returns: | |||||
| dict, tensor hit info. | |||||
| """ | |||||
| tensor_hit_info = {} | |||||
| if self._is_tensor_hit(tensor_name): | |||||
| node_name, slot = tensor_name.rsplit(':', 1) | |||||
| tensor_hits = self._get_watchpoints_by_tensor_name(node_name, slot) | |||||
| tensor_hit_info = self._get_tensor_hit_info(slot, tensor_hits) | |||||
| return tensor_hit_info | |||||
| def validate_watch_condition(condition_mgr, watch_condition): | |||||
| """Validate watch condition.""" | """Validate watch condition.""" | ||||
| if not isinstance(watch_condition, dict): | if not isinstance(watch_condition, dict): | ||||
| log.error("<watch_condition> should be dict. %s received.", watch_condition) | log.error("<watch_condition> should be dict. %s received.", watch_condition) | ||||
| raise DebuggerParamTypeError("<watch_condition> should be dict.") | raise DebuggerParamTypeError("<watch_condition> should be dict.") | ||||
| # validate condition | |||||
| condition = watch_condition.get('condition') | |||||
| if condition not in WATCHPOINT_CONDITION_MAPPING.keys(): | |||||
| log.error("Invalid watch condition. Acceptable values are <%s>.", | |||||
| str(WATCHPOINT_CONDITION_MAPPING.keys())) | |||||
| # validate condition_id | |||||
| condition_id = watch_condition.get('id') | |||||
| if condition_id not in condition_mgr.conditions.keys(): | |||||
| log.error("Invalid watch condition. Acceptable values are <%s>. %s received.", | |||||
| str(condition_mgr.conditions.keys()), condition_id) | |||||
| raise DebuggerParamValueError("Invalid watch condition value.") | raise DebuggerParamValueError("Invalid watch condition value.") | ||||
| # validate param | # validate param | ||||
| validate_watch_condition_params(watch_condition) | |||||
| validate_watch_condition_params(condition_mgr, watch_condition) | |||||
| def validate_watch_condition_params(watch_condition): | |||||
| def validate_watch_condition_params(condition_mgr, watch_condition): | |||||
| """ | """ | ||||
| Validate watch condition parameters. | Validate watch condition parameters. | ||||
| Args: | Args: | ||||
| condition_mgr (ConditionMgr): Instance of ConditionMgr. | |||||
| watch_condition (dict): Watch condition. | watch_condition (dict): Watch condition. | ||||
| - condition (str): Condition type. Should be in WATCHPOINT_CONDITION_MAPPING. | |||||
| - id (str): Condition id. Should be in WATCHPOINT_CONDITION_MAPPING. | |||||
| - param (list): Condition value. Should be given for comparison condition. The value will | |||||
| be translated to np.float32. | |||||
| - param (list): Condition value. Should be given for comparison condition. The value | |||||
| will be translated to np.float32. | |||||
| """ | """ | ||||
| condition = watch_condition.get('condition') | |||||
| param = watch_condition.get('param') | |||||
| if condition in ['NAN', 'INF', 'OVERFLOW']: | |||||
| if param: | |||||
| log.error("No param is expected for %s condition.", condition) | |||||
| condition_id = watch_condition.get('id') | |||||
| params = watch_condition.get('params') | |||||
| condition = condition_mgr.get_condition(condition_id) | |||||
| if condition_id in condition_mgr.get_no_param_condition(): | |||||
| if params: | |||||
| log.error("No param is expected for %s condition", condition_id) | |||||
| raise DebuggerParamValueError("No param is expected.") | raise DebuggerParamValueError("No param is expected.") | ||||
| else: | |||||
| if not isinstance(param, (float, int)): | |||||
| log.error("Number param should be given for condition <%s>.", | |||||
| condition) | |||||
| return | |||||
| for param in params: | |||||
| if param.get("name") not in condition.names: | |||||
| log.error("Invalid name of parameter for condition: %s, available values: %s", | |||||
| condition_id, condition.names) | |||||
| raise DebuggerParamValueError("Invalid name of parameter.") | |||||
| condition_param = condition.get_parameter_definition(param.get("name")) | |||||
| if condition_param.type.name in (ValueTypeEnum.FLOAT64.name, ValueTypeEnum.INT64.name) \ | |||||
| and not isinstance(param.get("value"), (float, int)): | |||||
| log.error("Number param should be given for condition: %s", condition_id) | |||||
| raise DebuggerParamValueError("Number param should be given.") | raise DebuggerParamValueError("Number param should be given.") | ||||
| if np.isinf(np.float32(param)): | |||||
| log.error("Condition param should be float32.") | |||||
| raise DebuggerParamValueError("The value of condition param should be within float32.") | |||||
| if condition_param.type.name == ValueTypeEnum.BOOL.name \ | |||||
| and not isinstance(param.get("value"), bool): | |||||
| log.error("Bool param should be given for condition: %s", condition_id) | |||||
| raise DebuggerParamValueError("Bool param should be given.") | |||||
| def set_default_param(condition_mgr, watch_condition): | |||||
| """ | |||||
| Set default param. | |||||
| Args: | |||||
| condition_mgr (ConditionMgr): Instance of ConditionMgr. | |||||
| watch_condition (dict): The watch condition. | |||||
| "condition": { | |||||
| id: "tensor_too_large", | |||||
| "params": [ | |||||
| { | |||||
| "name": "abs_mean_gt", | |||||
| "disable": false, | |||||
| "value": 1.1 | |||||
| } | |||||
| ] | |||||
| } | |||||
| - id (str): Id of condition. | |||||
| - param (list[dict]): The list of param for this condition. | |||||
| Returns: | |||||
| dict, the new watch_condition. | |||||
| """ | |||||
| condition_id = watch_condition.get('id') | |||||
| condition = condition_mgr.get_condition(condition_id) | |||||
| for param in condition.parameters: | |||||
| if not param.visible_on_ui and not param.support_disable: | |||||
| watch_condition["params"].append({ | |||||
| "name": param.name, | |||||
| "disable": False, | |||||
| "value": param.default_value | |||||
| }) | |||||
| watch_condition["abbr"] = condition.abbr | |||||
| return watch_condition | |||||
| @@ -0,0 +1,15 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """This package contains operators using multiple streams to deal with specific task.""" | |||||
| @@ -0,0 +1,120 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """This module is aimed to provide with tensor detail info.""" | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.common.utils import Streams | |||||
class TensorDetailInfo:
    """Manage tensor detail information.

    Aggregates data from the tensor, graph and watchpoint-hit stream
    handlers held by the debugger cache to serve tensor detail queries.
    """

    def __init__(self, cache):
        # Stream handlers are owned by the cache; this object only reads
        # from them.
        self._tensor_stream = cache.get_stream_handler(Streams.TENSOR)
        self._graph_stream = cache.get_stream_handler(Streams.GRAPH)
        self._hit_stream = cache.get_stream_handler(Streams.WATCHPOINT_HIT)

    def validate_tensor_name(self, tensor_name, graph_name):
        """
        Validate the tensor name and check that its node exists in the graph.

        Args:
            tensor_name (str): The tensor name on UI. Format like
                {node_name}:{slot}.
            graph_name (str): The graph name.

        Raises:
            DebuggerParamValueError: If the tensor name is not a string in
                {node_name}:{slot} format.
        """
        # validate tensor name format: must contain a ':' separating the
        # node name from the slot index
        if not isinstance(tensor_name, str) or ':' not in tensor_name:
            log.error("Invalid tensor name. Received: %s", tensor_name)
            raise DebuggerParamValueError("Invalid tensor name.")
        node_name, _ = tensor_name.rsplit(':', 1)
        # check if the node name is in graph; the graph stream raises if not
        self._graph_stream.validate_node_name(node_name=node_name, graph_name=graph_name)

    def get_tensor_graph(self, tensor_name, graph_name):
        """
        Get the graph related to specific tensor.

        Args:
            tensor_name (str): The name of tensor. Format like {node_name}:{slot}.
            graph_name (str): The graph name.

        Returns:
            dict, tensor graph, format is {'nodes': [Node object]}.
                The Node object = {
                    'graph_name': <graph_name>,
                    'name': <node name>,
                    'input': {<node name>: <Edge object>},
                    'output': {<node name>: <Edge object>},
                    'slots': [<Slot object>].
                }
                Edge object = {
                    'data_type': <data type>,
                    'edge_type': <edge type>,
                    'independent_layout': bool,
                    'shape': list[<dim>],
                    'slot_mapping': list[pair<slot, slot>],
                }.
        """
        self.validate_tensor_name(tensor_name=tensor_name, graph_name=graph_name)
        graph = self._graph_stream.get_tensor_graph(tensor_name, graph_name)
        # add watchpoint hits info and statistics info for each tensor in tensor graph.
        nodes = graph.get('graph', {}).get('nodes', [])
        for node in nodes:
            node['graph_name'] = graph_name
            for slot_info in node.get('slots', []):
                self._add_watchpoint_hit_info(slot_info, node)
                self._add_statistic_info(slot_info, node)
        return graph

    def _add_watchpoint_hit_info(self, slot_info, node):
        """
        Add watchpoint hit info of the tensor to the slot object.

        Args:
            slot_info (dict): Slot object, updated in place.
            node (dict): Node object; its UI name is used to build the
                tensor name.
        """
        tensor_name = ':'.join([node.get('name'), slot_info.get('slot')])
        slot_info.update(self._hit_stream.get_tensor_hit_infos(tensor_name))

    def _add_statistic_info(self, slot_info, node):
        """
        Add statistics info of the tensor to the slot object.

        Args:
            slot_info (dict): Slot object, updated in place.
            node (dict): Node object; its full name (not the UI name) is
                used to query the tensor stream.
        """
        tensor_name = ':'.join([node.get('full_name'), slot_info.get('slot')])
        node_type = node.get('type')
        slot_info['statistics'] = self._tensor_stream.get_tensor_statistics(tensor_name, node_type)

    def get_tensor_watch_points(self, tensor_name, graph_name):
        """
        Get all watchpoints that the tensor hit.

        Args:
            tensor_name (str): Tensor name from UI. Format like
                {node_name}:{slot}.
            graph_name (str): The graph name.

        Returns:
            list, watchpoint hit infos.
        """
        # validate tensor_name
        self.validate_tensor_name(tensor_name=tensor_name, graph_name=graph_name)
        # get watchpoint info that the tensor hit
        tensor_hit_info = self._hit_stream.get_tensor_hit_infos(tensor_name)
        watch_points = tensor_hit_info.get('watch_points', [])
        return watch_points
| @@ -32,16 +32,23 @@ class Statistics: | |||||
| avg_value (float): avg value of tensor data. | avg_value (float): avg value of tensor data. | ||||
| count (int): total count of tensor data. | count (int): total count of tensor data. | ||||
| nan_count (int): count of NAN. | nan_count (int): count of NAN. | ||||
| neg_zero_count (int): count of negative zero. | |||||
| pos_zero_count (int): count of positive zero. | |||||
| zero_count (int): count of zero. | |||||
| neg_inf_count (int): count of negative INF. | neg_inf_count (int): count of negative INF. | ||||
| pos_inf_count (int): count of positive INF. | pos_inf_count (int): count of positive INF. | ||||
| """ | """ | ||||
| def __init__(self, max_value=0, min_value=0, avg_value=0, | |||||
| count=0, nan_count=0, neg_inf_count=0, pos_inf_count=0): | |||||
| def __init__(self, max_value=0, min_value=0, avg_value=0, count=0, | |||||
| neg_zero_count=0, pos_zero_count=0, zero_count=0, | |||||
| nan_count=0, neg_inf_count=0, pos_inf_count=0): | |||||
| self._max = max_value | self._max = max_value | ||||
| self._min = min_value | self._min = min_value | ||||
| self._avg = avg_value | self._avg = avg_value | ||||
| self._count = count | self._count = count | ||||
| self._neg_zero_count = neg_zero_count | |||||
| self._pos_zero_count = pos_zero_count | |||||
| self._zero_count = zero_count | |||||
| self._nan_count = nan_count | self._nan_count = nan_count | ||||
| self._neg_inf_count = neg_inf_count | self._neg_inf_count = neg_inf_count | ||||
| self._pos_inf_count = pos_inf_count | self._pos_inf_count = pos_inf_count | ||||
| @@ -81,6 +88,21 @@ class Statistics: | |||||
| """Get count of positive INF.""" | """Get count of positive INF.""" | ||||
| return self._pos_inf_count | return self._pos_inf_count | ||||
| @property | |||||
| def neg_zero_count(self): | |||||
| """Get count of negative zero.""" | |||||
| return self._neg_zero_count | |||||
| @property | |||||
| def pos_zero_count(self): | |||||
| """Get count of positive zero.""" | |||||
| return self._pos_zero_count | |||||
| @property | |||||
| def zero_count(self): | |||||
| """Get count of zero.""" | |||||
| return self._zero_count | |||||
| class TensorComparison: | class TensorComparison: | ||||
| """TensorComparison class. | """TensorComparison class. | ||||
| @@ -204,7 +226,7 @@ class TensorUtils: | |||||
| tensors (numpy.ndarray): An numpy.ndarray of tensor data. | tensors (numpy.ndarray): An numpy.ndarray of tensor data. | ||||
| Returns: | Returns: | ||||
| an instance of Statistics. | |||||
| Statistics, an instance of Statistics. | |||||
| """ | """ | ||||
| ma_value = np.ma.masked_invalid(tensors) | ma_value = np.ma.masked_invalid(tensors) | ||||
| total, valid = tensors.size, ma_value.count() | total, valid = tensors.size, ma_value.count() | ||||
| @@ -240,10 +262,19 @@ class TensorUtils: | |||||
| tensor_min = ma_value.min() | tensor_min = ma_value.min() | ||||
| tensor_max = ma_value.max() | tensor_max = ma_value.max() | ||||
| tensor_sum = ma_value.sum(dtype=np.float64) | tensor_sum = ma_value.sum(dtype=np.float64) | ||||
| with np.errstate(invalid='ignore'): | |||||
| neg_zero_count = np.sum(ma_value < 0) | |||||
| with np.errstate(invalid='ignore'): | |||||
| pos_zero_count = np.sum(ma_value > 0) | |||||
| with np.errstate(invalid='ignore'): | |||||
| zero_count = np.sum(ma_value == 0) | |||||
| statistics = Statistics(max_value=tensor_max, | statistics = Statistics(max_value=tensor_max, | ||||
| min_value=tensor_min, | min_value=tensor_min, | ||||
| avg_value=tensor_sum / valid, | avg_value=tensor_sum / valid, | ||||
| count=total, | count=total, | ||||
| neg_zero_count=neg_zero_count, | |||||
| pos_zero_count=pos_zero_count, | |||||
| zero_count=zero_count, | |||||
| nan_count=nan_count, | nan_count=nan_count, | ||||
| neg_inf_count=neg_inf_count, | neg_inf_count=neg_inf_count, | ||||
| pos_inf_count=pos_inf_count) | pos_inf_count=pos_inf_count) | ||||
| @@ -269,11 +300,35 @@ class TensorUtils: | |||||
| "count": stats.count, | "count": stats.count, | ||||
| "nan_count": stats.nan_count, | "nan_count": stats.nan_count, | ||||
| "neg_inf_count": stats.neg_inf_count, | "neg_inf_count": stats.neg_inf_count, | ||||
| "pos_inf_count": stats.pos_inf_count, | |||||
| "pos_inf_count": stats.pos_inf_count} | |||||
| overall_statistics = TensorUtils.get_overall_statistic_dict(overall_stats) | |||||
| statistics.update(overall_statistics) | |||||
| return statistics | |||||
| @staticmethod | |||||
| def get_overall_statistic_dict(overall_stats): | |||||
| """ | |||||
| Get overall statistics dict according to statistics value. | |||||
| Args: | |||||
| overall_stats (Statistics): An instance of Statistics for whole tensor. | |||||
| Returns: | |||||
| dict, overall statistics. | |||||
| """ | |||||
| res = { | |||||
| "overall_max": float(overall_stats.max), | "overall_max": float(overall_stats.max), | ||||
| "overall_min": float(overall_stats.min) | |||||
| "overall_min": float(overall_stats.min), | |||||
| "overall_avg": float(overall_stats.avg), | |||||
| "overall_count": overall_stats.count, | |||||
| "overall_nan_count": overall_stats.nan_count, | |||||
| "overall_neg_inf_count": overall_stats.neg_inf_count, | |||||
| "overall_pos_inf_count": overall_stats.pos_inf_count, | |||||
| "overall_zero_count": float(overall_stats.zero_count), | |||||
| "overall_neg_zero_count": float(overall_stats.neg_zero_count), | |||||
| "overall_pos_zero_count": float(overall_stats.pos_zero_count) | |||||
| } | } | ||||
| return statistics | |||||
| return res | |||||
| @staticmethod | @staticmethod | ||||
| def calc_diff_between_two_tensor(first_tensor, second_tensor, tolerance): | def calc_diff_between_two_tensor(first_tensor, second_tensor, tolerance): | ||||
| @@ -51,8 +51,9 @@ def init_graph_handler(): | |||||
| @pytest.fixture(scope='session') | @pytest.fixture(scope='session') | ||||
| def app_client(): | def app_client(): | ||||
| """This fixture is flask server.""" | """This fixture is flask server.""" | ||||
| packages = ["mindinsight.backend.debugger"] | |||||
| packages = ["mindinsight.backend.debugger", "mindinsight.backend.conditionmgr"] | |||||
| settings.ENABLE_DEBUGGER = True | settings.ENABLE_DEBUGGER = True | ||||
| mock_obj = Mock(return_value=packages) | mock_obj = Mock(return_value=packages) | ||||
| tools.find_app_package = mock_obj | tools.find_app_package = mock_obj | ||||
| @@ -60,5 +61,10 @@ def app_client(): | |||||
| from mindinsight.backend.debugger.debugger_api import BACKEND_SERVER | from mindinsight.backend.debugger.debugger_api import BACKEND_SERVER | ||||
| APP.response_class = Response | APP.response_class = Response | ||||
| client = APP.test_client() | client = APP.test_client() | ||||
| yield client | |||||
| original_val = settings.ENABLE_RECOMMENDED_WATCHPOINTS | |||||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = False | |||||
| try: | |||||
| yield client | |||||
| finally: | |||||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_val | |||||
| BACKEND_SERVER.stop() | BACKEND_SERVER.stop() | ||||
| @@ -1 +1 @@ | |||||
| {"metadata": {"state": "pending", "step": 0, "device_name": "", "ip": "", "node_name": "", "backend": ""}} | |||||
| {"metadata": {"state": "pending", "step": 0, "device_name": "", "ip": "", "node_name": "", "backend": "", "enable_recheck": false, "graph_name": ""}} | |||||
| @@ -1,58 +1,77 @@ | |||||
| { | { | ||||
| "tensor_value": { | |||||
| "full_name": "Default/args0:0", | |||||
| "step": 3, | |||||
| "dtype": "DT_FLOAT32", | |||||
| "shape": [ | |||||
| 2, | |||||
| 3 | |||||
| ], | |||||
| "diff": [ | |||||
| [ | |||||
| [ | |||||
| 1.0, | |||||
| 1.0, | |||||
| 0.0 | |||||
| ], | |||||
| [ | |||||
| 2.0, | |||||
| 2.0, | |||||
| 0.0 | |||||
| ], | |||||
| [ | |||||
| 3.0, | |||||
| 3.0, | |||||
| 0.0 | |||||
| ] | |||||
| ], | |||||
| [ | |||||
| [ | |||||
| 4.0, | |||||
| 4.0, | |||||
| 0.0 | |||||
| ], | |||||
| [ | |||||
| 5.0, | |||||
| 5.0, | |||||
| 0.0 | |||||
| ], | |||||
| [ | |||||
| 6.0, | |||||
| 6.0, | |||||
| 0.0 | |||||
| ] | |||||
| ] | |||||
| ], | |||||
| "statistics": { | |||||
| "max": 0.0, | |||||
| "min": 0.0, | |||||
| "avg": 0.0, | |||||
| "count": 6, | |||||
| "nan_count": 0, | |||||
| "neg_inf_count": 0, | |||||
| "pos_inf_count": 0, | |||||
| "overall_max": 0.0, | |||||
| "overall_min": 0.0 | |||||
| } | |||||
| } | |||||
| "tensor_value": { | |||||
| "full_name": "Default/args0:0", | |||||
| "step": 3, | |||||
| "dtype": "DT_FLOAT32", | |||||
| "shape": [2, 3], | |||||
| "diff": [ | |||||
| [ | |||||
| [1.0, 1.0, 0.0], | |||||
| [2.0, 2.0, 0.0], | |||||
| [3.0, 3.0, 0.0] | |||||
| ], | |||||
| [ | |||||
| [4.0, 4.0, 0.0], | |||||
| [5.0, 5.0, 0.0], | |||||
| [6.0, 6.0, 0.0] | |||||
| ] | |||||
| ], | |||||
| "curr_step_statistics": { | |||||
| "max": 6.0, | |||||
| "min": 1.0, | |||||
| "avg": 3.5, | |||||
| "count": 6, | |||||
| "nan_count": 0, | |||||
| "neg_inf_count": 0, | |||||
| "pos_inf_count": 0, | |||||
| "overall_max": 6.0, | |||||
| "overall_min": 1.0, | |||||
| "overall_avg": 3.5, | |||||
| "overall_count": 6, | |||||
| "overall_nan_count": 0, | |||||
| "overall_neg_inf_count": 0, | |||||
| "overall_pos_inf_count": 0, | |||||
| "overall_zero_count": 0.0, | |||||
| "overall_neg_zero_count": 0.0, | |||||
| "overall_pos_zero_count": 6.0 | |||||
| }, | |||||
| "prev_step_statistics": { | |||||
| "max": 6.0, | |||||
| "min": 1.0, | |||||
| "avg": 3.5, | |||||
| "count": 6, | |||||
| "nan_count": 0, | |||||
| "neg_inf_count": 0, | |||||
| "pos_inf_count": 0, | |||||
| "overall_max": 6.0, | |||||
| "overall_min": 1.0, | |||||
| "overall_avg": 3.5, | |||||
| "overall_count": 6, | |||||
| "overall_nan_count": 0, | |||||
| "overall_neg_inf_count": 0, | |||||
| "overall_pos_inf_count": 0, | |||||
| "overall_zero_count": 0.0, | |||||
| "overall_neg_zero_count": 0.0, | |||||
| "overall_pos_zero_count": 6.0 | |||||
| }, | |||||
| "statistics": { | |||||
| "max": 0.0, | |||||
| "min": 0.0, | |||||
| "avg": 0.0, | |||||
| "count": 6, | |||||
| "nan_count": 0, | |||||
| "neg_inf_count": 0, | |||||
| "pos_inf_count": 0, | |||||
| "overall_max": 0.0, | |||||
| "overall_min": 0.0, | |||||
| "overall_avg": 0.0, | |||||
| "overall_count": 6, | |||||
| "overall_nan_count": 0, | |||||
| "overall_neg_inf_count": 0, | |||||
| "overall_pos_inf_count": 0, | |||||
| "overall_zero_count": 6.0, | |||||
| "overall_neg_zero_count": 0.0, | |||||
| "overall_pos_zero_count": 0.0 | |||||
| } | |||||
| } | |||||
| } | } | ||||
| @@ -1 +1 @@ | |||||
| {"watch_points": [{"id": 1, "watch_condition": {"condition": "MAX_GT", "param": 1.0}}, {"id": 2, "watch_condition": {"condition": "MAX_LT", "param": -1.0}}, {"id": 3, "watch_condition": {"condition": "MIN_GT", "param": 1e+32}}, {"id": 5, "watch_condition": {"condition": "MAX_MIN_GT", "param": 0}}, {"id": 6, "watch_condition": {"condition": "MAX_MIN_LT", "param": 0}}, {"id": 7, "watch_condition": {"condition": "MEAN_GT", "param": 0}}, {"id": 8, "watch_condition": {"condition": "MEAN_LT", "param": 0}}, {"id": 9, "watch_condition": {"condition": "INF"}}, {"id": 10, "watch_condition": {"condition": "OVERFLOW"}}]} | |||||
| {"watch_points": [{"id": 1, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1.0, "disable": false}], "abbr": "MAX>"}}, {"id": 2, "watch_condition": {"id": "max_lt", "params": [{"name": "param", "value": -1.0, "disable": false}], "abbr": "MAX<"}}, {"id": 3, "watch_condition": {"id": "min_gt", "params": [{"name": "param", "value": 1e+32, "disable": false}], "abbr": "MIN>"}}, {"id": 5, "watch_condition": {"id": "max_min_gt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MAX-MIN>"}}, {"id": 6, "watch_condition": {"id": "max_min_lt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MAX-Min<"}}, {"id": 7, "watch_condition": {"id": "mean_gt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MEAN>"}}, {"id": 8, "watch_condition": {"id": "mean_lt", "params": [{"name": "param", "value": 0, "disable": false}], "abbr": "MEAN<"}}, {"id": 9, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 10, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"conditions": [{"id": "inf", "parameters": [], "supported_target_type": "TENSOR"}, {"id": "max_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "overflow", "parameters": [], "supported_target_type": "TENSOR"}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"conditions": [{"id": "inf", "parameters": [], "supported_target_type": "TENSOR"}, {"id": "max_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "max_min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "mean_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_gt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "min_lt", "parameters": [{"name": "param", "type": "FLOAT64", "support_disable": true, "default_value": null}], "supported_target_type": "TENSOR"}, {"id": "nan", "parameters": [], "supported_target_type": "TENSOR"}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"metadata": {"state": "waiting", "step": 1, "device_name": "0", "node_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0", "backend": "GPU", "enable_recheck": false, "graph_name": "graph_1"}, "graph": {"graph_names": ["graph_0", "graph_1"], "nodes": [{"name": "graph_0", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}, {"name": "graph_1", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}]}, "watch_points": []} | |||||
| @@ -0,0 +1,672 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc3.bias", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[10]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[10]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/learning_rate", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/momentum", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc3.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[10, 84]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[10, 84]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc2.bias", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[84]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc2.bias", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[84]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc2.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[84, 120]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc2.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[84, 120]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc1.bias", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[120]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[120]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/fc1.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[120, 400]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120, | |||||
| 400 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[120, 400]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120, | |||||
| 400 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/conv2.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[16, 6, 5, 5]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||||
| "shape": [ | |||||
| [ | |||||
| 16, | |||||
| 6, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.conv2.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[16, 6, 5, 5]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||||
| "shape": [ | |||||
| [ | |||||
| 16, | |||||
| 6, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/conv1.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[6, 1, 5, 5]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||||
| "shape": [ | |||||
| [ | |||||
| 6, | |||||
| 1, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Default/optimizer-Momentum/Parameter[18]_7/moments.conv1.weight", | |||||
| "type": "Parameter", | |||||
| "attr": { | |||||
| "type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "shape": "[[6, 1, 5, 5]]" | |||||
| }, | |||||
| "input": {}, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||||
| "shape": [ | |||||
| [ | |||||
| 6, | |||||
| 1, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1": { | |||||
| "edge_type": "data" | |||||
| } | |||||
| }, | |||||
| "subnode_count": 0, | |||||
| "independent_layout": true | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,44 @@ | |||||
| { | |||||
| "metadata": { | |||||
| "state": "waiting", | |||||
| "step": 1, | |||||
| "device_name": "0", | |||||
| "node_name": "", | |||||
| "backend": "Ascend", | |||||
| "enable_recheck": false, | |||||
| "graph_name": "" | |||||
| }, | |||||
| "graph": { | |||||
| "graph_names": [ | |||||
| "graph_0", | |||||
| "graph_1" | |||||
| ], | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "graph_0", | |||||
| "type": "name_scope", | |||||
| "attr": {}, | |||||
| "input": {}, | |||||
| "output": {}, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": {}, | |||||
| "subnode_count": 2, | |||||
| "independent_layout": false | |||||
| }, | |||||
| { | |||||
| "name": "graph_1", | |||||
| "type": "name_scope", | |||||
| "attr": {}, | |||||
| "input": {}, | |||||
| "output": {}, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": {}, | |||||
| "subnode_count": 2, | |||||
| "independent_layout": false | |||||
| } | |||||
| ] | |||||
| }, | |||||
| "watch_points": [] | |||||
| } | |||||
| @@ -0,0 +1,534 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "graph_0/Default", | |||||
| "type": "name_scope", | |||||
| "attr": {}, | |||||
| "input": { | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op24": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op29": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op32": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op37": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op40": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120, | |||||
| 400 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilter-op48": { | |||||
| "shape": [ | |||||
| [ | |||||
| 16, | |||||
| 6, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilter-op55": { | |||||
| "shape": [ | |||||
| [ | |||||
| 6, | |||||
| 1, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output": { | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropInput-op52": { | |||||
| "shape": [ | |||||
| [ | |||||
| 16, | |||||
| 6, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/gradConv2D/Conv2DBackpropFilter-op55": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 32, | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op53": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 6, | |||||
| 4, | |||||
| 14 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 4, | |||||
| 3 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op40": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 400 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op36": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op32": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGrad-op28": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op24": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/gradConv2D/Conv2DBackpropFilter-op48": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 6, | |||||
| 14, | |||||
| 14 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/gradSoftmaxCrossEntropyWithLogits/Mul-op20": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 10, | |||||
| 10, | |||||
| 2 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||||
| }, | |||||
| "graph_0/Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op94": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 28, | |||||
| 28, | |||||
| 2 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": {}, | |||||
| "subnode_count": 7, | |||||
| "independent_layout": false | |||||
| }, | |||||
| { | |||||
| "name": "graph_0/Gradients", | |||||
| "type": "name_scope", | |||||
| "attr": {}, | |||||
| "input": { | |||||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op210": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op205": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 10, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 4, | |||||
| 3 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||||
| }, | |||||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op202": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 10, | |||||
| 10, | |||||
| 2 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||||
| }, | |||||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op197": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 6, | |||||
| 14, | |||||
| 14 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op188": { | |||||
| "shape": [ | |||||
| [ | |||||
| 16, | |||||
| 6, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/Cast-op195": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 6, | |||||
| 28, | |||||
| 28 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op196": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 6, | |||||
| 4, | |||||
| 14 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT16]" | |||||
| }, | |||||
| "graph_0/Default/tuple_getitem[10]_0/tuple_getitem-op192": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 28, | |||||
| 28, | |||||
| 2 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT8]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 32, | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/network-WithLossCell/_backbone-LeNet5/flatten-Flatten/Reshape-op9": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 400 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output": { | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op30": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op49": { | |||||
| "shape": [ | |||||
| [ | |||||
| 16, | |||||
| 6, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op56": { | |||||
| "shape": [ | |||||
| [ | |||||
| 6, | |||||
| 1, | |||||
| 5, | |||||
| 5 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25": { | |||||
| "shape": [ | |||||
| [ | |||||
| 10, | |||||
| 84 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op33": { | |||||
| "shape": [ | |||||
| [ | |||||
| 84, | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| }, | |||||
| "graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op41": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120, | |||||
| 400 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]" | |||||
| } | |||||
| }, | |||||
| "output_i": 0, | |||||
| "proxy_input": {}, | |||||
| "proxy_output": {}, | |||||
| "subnode_count": 1, | |||||
| "independent_layout": false | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1 @@ | |||||
| {"watch_points": [{"id": 1, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"metadata": {"state": "waiting", "step": 2, "device_name": "0", "node_name": "", "backend": "GPU", "enable_recheck": false, "graph_name": ""}, "graph": {"graph_names": ["graph_0", "graph_1"], "nodes": [{"name": "graph_0", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}, {"name": "graph_1", "type": "name_scope", "attr": {}, "input": {}, "output": {}, "output_i": 0, "proxy_input": {}, "proxy_output": {}, "subnode_count": 2, "independent_layout": false}]}, "watch_points": [{"id": 1, "watch_condition": {"id": "weight_initialization", "params": [{"name": "zero_percentage_ge", "disable": false, "value": 100}], "abbr": "WI"}}, {"id": 2, "watch_condition": {"id": "weight_change_too_large", "params": [{"name": "abs_update_ratio_mean_gt", "disable": false, "value": 0.1}], "abbr": "WCL"}}, {"id": 3, "watch_condition": {"id": "gradient_vanishing", "params": [{"name": "abs_mean_lt", "disable": false, "value": 1e-09}], "abbr": "GV"}}, {"id": 4, "watch_condition": {"id": "tensor_overflow", "params": [], "abbr": "TO"}}, {"id": 5, "watch_condition": {"id": "tensor_all_zero", "params": [{"name": "zero_percentage_ge", "disable": false, "value": 100}], "abbr": "TZ"}}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"watch_points": [{"id": 1, "watch_condition": {"id": "overflow", "params": [], "abbr": "OVERFLOW"}}]} | |||||
| @@ -4,9 +4,14 @@ | |||||
| "step": 1, | "step": 1, | ||||
| "device_name": "0", | "device_name": "0", | ||||
| "node_name": "", | "node_name": "", | ||||
| "backend": "Ascend" | |||||
| "backend": "Ascend", | |||||
| "enable_recheck": false, | |||||
| "graph_name": "graph_0" | |||||
| }, | }, | ||||
| "graph": { | "graph": { | ||||
| "graph_names": [ | |||||
| "graph_0" | |||||
| ], | |||||
| "nodes": [ | "nodes": [ | ||||
| { | { | ||||
| "name": "Default", | "name": "Default", | ||||
| @@ -4,20 +4,19 @@ | |||||
| "name": "Default/TransData-op99:0", | "name": "Default/TransData-op99:0", | ||||
| "full_name": "Default/TransData-op99:0", | "full_name": "Default/TransData-op99:0", | ||||
| "node_type": "TransData", | "node_type": "TransData", | ||||
| "type": "output" | |||||
| "type": "output", | |||||
| "graph_name": "graph_0" | |||||
| }, | }, | ||||
| { | { | ||||
| "name": "Default/args0:0", | "name": "Default/args0:0", | ||||
| "full_name": "Default/args0:0", | "full_name": "Default/args0:0", | ||||
| "node_type": "Parameter", | "node_type": "Parameter", | ||||
| "type": "input" | |||||
| "type": "input", | |||||
| "graph_name": "graph_0" | |||||
| } | } | ||||
| ], | ], | ||||
| "metadata": { | "metadata": { | ||||
| "state": "waiting", | "state": "waiting", | ||||
| "step": 1, | |||||
| "device_name": "0", | |||||
| "node_name": "", | |||||
| "backend": "Ascend" | |||||
| "step": 1 | |||||
| } | } | ||||
| } | } | ||||
| @@ -5,6 +5,7 @@ | |||||
| "full_name": "Default/TransData-op99:0", | "full_name": "Default/TransData-op99:0", | ||||
| "node_type": "TransData", | "node_type": "TransData", | ||||
| "type": "output", | "type": "output", | ||||
| "graph_name": "graph_0", | |||||
| "step": 1, | "step": 1, | ||||
| "dtype": "DT_FLOAT32", | "dtype": "DT_FLOAT32", | ||||
| "shape": [ | "shape": [ | ||||
| @@ -19,6 +20,7 @@ | |||||
| "full_name": "Default/args0:0", | "full_name": "Default/args0:0", | ||||
| "node_type": "Parameter", | "node_type": "Parameter", | ||||
| "type": "input", | "type": "input", | ||||
| "graph_name": "graph_0", | |||||
| "step": 1, | "step": 1, | ||||
| "dtype": "DT_FLOAT32", | "dtype": "DT_FLOAT32", | ||||
| "shape": [ | "shape": [ | ||||
| @@ -31,9 +33,6 @@ | |||||
| ], | ], | ||||
| "metadata": { | "metadata": { | ||||
| "state": "waiting", | "state": "waiting", | ||||
| "step": 1, | |||||
| "device_name": "0", | |||||
| "node_name": "", | |||||
| "backend": "Ascend" | |||||
| "step": 1 | |||||
| } | } | ||||
| } | } | ||||
| @@ -0,0 +1,138 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/args0", | |||||
| "full_name": "Default/args0", | |||||
| "type": "Parameter", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/TransData-op99": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 32, | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0", | |||||
| "statistics": {} | |||||
| } | |||||
| ], | |||||
| "graph_name": "graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | |||||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | |||||
| "type": "Cast", | |||||
| "input": { | |||||
| "Default/TransData-op99": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 32, | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": {}, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0", | |||||
| "statistics": {} | |||||
| } | |||||
| ], | |||||
| "graph_name": "graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/TransData-op99", | |||||
| "full_name": "Default/TransData-op99", | |||||
| "type": "TransData", | |||||
| "input": { | |||||
| "Default/args0": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 32, | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": { | |||||
| "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 1, | |||||
| 32, | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0", | |||||
| "watch_points": [ | |||||
| { | |||||
| "id": 1, | |||||
| "watch_condition": { | |||||
| "id": "inf", | |||||
| "params": [], | |||||
| "abbr": "INF" | |||||
| } | |||||
| } | |||||
| ], | |||||
| "statistics": {} | |||||
| } | |||||
| ], | |||||
| "graph_name": "graph_0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,72 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38", | |||||
| "full_name": "Default/optimizer-Momentum/ApplyMomentum-op38", | |||||
| "type": "ApplyMomentum", | |||||
| "input": { | |||||
| "Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": {}, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0", | |||||
| "statistics": {} | |||||
| }, | |||||
| { | |||||
| "slot": "1", | |||||
| "statistics": {} | |||||
| } | |||||
| ], | |||||
| "graph_name": "graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias", | |||||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias", | |||||
| "type": "Parameter", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op38": { | |||||
| "shape": [ | |||||
| [ | |||||
| 120 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": true, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0", | |||||
| "statistics": {} | |||||
| } | |||||
| ], | |||||
| "graph_name": "graph_0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1 @@ | |||||
| {"watch_points": []} | |||||
| @@ -0,0 +1 @@ | |||||
| {"watch_points": []} | |||||
| @@ -1 +1 @@ | |||||
| {"tensor_value": {"full_name": "Default/TransData-op99:0", "step": 1, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "statistics": {"max": 6.0, "min": 5.0, "avg": 5.5, "count": 2, "nan_count": 0, "neg_inf_count": 0, "pos_inf_count": 0, "overall_max": 6.0, "overall_min": 1.0}, "value": [5.0, 6.0], "name": "Default/TransData-op99:0"}} | |||||
| {"tensor_value": {"full_name": "Default/TransData-op99:0", "step": 1, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "statistics": {"max": 6.0, "min": 5.0, "avg": 5.5, "count": 2, "nan_count": 0, "neg_inf_count": 0, "pos_inf_count": 0, "overall_max": 6.0, "overall_min": 1.0, "overall_avg": 3.5, "overall_count": 6, "overall_nan_count": 0, "overall_neg_inf_count": 0, "overall_pos_inf_count": 0, "overall_zero_count": 0.0, "overall_neg_zero_count": 0.0, "overall_pos_zero_count": 6.0}, "value": [5.0, 6.0], "name": "Default/TransData-op99:0"}} | |||||
| @@ -1 +1 @@ | |||||
| {"watch_point_hits": [{"node_name": "Default/TransData-op99", "watch_points": [{"id": 1, "watch_condition": {"condition": "INF"}}]}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "watch_points": [{"id": 1, "watch_condition": {"condition": "INF"}}]}]} | |||||
| {"watch_point_hits": [{"node_name": "Default/TransData-op99", "tensors": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}]}], "graph_name": "graph_0"}, {"node_name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op25", "tensors": [{"slot": "0", "watch_points": [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}]}], "graph_name": "graph_0"}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"nodes": [{"name": "Default", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "type": "ReLU", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15", "type": "ReLU", "nodes": []}]}]}]}]}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"nodes": [{"name": "graph_0", "type": "name_scope", "nodes": [{"name": "graph_0/Default", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "type": "ReLU", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15", "type": "ReLU", "nodes": []}]}]}]}]}]}, {"name": "graph_1", "type": "name_scope", "nodes": [{"name": "graph_1/Default", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "type": "ReLU", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15", "type": "ReLU", "nodes": []}]}]}]}]}]}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"nodes": [{"name": "Gradients", "type": "name_scope", "nodes": [{"name": "Gradients/Default", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21", "type": "BiasAddGrad", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op29", "type": "BiasAddGrad", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op37", "type": "BiasAddGrad", "nodes": []}]}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul", "type": "name_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5", "type": "aggregation_scope", "nodes": [{"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op24", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op27", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op32", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op35", "type": "MatMul", "nodes": []}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op40", "type": "MatMul", "nodes": []}, {"name": 
"Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradMatMul/MatMul[6]_5/MatMul-op44", "type": "MatMul", "nodes": []}]}]}]}]}]}]}]}]} | |||||
| @@ -1 +1,31 @@ | |||||
| {"nodes": [{"name": "Default", "type": "name_scope", "nodes": [{"name": "Default/optimizer-Momentum", "type": "name_scope", "nodes": [{"name": "Default/optimizer-Momentum/Parameter[18]_7", "type": "aggregation_scope", "nodes": [{"name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias", "type": "Parameter", "nodes": [], "watched": 0}], "watched": 1}], "watched": 1}], "watched": 1}]} | |||||
| { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default", | |||||
| "type": "name_scope", | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/optimizer-Momentum", | |||||
| "type": "name_scope", | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7", | |||||
| "type": "aggregation_scope", | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias", | |||||
| "type": "Parameter", | |||||
| "nodes": [], | |||||
| "watched": 0 | |||||
| } | |||||
| ], | |||||
| "watched": 0 | |||||
| } | |||||
| ], | |||||
| "watched": 0 | |||||
| } | |||||
| ], | |||||
| "watched": 0 | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -0,0 +1 @@ | |||||
| {"nodes": [{"name": "Default", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6", "type": "aggregation_scope", "nodes": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "type": "Parameter", "nodes": []}, {"name": "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias", "type": "Parameter", "nodes": []}]}]}]}]}]}]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"nodes": [{"name": "graph_0", "type": "name_scope", "nodes": [{"name": "graph_0/Default", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/conv2.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/conv1.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6", "type": "aggregation_scope", "nodes": [{"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "type": "Parameter", "nodes": []}, {"name": "graph_0/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias", "type": "Parameter", "nodes": []}]}]}]}]}]}]}, {"name": "graph_1", "type": "name_scope", "nodes": [{"name": "graph_1/Default", "type": "name_scope", "nodes": [{"name": 
"graph_1/Default/network-WithLossCell", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/conv2.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/conv1.weight", "type": "Parameter", "nodes": []}]}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense", "type": "name_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6", "type": "aggregation_scope", "nodes": [{"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "type": "Parameter", "nodes": []}, {"name": "graph_1/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias", "type": "Parameter", "nodes": []}]}]}]}]}]}]}]} | |||||
| @@ -28,7 +28,7 @@ from tests.st.func.debugger.conftest import GRAPH_PROTO_FILE | |||||
| class MockDebuggerClient: | class MockDebuggerClient: | ||||
| """Mocked Debugger client.""" | """Mocked Debugger client.""" | ||||
| def __init__(self, hostname='localhost:50051', backend='Ascend'): | |||||
| def __init__(self, hostname='localhost:50051', backend='Ascend', graph_num=1): | |||||
| channel = grpc.insecure_channel(hostname) | channel = grpc.insecure_channel(hostname) | ||||
| self.stub = EventListenerStub(channel) | self.stub = EventListenerStub(channel) | ||||
| self.flag = True | self.flag = True | ||||
| @@ -37,6 +37,7 @@ class MockDebuggerClient: | |||||
| self._leaf_node = [] | self._leaf_node = [] | ||||
| self._cur_node = '' | self._cur_node = '' | ||||
| self._backend = backend | self._backend = backend | ||||
| self._graph_num = graph_num | |||||
| def _clean(self): | def _clean(self): | ||||
| """Clean cache.""" | """Clean cache.""" | ||||
| @@ -122,16 +123,32 @@ class MockDebuggerClient: | |||||
| assert response.status == EventReply.Status.OK | assert response.status == EventReply.Status.OK | ||||
| if training_done is False: | if training_done is False: | ||||
| self.send_graph_cmd() | self.send_graph_cmd() | ||||
| print("finish") | |||||
| def send_graph_cmd(self): | def send_graph_cmd(self): | ||||
| """Send graph to debugger server.""" | """Send graph to debugger server.""" | ||||
| self._step = 1 | self._step = 1 | ||||
| if self._graph_num > 1: | |||||
| chunks = [] | |||||
| for i in range(self._graph_num): | |||||
| chunks.extend(self._get_graph_chunks('graph_' + str(i))) | |||||
| response = self.stub.SendMultiGraphs(self._generate_graph(chunks)) | |||||
| else: | |||||
| chunks = self._get_graph_chunks() | |||||
| response = self.stub.SendGraph(self._generate_graph(chunks)) | |||||
| assert response.status == EventReply.Status.OK | |||||
| # go to command loop | |||||
| self.command_loop() | |||||
| def _get_graph_chunks(self, graph_name='graph_0'): | |||||
| """Get graph chunks.""" | |||||
| with open(GRAPH_PROTO_FILE, 'rb') as file_handle: | with open(GRAPH_PROTO_FILE, 'rb') as file_handle: | ||||
| content = file_handle.read() | content = file_handle.read() | ||||
| size = len(content) | size = len(content) | ||||
| graph = ms_graph_pb2.GraphProto() | graph = ms_graph_pb2.GraphProto() | ||||
| graph.ParseFromString(content) | graph.ParseFromString(content) | ||||
| graph.name = 'graph_name' | |||||
| graph.name = graph_name | |||||
| content = graph.SerializeToString() | |||||
| self._leaf_node = [node.full_name for node in graph.node] | self._leaf_node = [node.full_name for node in graph.node] | ||||
| # the max limit of grpc data size is 4kb | # the max limit of grpc data size is 4kb | ||||
| # split graph into 3kb per chunk | # split graph into 3kb per chunk | ||||
| @@ -141,10 +158,8 @@ class MockDebuggerClient: | |||||
| sub_size = min(chunk_size, size - index) | sub_size = min(chunk_size, size - index) | ||||
| sub_chunk = Chunk(buffer=content[index: index + sub_size]) | sub_chunk = Chunk(buffer=content[index: index + sub_size]) | ||||
| chunks.append(sub_chunk) | chunks.append(sub_chunk) | ||||
| response = self.stub.SendGraph(self._generate_graph(chunks)) | |||||
| assert response.status == EventReply.Status.OK | |||||
| # go to command loop | |||||
| self.command_loop() | |||||
| chunks[-1].finished = True | |||||
| return chunks | |||||
| @staticmethod | @staticmethod | ||||
| def _generate_graph(chunks): | def _generate_graph(chunks): | ||||
| @@ -202,5 +217,5 @@ class MockDebuggerClientThread: | |||||
| return self._debugger_client_thread | return self._debugger_client_thread | ||||
| def __exit__(self, exc_type, exc_val, exc_tb): | def __exit__(self, exc_type, exc_val, exc_tb): | ||||
| self._debugger_client_thread.join(timeout=5) | |||||
| self._debugger_client_thread.join(timeout=3) | |||||
| self._debugger_client.flag = False | self._debugger_client.flag = False | ||||
| @@ -22,12 +22,20 @@ import os | |||||
| import pytest | import pytest | ||||
| from mindinsight.conf import settings | |||||
| from tests.st.func.debugger.conftest import DEBUGGER_BASE_URL | from tests.st.func.debugger.conftest import DEBUGGER_BASE_URL | ||||
| from tests.st.func.debugger.mock_ms_client import MockDebuggerClient | from tests.st.func.debugger.mock_ms_client import MockDebuggerClient | ||||
| from tests.st.func.debugger.utils import check_waiting_state, get_request_result, \ | from tests.st.func.debugger.utils import check_waiting_state, get_request_result, \ | ||||
| send_and_compare_result | send_and_compare_result | ||||
| def send_terminate_cmd(app_client): | |||||
| """Send terminate command to debugger client.""" | |||||
| url = os.path.join(DEBUGGER_BASE_URL, 'control') | |||||
| body_data = {'mode': 'terminate'} | |||||
| send_and_compare_result(app_client, url, body_data) | |||||
| class TestAscendDebugger: | class TestAscendDebugger: | ||||
| """Test debugger on Ascend backend.""" | """Test debugger on Ascend backend.""" | ||||
| @@ -36,23 +44,6 @@ class TestAscendDebugger: | |||||
| """Setup class.""" | """Setup class.""" | ||||
| cls._debugger_client = MockDebuggerClient(backend='Ascend') | cls._debugger_client = MockDebuggerClient(backend='Ascend') | ||||
| @staticmethod | |||||
| def _send_terminate_cmd(app_client): | |||||
| """Send terminate command to debugger client.""" | |||||
| url = os.path.join(DEBUGGER_BASE_URL, 'control') | |||||
| body_data = {'mode': 'terminate'} | |||||
| send_and_compare_result(app_client, url, body_data) | |||||
| @staticmethod | |||||
| def _create_watchpoint(app_client, condition, expect_id): | |||||
| """Create watchpoint.""" | |||||
| url = 'create_watchpoint' | |||||
| body_data = {'condition': condition, | |||||
| 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7', | |||||
| 'Default/TransData-op99']} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res.get('id') == expect_id | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @pytest.mark.platform_x86_cpu | @pytest.mark.platform_x86_cpu | ||||
| @@ -86,10 +77,47 @@ class TestAscendDebugger: | |||||
| """Test retrieve when train_begin.""" | """Test retrieve when train_begin.""" | ||||
| url = 'retrieve' | url = 'retrieve' | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file) | send_and_compare_result(app_client, url, body_data, expect_file) | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| def test_get_conditions(self, app_client): | |||||
| """Test get conditions for ascend.""" | |||||
| url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions' | |||||
| body_data = {} | |||||
| expect_file = 'get_conditions_for_ascend.json' | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get', full_url=True) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||||
| ({'mode': 'all'}, 'multi_retrieve_all.json'), | |||||
| ({'mode': 'node', 'params': {'name': 'Default', 'graph_name': 'graph_1'}}, 'retrieve_scope_node.json'), | |||||
| ({'mode': 'node', 'params': {'name': 'graph_0'}}, 'multi_retrieve_scope_node.json'), | |||||
| ({'mode': 'node', 'params': {'name': 'graph_0/Default/optimizer-Momentum/Parameter[18]_7'}}, | |||||
| 'multi_retrieve_aggregation_scope_node.json'), | |||||
| ({'mode': 'node', 'params': { | |||||
| 'name': 'graph_0/Default/TransData-op99', | |||||
| 'single_node': True}}, 'multi_retrieve_single_node.json'), | |||||
| ({'mode': 'node', 'params': { | |||||
| 'name': 'Default/TransData-op99', | |||||
| 'single_node': True, 'graph_name': 'graph_0'}}, 'retrieve_single_node.json') | |||||
| ]) | |||||
| def test_multi_retrieve_when_train_begin(self, app_client, body_data, expect_file): | |||||
| """Test retrieve when train_begin.""" | |||||
| url = 'retrieve' | |||||
| debugger_client = MockDebuggerClient(backend='Ascend', graph_num=2) | |||||
| with debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -100,22 +128,21 @@ class TestAscendDebugger: | |||||
| def test_create_and_delete_watchpoint(self, app_client): | def test_create_and_delete_watchpoint(self, app_client): | ||||
| """Test create and delete watchpoint.""" | """Test create and delete watchpoint.""" | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| conditions = [ | conditions = [ | ||||
| {'condition': 'MAX_GT', 'param': 1.0}, | |||||
| {'condition': 'MAX_LT', 'param': -1.0}, | |||||
| {'condition': 'MIN_GT', 'param': 1e+32}, | |||||
| {'condition': 'MIN_LT', 'param': -1e+32}, | |||||
| {'condition': 'MAX_MIN_GT', 'param': 0}, | |||||
| {'condition': 'MAX_MIN_LT', 'param': 0}, | |||||
| {'condition': 'MEAN_GT', 'param': 0}, | |||||
| {'condition': 'MEAN_LT', 'param': 0}, | |||||
| {'condition': 'INF'}, | |||||
| {'condition': 'OVERFLOW'}, | |||||
| {'id': 'max_gt', 'params': [{'name': 'param', 'value': 1.0, 'disable': False}]}, | |||||
| {'id': 'max_lt', 'params': [{'name': 'param', 'value': -1.0, 'disable': False}]}, | |||||
| {'id': 'min_gt', 'params': [{'name': 'param', 'value': 1e+32, 'disable': False}]}, | |||||
| {'id': 'min_lt', 'params': [{'name': 'param', 'value': -1e+32, 'disable': False}]}, | |||||
| {'id': 'max_min_gt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||||
| {'id': 'max_min_lt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||||
| {'id': 'mean_gt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||||
| {'id': 'mean_lt', 'params': [{'name': 'param', 'value': 0, 'disable': False}]}, | |||||
| {'id': 'inf', 'params': []}, | |||||
| {'id': 'overflow', 'params': []}, | |||||
| ] | ] | ||||
| for idx, condition in enumerate(conditions): | for idx, condition in enumerate(conditions): | ||||
| self._create_watchpoint(app_client, condition, idx + 1) | |||||
| create_watchpoint(app_client, condition, idx + 1) | |||||
| # delete 4-th watchpoint | # delete 4-th watchpoint | ||||
| url = 'delete_watchpoint' | url = 'delete_watchpoint' | ||||
| body_data = {'watch_point_id': 4} | body_data = {'watch_point_id': 4} | ||||
| @@ -125,7 +152,7 @@ class TestAscendDebugger: | |||||
| body_data = {'mode': 'watchpoint'} | body_data = {'mode': 'watchpoint'} | ||||
| expect_file = 'create_and_delete_watchpoint.json' | expect_file = 'create_and_delete_watchpoint.json' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file) | send_and_compare_result(app_client, url, body_data, expect_file) | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -138,10 +165,9 @@ class TestAscendDebugger: | |||||
| watch_point_id = 1 | watch_point_id = 1 | ||||
| leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias' | leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias' | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| condition = {'condition': 'INF'} | |||||
| self._create_watchpoint(app_client, condition, watch_point_id) | |||||
| check_waiting_state(app_client) | |||||
| condition = {'id': 'inf', 'params': []} | |||||
| create_watchpoint(app_client, condition, watch_point_id) | |||||
| # update watchpoint watchpoint list | # update watchpoint watchpoint list | ||||
| url = 'update_watchpoint' | url = 'update_watchpoint' | ||||
| body_data = {'watch_point_id': watch_point_id, | body_data = {'watch_point_id': watch_point_id, | ||||
| @@ -153,7 +179,7 @@ class TestAscendDebugger: | |||||
| body_data = {'name': leaf_node_name, 'watch_point_id': watch_point_id} | body_data = {'name': leaf_node_name, 'watch_point_id': watch_point_id} | ||||
| expect_file = 'search_unwatched_leaf_node.json' | expect_file = 'search_unwatched_leaf_node.json' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | send_and_compare_result(app_client, url, body_data, expect_file, method='get') | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -164,18 +190,7 @@ class TestAscendDebugger: | |||||
| def test_watchpoint_hit(self, app_client): | def test_watchpoint_hit(self, app_client): | ||||
| """Test retrieve watchpoint hit.""" | """Test retrieve watchpoint hit.""" | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| self._create_watchpoint(app_client, condition={'condition': 'INF'}, expect_id=1) | |||||
| # send run command to get watchpoint hit | |||||
| url = 'control' | |||||
| body_data = {'mode': 'continue', | |||||
| 'steps': 2} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'running'}} | |||||
| # wait for server has received watchpoint hit | |||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| create_watchpoint_and_wait(app_client) | |||||
| # check watchpoint hit list | # check watchpoint hit list | ||||
| url = 'retrieve' | url = 'retrieve' | ||||
| body_data = {'mode': 'watchpoint_hit'} | body_data = {'mode': 'watchpoint_hit'} | ||||
| @@ -188,11 +203,11 @@ class TestAscendDebugger: | |||||
| 'name': 'Default/TransData-op99', | 'name': 'Default/TransData-op99', | ||||
| 'single_node': True, | 'single_node': True, | ||||
| 'watch_point_id': 1 | 'watch_point_id': 1 | ||||
| } | |||||
| } | |||||
| } | } | ||||
| expect_file = 'retrieve_single_watchpoint_hit.json' | expect_file = 'retrieve_single_watchpoint_hit.json' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file) | send_and_compare_result(app_client, url, body_data, expect_file) | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -204,8 +219,7 @@ class TestAscendDebugger: | |||||
| """Test retrieve tensor value.""" | """Test retrieve tensor value.""" | ||||
| node_name = 'Default/TransData-op99' | node_name = 'Default/TransData-op99' | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| # prepare tensor value | # prepare tensor value | ||||
| url = 'retrieve_tensor_history' | url = 'retrieve_tensor_history' | ||||
| body_data = {'name': node_name} | body_data = {'name': node_name} | ||||
| @@ -226,7 +240,7 @@ class TestAscendDebugger: | |||||
| } | } | ||||
| expect_file = 'retrieve_tensor_value.json' | expect_file = 'retrieve_tensor_value.json' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | send_and_compare_result(app_client, url, body_data, expect_file, method='get') | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -238,15 +252,13 @@ class TestAscendDebugger: | |||||
| """Test compare tensor value.""" | """Test compare tensor value.""" | ||||
| node_name = 'Default/args0' | node_name = 'Default/args0' | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| # prepare tensor values | # prepare tensor values | ||||
| url = 'control' | url = 'control' | ||||
| body_data = {'mode': 'continue', | body_data = {'mode': 'continue', | ||||
| 'steps': 2} | 'steps': 2} | ||||
| get_request_result(app_client, url, body_data) | get_request_result(app_client, url, body_data) | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| get_request_result( | get_request_result( | ||||
| app_client=app_client, url='retrieve_tensor_history', body_data={'name': node_name}) | app_client=app_client, url='retrieve_tensor_history', body_data={'name': node_name}) | ||||
| res = get_request_result( | res = get_request_result( | ||||
| @@ -262,7 +274,7 @@ class TestAscendDebugger: | |||||
| } | } | ||||
| expect_file = 'compare_tensors.json' | expect_file = 'compare_tensors.json' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | send_and_compare_result(app_client, url, body_data, expect_file, method='get') | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -277,12 +289,110 @@ class TestAscendDebugger: | |||||
| def test_retrieve_bfs_node(self, app_client, body_data, expect_file): | def test_retrieve_bfs_node(self, app_client, body_data, expect_file): | ||||
| """Test retrieve bfs node.""" | """Test retrieve bfs node.""" | ||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| # prepare tensor values | # prepare tensor values | ||||
| url = 'retrieve_node_by_bfs' | url = 'retrieve_node_by_bfs' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get') | send_and_compare_result(app_client, url, body_data, expect_file, method='get') | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| def test_pause(self, app_client): | |||||
| """Test pause the training.""" | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| # send run command to execute to next node | |||||
| url = 'control' | |||||
| body_data = {'mode': 'continue', | |||||
| 'steps': -1} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||||
| # send pause command | |||||
| url = 'control' | |||||
| body_data = {'mode': 'pause'} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'waiting', 'enable_recheck': False}} | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("url, body_data, enable_recheck", [ | |||||
| ('create_watchpoint', | |||||
| {'condition': {'id': 'inf', 'params': []}, | |||||
| 'watch_nodes': ['Default']}, True), | |||||
| ('update_watchpoint', | |||||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], | |||||
| 'mode': 0}, True), | |||||
| ('update_watchpoint', | |||||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||||
| 'mode': 1}, True), | |||||
| ('delete_watchpoint', {}, True) | |||||
| ]) | |||||
| def test_recheck(self, app_client, url, body_data, enable_recheck): | |||||
| """Test recheck.""" | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| create_watchpoint_and_wait(app_client) | |||||
| # create watchpoint | |||||
| res = get_request_result(app_client, url, body_data, method='post') | |||||
| assert res['metadata']['enable_recheck'] is enable_recheck | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| def test_recommend_watchpoints(self, app_client): | |||||
| """Test generating recommended watchpoints.""" | |||||
| original_value = settings.ENABLE_RECOMMENDED_WATCHPOINTS | |||||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = True | |||||
| try: | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| url = 'retrieve' | |||||
| body_data = {'mode': 'watchpoint'} | |||||
| expect_file = 'recommended_watchpoints_at_startup.json' | |||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='post') | |||||
| send_terminate_cmd(app_client) | |||||
| finally: | |||||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_value | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||||
| ({'tensor_name': 'Default/TransData-op99:0', 'graph_name': 'graph_0'}, 'retrieve_tensor_graph-0.json'), | |||||
| ({'tensor_name': 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias:0', 'graph_name': 'graph_0'}, | |||||
| 'retrieve_tensor_graph-1.json') | |||||
| ]) | |||||
| def test_retrieve_tensor_graph(self, app_client, body_data, expect_file): | |||||
| """Test retrieve tensor graph.""" | |||||
| url = 'tensor_graphs' | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| create_watchpoint_and_wait(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='GET') | |||||
| send_terminate_cmd(app_client) | |||||
| class TestGPUDebugger: | |||||
| """Test debugger on Ascend backend.""" | |||||
| @classmethod | |||||
| def setup_class(cls): | |||||
| """Setup class.""" | |||||
| cls._debugger_client = MockDebuggerClient(backend='GPU') | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -294,23 +404,21 @@ class TestAscendDebugger: | |||||
| """Test get next node on GPU.""" | """Test get next node on GPU.""" | ||||
| gpu_debugger_client = MockDebuggerClient(backend='GPU') | gpu_debugger_client = MockDebuggerClient(backend='GPU') | ||||
| with gpu_debugger_client.get_thread_instance(): | with gpu_debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| # send run command to get watchpoint hit | # send run command to get watchpoint hit | ||||
| url = 'control' | url = 'control' | ||||
| body_data = {'mode': 'continue', | body_data = {'mode': 'continue', | ||||
| 'level': 'node', | 'level': 'node', | ||||
| 'name': 'Default/TransData-op99'} | 'name': 'Default/TransData-op99'} | ||||
| res = get_request_result(app_client, url, body_data) | res = get_request_result(app_client, url, body_data) | ||||
| assert res == {'metadata': {'state': 'running'}} | |||||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||||
| # get metadata | # get metadata | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| check_waiting_state(app_client) | |||||
| url = 'retrieve' | url = 'retrieve' | ||||
| body_data = {'mode': 'all'} | body_data = {'mode': 'all'} | ||||
| expect_file = 'retrieve_next_node_on_gpu.json' | expect_file = 'retrieve_next_node_on_gpu.json' | ||||
| send_and_compare_result(app_client, url, body_data, expect_file) | send_and_compare_result(app_client, url, body_data, expect_file) | ||||
| self._send_terminate_cmd(app_client) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | @pytest.mark.level0 | ||||
| @pytest.mark.env_single | @pytest.mark.env_single | ||||
| @@ -318,20 +426,245 @@ class TestAscendDebugger: | |||||
| @pytest.mark.platform_arm_ascend_training | @pytest.mark.platform_arm_ascend_training | ||||
| @pytest.mark.platform_x86_gpu_training | @pytest.mark.platform_x86_gpu_training | ||||
| @pytest.mark.platform_x86_ascend_training | @pytest.mark.platform_x86_ascend_training | ||||
| def test_pause(self, app_client): | |||||
| """Test pause the training.""" | |||||
| @pytest.mark.parametrize("url, body_data, enable_recheck", [ | |||||
| ('create_watchpoint', | |||||
| {'condition': {'id': 'inf', 'params': []}, | |||||
| 'watch_nodes': ['Default']}, False), | |||||
| ('create_watchpoint', | |||||
| {'condition': {'id': 'inf', 'params': []}, | |||||
| 'watch_nodes': ['Default/TransData-op99']}, True), | |||||
| ('update_watchpoint', | |||||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], | |||||
| 'mode': 0}, True), | |||||
| ('update_watchpoint', | |||||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||||
| 'mode': 1}, False), | |||||
| ('update_watchpoint', | |||||
| [{'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||||
| 'mode': 1}, | |||||
| {'watch_point_id': 1, 'watch_nodes': ['Default/optimizer-Momentum'], | |||||
| 'mode': 0} | |||||
| ], True), | |||||
| ('update_watchpoint', | |||||
| [{'watch_point_id': 1, 'watch_nodes': ['Default/TransData-op99'], | |||||
| 'mode': 0}, | |||||
| {'watch_point_id': 1, 'watch_nodes': ['Default/TransData-op99'], | |||||
| 'mode': 1} | |||||
| ], True), | |||||
| ('delete_watchpoint', {'watch_point_id': 1}, True) | |||||
| ]) | |||||
| def test_recheck_state(self, app_client, url, body_data, enable_recheck): | |||||
| """Test update watchpoint and check the value of enable_recheck.""" | |||||
| with self._debugger_client.get_thread_instance(): | with self._debugger_client.get_thread_instance(): | ||||
| flag = check_waiting_state(app_client) | |||||
| assert flag is True | |||||
| # send run command to execute to next node | |||||
| url = 'control' | |||||
| body_data = {'mode': 'continue', | |||||
| 'steps': -1} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'running'}} | |||||
| # send pause command | |||||
| url = 'control' | |||||
| body_data = {'mode': 'pause'} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'waiting'}} | |||||
| self._send_terminate_cmd(app_client) | |||||
| create_watchpoint_and_wait(app_client) | |||||
| if not isinstance(body_data, list): | |||||
| body_data = [body_data] | |||||
| for sub_body_data in body_data: | |||||
| res = get_request_result(app_client, url, sub_body_data, method='post') | |||||
| assert res['metadata']['enable_recheck'] is enable_recheck | |||||
| send_terminate_cmd(app_client) | |||||
| def test_get_conditions(self, app_client): | |||||
| """Test get conditions for gpu.""" | |||||
| url = '/v1/mindinsight/conditionmgr/train-jobs/train-id/conditions' | |||||
| body_data = {} | |||||
| expect_file = 'get_conditions_for_gpu.json' | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='get', full_url=True) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| def test_recheck(self, app_client): | |||||
| """Test recheck request.""" | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| create_watchpoint_and_wait(app_client) | |||||
| # send recheck when disable to do recheck | |||||
| get_request_result(app_client, 'recheck', {}, method='post', expect_code=400) | |||||
| # send recheck when enable to do recheck | |||||
| create_watchpoint(app_client, {'id': 'inf', 'params': []}, 2) | |||||
| res = get_request_result(app_client, 'recheck', {}, method='post') | |||||
| assert res['metadata']['enable_recheck'] is False | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("filter_condition, expect_file", [ | |||||
| ({'name': 'fc', 'node_category': 'weight'}, 'search_weight.json'), | |||||
| ({'name': 'fc', 'node_category': 'gradient'}, 'search_gradient.json'), | |||||
| ({'node_category': 'activation'}, 'search_activation.json') | |||||
| ]) | |||||
| def test_search_by_category(self, app_client, filter_condition, expect_file): | |||||
| """Test recheck request.""" | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, 'search', filter_condition, expect_file, | |||||
| method='get') | |||||
| send_terminate_cmd(app_client) | |||||
| class TestMultiGraphDebugger: | |||||
| """Test debugger on Ascend backend.""" | |||||
| @classmethod | |||||
| def setup_class(cls): | |||||
| """Setup class.""" | |||||
| cls._debugger_client = MockDebuggerClient(backend='Ascend', graph_num=2) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||||
| ({'mode': 'all'}, 'multi_retrieve_all.json'), | |||||
| ({'mode': 'node', 'params': {'name': 'Default', 'graph_name': 'graph_1'}}, 'retrieve_scope_node.json'), | |||||
| ({'mode': 'node', 'params': {'name': 'graph_0'}}, 'multi_retrieve_scope_node.json'), | |||||
| ({'mode': 'node', 'params': {'name': 'graph_0/Default/optimizer-Momentum/Parameter[18]_7'}}, | |||||
| 'multi_retrieve_aggregation_scope_node.json'), | |||||
| ({'mode': 'node', 'params': { | |||||
| 'name': 'graph_0/Default/TransData-op99', | |||||
| 'single_node': True}}, 'multi_retrieve_single_node.json'), | |||||
| ({'mode': 'node', 'params': { | |||||
| 'name': 'Default/TransData-op99', | |||||
| 'single_node': True, 'graph_name': 'graph_0'}}, 'retrieve_single_node.json') | |||||
| ]) | |||||
| def test_multi_retrieve_when_train_begin(self, app_client, body_data, expect_file): | |||||
| """Test retrieve when train_begin.""" | |||||
| url = 'retrieve' | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("filter_condition, expect_file", [ | |||||
| ({'name': '', 'node_category': 'weight'}, 'search_weight_multi_graph.json'), | |||||
| ({'node_category': 'activation'}, 'search_activation_multi_graph.json') | |||||
| ]) | |||||
| def test_search_by_category_with_multi_graph(self, app_client, filter_condition, expect_file): | |||||
| """Test search by category request.""" | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, 'search', filter_condition, expect_file, method='get') | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("filter_condition, expect_id", [ | |||||
| ({'condition': {'id': 'inf'}, | |||||
| 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7'], | |||||
| 'graph_name': 'graph_0'}, 1), | |||||
| ({'condition': {'id': 'inf'}, | |||||
| 'watch_nodes': ['graph_0/Default/optimizer-Momentum/ApplyMomentum[8]_1'], | |||||
| 'graph_name': None}, 1) | |||||
| ]) | |||||
| def test_create_watchpoint(self, app_client, filter_condition, expect_id): | |||||
| """Test create watchpoint with multiple graphs.""" | |||||
| url = 'create_watchpoint' | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| res = get_request_result(app_client, url, filter_condition) | |||||
| assert res.get('id') == expect_id | |||||
| send_terminate_cmd(app_client) | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("params, expect_file", [ | |||||
| ({'level': 'node'}, 'multi_next_node.json'), | |||||
| ({'level': 'node', 'node_name': 'graph_0/Default/TransData-op99'}, 'multi_next_node.json'), | |||||
| ({'level': 'node', 'node_name': 'Default/TransData-op99', 'graph_name': 'graph_0'}, | |||||
| 'multi_next_node.json') | |||||
| ]) | |||||
| def test_continue_on_gpu(self, app_client, params, expect_file): | |||||
| """Test get next node on GPU.""" | |||||
| gpu_debugger_client = MockDebuggerClient(backend='GPU', graph_num=2) | |||||
| original_value = settings.ENABLE_RECOMMENDED_WATCHPOINTS | |||||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = True | |||||
| try: | |||||
| with gpu_debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| # send run command to get watchpoint hit | |||||
| url = 'control' | |||||
| body_data = {'mode': 'continue'} | |||||
| body_data.update(params) | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||||
| # get metadata | |||||
| check_waiting_state(app_client) | |||||
| url = 'retrieve' | |||||
| body_data = {'mode': 'all'} | |||||
| send_and_compare_result(app_client, url, body_data, expect_file) | |||||
| send_terminate_cmd(app_client) | |||||
| finally: | |||||
| settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_value | |||||
| @pytest.mark.level0 | |||||
| @pytest.mark.env_single | |||||
| @pytest.mark.platform_x86_cpu | |||||
| @pytest.mark.platform_arm_ascend_training | |||||
| @pytest.mark.platform_x86_gpu_training | |||||
| @pytest.mark.platform_x86_ascend_training | |||||
| @pytest.mark.parametrize("body_data, expect_file", [ | |||||
| ({'tensor_name': 'Default/TransData-op99:0', 'graph_name': 'graph_0'}, 'retrieve_tensor_hits-0.json'), | |||||
| ({'tensor_name': 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias:0', 'graph_name': 'graph_0'}, | |||||
| 'retrieve_tensor_hits-1.json') | |||||
| ]) | |||||
| def test_retrieve_tensor_hits(self, app_client, body_data, expect_file): | |||||
| """Test retrieve tensor graph.""" | |||||
| url = 'tensor_hits' | |||||
| with self._debugger_client.get_thread_instance(): | |||||
| check_waiting_state(app_client) | |||||
| send_and_compare_result(app_client, url, body_data, expect_file, method='GET') | |||||
| send_terminate_cmd(app_client) | |||||
| def create_watchpoint(app_client, condition, expect_id): | |||||
| """Create watchpoint.""" | |||||
| url = 'create_watchpoint' | |||||
| body_data = {'condition': condition, | |||||
| 'watch_nodes': ['Default/optimizer-Momentum/Parameter[18]_7', | |||||
| 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias', | |||||
| 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc1.bias', | |||||
| 'Default/TransData-op99']} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res.get('id') == expect_id | |||||
| def create_watchpoint_and_wait(app_client): | |||||
| """Preparation for recheck.""" | |||||
| check_waiting_state(app_client) | |||||
| create_watchpoint(app_client, condition={'id': 'inf', 'params': []}, expect_id=1) | |||||
| # send run command to get watchpoint hit | |||||
| url = 'control' | |||||
| body_data = {'mode': 'continue', | |||||
| 'steps': 2} | |||||
| res = get_request_result(app_client, url, body_data) | |||||
| assert res == {'metadata': {'state': 'running', 'enable_recheck': False}} | |||||
| # wait for server has received watchpoint hit | |||||
| check_waiting_state(app_client) | |||||
| @@ -27,19 +27,24 @@ def check_waiting_state(app_client): | |||||
| body_data = {'mode': 'all'} | body_data = {'mode': 'all'} | ||||
| max_try_times = 30 | max_try_times = 30 | ||||
| count = 0 | count = 0 | ||||
| flag = False | |||||
| while count < max_try_times: | while count < max_try_times: | ||||
| res = get_request_result(app_client, url, body_data) | res = get_request_result(app_client, url, body_data) | ||||
| state = res.get('metadata', {}).get('state') | state = res.get('metadata', {}).get('state') | ||||
| if state == 'waiting': | if state == 'waiting': | ||||
| return True | |||||
| flag = True | |||||
| break | |||||
| count += 1 | count += 1 | ||||
| time.sleep(0.1) | time.sleep(0.1) | ||||
| return False | |||||
| assert flag is True | |||||
| def get_request_result(app_client, url, body_data, method='post', expect_code=200): | |||||
| def get_request_result(app_client, url, body_data, method='post', expect_code=200, full_url=False): | |||||
| """Get request result.""" | """Get request result.""" | ||||
| real_url = os.path.join(DEBUGGER_BASE_URL, url) | |||||
| if not full_url: | |||||
| real_url = os.path.join(DEBUGGER_BASE_URL, url) | |||||
| else: | |||||
| real_url = url | |||||
| if method == 'post': | if method == 'post': | ||||
| response = app_client.post(real_url, data=json.dumps(body_data)) | response = app_client.post(real_url, data=json.dumps(body_data)) | ||||
| else: | else: | ||||
| @@ -50,9 +55,9 @@ def get_request_result(app_client, url, body_data, method='post', expect_code=20 | |||||
| return res | return res | ||||
| def send_and_compare_result(app_client, url, body_data, expect_file=None, method='post'): | |||||
| def send_and_compare_result(app_client, url, body_data, expect_file=None, method='post', full_url=False): | |||||
| """Send and compare result.""" | """Send and compare result.""" | ||||
| res = get_request_result(app_client, url, body_data, method=method) | |||||
| res = get_request_result(app_client, url, body_data, method=method, full_url=full_url) | |||||
| delete_random_items(res) | delete_random_items(res) | ||||
| if expect_file: | if expect_file: | ||||
| real_path = os.path.join(DEBUGGER_EXPECTED_RESULTS, 'restful_results', expect_file) | real_path = os.path.join(DEBUGGER_EXPECTED_RESULTS, 'restful_results', expect_file) | ||||
| @@ -18,8 +18,6 @@ import os | |||||
| from google.protobuf import json_format | from google.protobuf import json_format | ||||
| from mindinsight.datavisual.data_transform.graph import NodeTypeEnum | |||||
| from mindinsight.debugger.common.utils import NodeBasicInfo | |||||
| from mindinsight.debugger.proto import ms_graph_pb2 | from mindinsight.debugger.proto import ms_graph_pb2 | ||||
| from mindinsight.debugger.stream_handler.graph_handler import GraphHandler | from mindinsight.debugger.stream_handler.graph_handler import GraphHandler | ||||
| from mindinsight.debugger.stream_handler.watchpoint_handler import WatchpointHitHandler | from mindinsight.debugger.stream_handler.watchpoint_handler import WatchpointHitHandler | ||||
| @@ -46,7 +44,7 @@ def init_graph_handler(): | |||||
| """Init GraphHandler.""" | """Init GraphHandler.""" | ||||
| graph = get_graph_proto() | graph = get_graph_proto() | ||||
| graph_handler = GraphHandler() | graph_handler = GraphHandler() | ||||
| graph_handler.put(graph) | |||||
| graph_handler.put({graph.name: graph}) | |||||
| return graph_handler | return graph_handler | ||||
| @@ -64,16 +62,10 @@ def get_node_basic_infos(node_names): | |||||
| if not node_names: | if not node_names: | ||||
| return [] | return [] | ||||
| graph_stream = init_graph_handler() | graph_stream = init_graph_handler() | ||||
| graph_name = graph_stream.graph_names[0] | |||||
| node_infos = [] | node_infos = [] | ||||
| for node_name in node_names: | for node_name in node_names: | ||||
| node_type = graph_stream.get_node_type(node_name) | |||||
| if node_type == NodeTypeEnum.AGGREGATION_SCOPE.value: | |||||
| sub_nodes = graph_stream.get_nodes_by_scope(node_name) | |||||
| sub_infos = [NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||||
| for node in sub_nodes] | |||||
| node_infos.extend(sub_infos) | |||||
| full_name = graph_stream.get_full_name(node_name) | |||||
| node_infos.append(NodeBasicInfo(name=node_name, full_name=full_name, type=node_type)) | |||||
| node_infos.append(graph_stream.get_node_basic_info(node_name, graph_name)) | |||||
| return node_infos | return node_infos | ||||
| @@ -81,13 +73,10 @@ def get_watch_nodes_by_search(watch_nodes): | |||||
| """Get watched leaf nodes by search name.""" | """Get watched leaf nodes by search name.""" | ||||
| watched_leaf_nodes = [] | watched_leaf_nodes = [] | ||||
| graph_stream = init_graph_handler() | graph_stream = init_graph_handler() | ||||
| graph_name = graph_stream.graph_names[0] | |||||
| for search_name in watch_nodes: | for search_name in watch_nodes: | ||||
| search_nodes = graph_stream.get_searched_node_list() | |||||
| search_node_names = [ | |||||
| NodeBasicInfo(name=node.name, full_name=node.full_name, type=node.type) | |||||
| for node in search_nodes | |||||
| if node.name.startswith(search_name)] | |||||
| watched_leaf_nodes.extend(search_node_names) | |||||
| search_node_info = graph_stream.get_node_basic_info_by_scope(search_name, graph_name) | |||||
| watched_leaf_nodes.extend(search_node_info) | |||||
| return watched_leaf_nodes | return watched_leaf_nodes | ||||
| @@ -141,7 +130,7 @@ def mock_tensor_history(): | |||||
| return tensor_history | return tensor_history | ||||
| def compare_debugger_result_with_file(res, expect_file): | |||||
| def compare_debugger_result_with_file(res, expect_file, save=False): | |||||
| """ | """ | ||||
| Compare debugger result with file. | Compare debugger result with file. | ||||
| @@ -150,4 +139,8 @@ def compare_debugger_result_with_file(res, expect_file): | |||||
| expect_file: The expected file name. | expect_file: The expected file name. | ||||
| """ | """ | ||||
| real_path = os.path.join(DEBUGGER_EXPECTED_RESULTS, expect_file) | real_path = os.path.join(DEBUGGER_EXPECTED_RESULTS, expect_file) | ||||
| compare_result_with_file(res, real_path) | |||||
| if save: | |||||
| with open(real_path, 'w') as file_handler: | |||||
| json.dump(res, file_handler) | |||||
| else: | |||||
| compare_result_with_file(res, real_path) | |||||
| @@ -1 +1 @@ | |||||
| {"metadata": {"state": "waiting", "step": 0, "device_name": "", "pos": "0", "ip": "", "node_name": "", "backend": ""}, "graph": {}, "watch_points": []} | |||||
| {"metadata": {"state": "waiting", "step": 0, "device_name": "", "pos": "0", "ip": "", "node_name": "", "backend": "", "enable_recheck": false, "graph_name": ""}, "graph": {}, "watch_points": []} | |||||
| @@ -1 +1,36 @@ | |||||
| {"tensor_history": [{"name": "Default/TransData-op99:0", "full_name": "Default/TransData-op99:0", "node_type": "TransData", "type": "output", "step": 0, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "value": "click to view"}, {"name": "Default/args0:0", "full_name": "Default/args0:0", "node_type": "Parameter", "type": "input", "step": 0, "dtype": "DT_FLOAT32", "shape": [2, 3], "has_prev_step": false, "value": "click to view"}], "metadata": {"state": "waiting", "step": 0, "device_name": "", "pos": "0", "ip": "", "node_name": "", "backend": ""}} | |||||
| { | |||||
| "tensor_history": [ | |||||
| { | |||||
| "name": "Default/TransData-op99:0", | |||||
| "full_name": "Default/TransData-op99:0", | |||||
| "node_type": "TransData", | |||||
| "type": "output", | |||||
| "step": 0, | |||||
| "dtype": "DT_FLOAT32", | |||||
| "shape": [ | |||||
| 2, | |||||
| 3 | |||||
| ], | |||||
| "has_prev_step": false, | |||||
| "value": "click to view" | |||||
| }, | |||||
| { | |||||
| "name": "Default/args0:0", | |||||
| "full_name": "Default/args0:0", | |||||
| "node_type": "Parameter", | |||||
| "type": "input", | |||||
| "step": 0, | |||||
| "dtype": "DT_FLOAT32", | |||||
| "shape": [ | |||||
| 2, | |||||
| 3 | |||||
| ], | |||||
| "has_prev_step": false, | |||||
| "value": "click to view" | |||||
| } | |||||
| ], | |||||
| "metadata": { | |||||
| "state": "waiting", | |||||
| "step": 0 | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,197 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/args1", | |||||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/args1", | |||||
| "type": "Parameter", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_INT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", | |||||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", | |||||
| "type": "Const", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst2", | |||||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst2", | |||||
| "type": "Const", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op18", | |||||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op18", | |||||
| "type": "SoftmaxCrossEntropyWithLogits", | |||||
| "input": { | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": {}, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| }, | |||||
| { | |||||
| "slot": "1" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0", | |||||
| "full_name": "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0", | |||||
| "type": "OneHot", | |||||
| "input": { | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/args1": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_INT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| }, | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| }, | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst2": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": { | |||||
| "Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/SoftmaxCrossEntropyWithLogits-op18": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,176 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op201", | |||||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op201", | |||||
| "type": "Cast", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 10, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/tuple_getitem[10]_0/tuple_getitem-op203", | |||||
| "full_name": "Default/tuple_getitem-op203", | |||||
| "type": "tuple_getitem", | |||||
| "input": { | |||||
| "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89": { | |||||
| "shape": [ | |||||
| [], | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ], | |||||
| [ | |||||
| "1", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": {}, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/tuple_getitem[10]_0/tuple_getitem-op202", | |||||
| "full_name": "Default/tuple_getitem-op202", | |||||
| "type": "tuple_getitem", | |||||
| "input": { | |||||
| "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89": { | |||||
| "shape": [ | |||||
| [], | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ], | |||||
| [ | |||||
| "1", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": {}, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89", | |||||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89", | |||||
| "type": "ReLUV2", | |||||
| "input": { | |||||
| "Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/Cast-op201": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 10, | |||||
| 10 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_FLOAT32]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": { | |||||
| "Default/tuple_getitem[10]_0/tuple_getitem-op203": { | |||||
| "shape": [ | |||||
| [], | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ], | |||||
| [ | |||||
| "1", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| }, | |||||
| "Default/tuple_getitem[10]_0/tuple_getitem-op202": { | |||||
| "shape": [ | |||||
| [], | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT8]']", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ], | |||||
| [ | |||||
| "1", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| }, | |||||
| { | |||||
| "slot": "1" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1,166 @@ | |||||
| { | |||||
| "graph": { | |||||
| "nodes": [ | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op7", | |||||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op7", | |||||
| "type": "MaxPoolWithArgmax", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||||
| "shape": [ | |||||
| [], | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT16]']", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ], | |||||
| [ | |||||
| "1", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| }, | |||||
| { | |||||
| "slot": "1" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/tuple_getitem[10]_0/cst28", | |||||
| "full_name": "Default/tuple_getitem[10]_0/cst28", | |||||
| "type": "Const", | |||||
| "input": {}, | |||||
| "output": { | |||||
| "Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46", | |||||
| "full_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46", | |||||
| "type": "MaxPoolGradWithArgmax", | |||||
| "input": { | |||||
| "Default/tuple_getitem[10]_0/tuple_getitem-op206": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 4, | |||||
| 3 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT16]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": {}, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| }, | |||||
| { | |||||
| "name": "Default/tuple_getitem[10]_0/tuple_getitem-op206", | |||||
| "full_name": "Default/tuple_getitem-op206", | |||||
| "type": "tuple_getitem", | |||||
| "input": { | |||||
| "Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/MaxPoolWithArgmax-op7": { | |||||
| "shape": [ | |||||
| [], | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TUPLE['DT_TENSOR[DT_FLOAT32]', 'DT_TENSOR[DT_UINT16]']", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ], | |||||
| [ | |||||
| "1", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| }, | |||||
| "Default/tuple_getitem[10]_0/cst28": { | |||||
| "shape": [ | |||||
| [] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "output": { | |||||
| "Gradients/Default/network-WithLossCell/_backbone-LeNet5/max_pool2d-MaxPool2d/gradMaxPoolWithArgmax/MaxPoolGradWithArgmax-op46": { | |||||
| "shape": [ | |||||
| [ | |||||
| 32, | |||||
| 16, | |||||
| 4, | |||||
| 3 | |||||
| ] | |||||
| ], | |||||
| "edge_type": "data", | |||||
| "independent_layout": false, | |||||
| "data_type": "DT_TENSOR[DT_UINT16]", | |||||
| "slot_mapping": [ | |||||
| [ | |||||
| "0", | |||||
| "" | |||||
| ] | |||||
| ] | |||||
| } | |||||
| }, | |||||
| "slots": [ | |||||
| { | |||||
| "slot": "0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| ] | |||||
| } | |||||
| } | |||||
| @@ -0,0 +1 @@ | |||||
| {"node_names": ["Default/network-WithLossCell/_backbone-LeNet5/conv2-Conv2d/conv2.weight", "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/conv1.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc1.bias", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc2.bias", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.weight", "Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/Parameter[6]_6/fc3.bias"]} | |||||
| @@ -0,0 +1 @@ | |||||
| {"node_names": ["Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op12", "Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLU-op15"]} | |||||
| @@ -1 +0,0 @@ | |||||
| {"tensor_history": [{"name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", "node_type": "Cast", "type": "output"}, {"name": "Default/TransData-op99:0", "full_name": "Default/TransData-op99:0", "node_type": "TransData", "type": "input"}]} | |||||
| @@ -0,0 +1,18 @@ | |||||
| { | |||||
| "tensor_history": [ | |||||
| { | |||||
| "name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", | |||||
| "full_name": "Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190:0", | |||||
| "node_type": "Cast", | |||||
| "type": "output", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/TransData-op99:0", | |||||
| "full_name": "Default/TransData-op99:0", | |||||
| "node_type": "TransData", | |||||
| "type": "input", | |||||
| "graph_name": "kernel_graph_0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -1 +1,53 @@ | |||||
| {"tensor_history": [{"name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:0", "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:0", "node_type": "ApplyMomentum", "type": "output"}, {"name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:1", "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:1", "node_type": "ApplyMomentum", "type": "output"}, {"name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", "full_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", "node_type": "BiasAddGrad", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", "node_type": "Parameter", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", "node_type": "Parameter", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", "node_type": "Parameter", "type": "input"}, {"name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", "full_name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", "node_type": "Parameter", "type": "input"}]} | |||||
| { | |||||
| "tensor_history": [ | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:0", | |||||
| "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:0", | |||||
| "node_type": "ApplyMomentum", | |||||
| "type": "output", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22:1", | |||||
| "full_name": "Default/optimizer-Momentum/ApplyMomentum-op22:1", | |||||
| "node_type": "ApplyMomentum", | |||||
| "type": "output", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", | |||||
| "full_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/fc3-Dense/gradBiasAdd/BiasAddGrad-op21:0", | |||||
| "node_type": "BiasAddGrad", | |||||
| "type": "input", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", | |||||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/fc3.bias:0", | |||||
| "node_type": "Parameter", | |||||
| "type": "input", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", | |||||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias:0", | |||||
| "node_type": "Parameter", | |||||
| "type": "input", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", | |||||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/learning_rate:0", | |||||
| "node_type": "Parameter", | |||||
| "type": "input", | |||||
| "graph_name": "kernel_graph_0" | |||||
| }, | |||||
| { | |||||
| "name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", | |||||
| "full_name": "Default/optimizer-Momentum/Parameter[18]_7/momentum:0", | |||||
| "node_type": "Parameter", | |||||
| "type": "input", | |||||
| "graph_name": "kernel_graph_0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -3,31 +3,23 @@ | |||||
| "watchCondition": { | "watchCondition": { | ||||
| "condition": "inf" | "condition": "inf" | ||||
| }, | }, | ||||
| "id": 1 | |||||
| "id": 1, | |||||
| "watch_nodes_num": 0 | |||||
| }, | }, | ||||
| { | { | ||||
| "watchCondition": { | "watchCondition": { | ||||
| "condition": "inf" | "condition": "inf" | ||||
| }, | }, | ||||
| "id": 2, | "id": 2, | ||||
| "watchNodes": [ | |||||
| { | |||||
| "nodeName": "Default", | |||||
| "nodeType": "scope" | |||||
| } | |||||
| ] | |||||
| "watch_nodes_num": 172 | |||||
| }, | }, | ||||
| { | { | ||||
| "watchCondition": { | "watchCondition": { | ||||
| "condition": "max_gt", | "condition": "max_gt", | ||||
| "value": 1.0 | |||||
| "params": [{"name": "param", "value": 1}], | |||||
| "value": 1 | |||||
| }, | }, | ||||
| "id": 3, | "id": 3, | ||||
| "watchNodes": [ | |||||
| { | |||||
| "nodeName": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92", | |||||
| "nodeType": "leaf" | |||||
| } | |||||
| ] | |||||
| "watch_nodes_num": 1 | |||||
| } | } | ||||
| ] | ] | ||||
| @@ -1 +1 @@ | |||||
| [{"id": 1, "watch_condition": {"condition": "INF"}}, {"id": 2, "watch_condition": {"condition": "INF"}}, {"id": 3, "watch_condition": {"condition": "MAX_GT", "param": 1}}] | |||||
| [{"id": 1, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 2, "watch_condition": {"id": "inf", "params": [], "abbr": "INF"}}, {"id": 3, "watch_condition": {"id": "max_gt", "params": [{"name": "param", "value": 1, "disable": false}], "abbr": "MAX>"}}] | |||||
| @@ -1 +1,22 @@ | |||||
| {"watch_point_hits": [{"node_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92", "watch_points": [{"id": 1, "watch_condition": {"condition": "MAX_GT", "param": 1}}]}]} | |||||
| { | |||||
| "watch_point_hits": [ | |||||
| { | |||||
| "node_name": "Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92", | |||||
| "tensors": [ | |||||
| { | |||||
| "slot": "0", | |||||
| "watch_points": [ | |||||
| { | |||||
| "id": 1, | |||||
| "watch_condition": { | |||||
| "condition": "MAX_GT", | |||||
| "param": 1 | |||||
| } | |||||
| } | |||||
| ] | |||||
| } | |||||
| ], | |||||
| "graph_name": "kernel_graph_0" | |||||
| } | |||||
| ] | |||||
| } | |||||
| @@ -0,0 +1,15 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """Test for debugger stream cache.""" | |||||
| @@ -0,0 +1,77 @@ | |||||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||||
| # | |||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||||
| # you may not use this file except in compliance with the License. | |||||
| # You may obtain a copy of the License at | |||||
| # | |||||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||||
| # | |||||
| # Unless required by applicable law or agreed to in writing, software | |||||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
| # See the License for the specific language governing permissions and | |||||
| # limitations under the License. | |||||
| # ============================================================================ | |||||
| """ | |||||
| Function: | |||||
| Test query debugger node type identifier. | |||||
| Usage: | |||||
| pytest tests/ut/debugger | |||||
| """ | |||||
| from unittest.mock import MagicMock | |||||
| import pytest | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | |||||
| from mindinsight.debugger.stream_cache.node_type_identifier import NodeTypeIdentifier | |||||
| class TestNodeTypeIdentifier: | |||||
| """Test NodeTypeIdentifier.""" | |||||
| @pytest.mark.parametrize("name, node_type, result", [ | |||||
| ('Default/mock/node_name.bias', "Parameter", True), | |||||
| ('Default/mock/node_name.weight', "Parameter", True), | |||||
| ('Gradients/mock/node_name.bias', "Parameter", False), | |||||
| ('Default/optimizer-mock/node_name.bias', "Parameter", False), | |||||
| ]) | |||||
| def test_weight_node(self, name, node_type, result): | |||||
| """Test weight node.""" | |||||
| identifier = NodeTypeIdentifier('weight') | |||||
| mock_node = MagicMock(type=node_type) | |||||
| mock_node.name = name | |||||
| res = identifier.is_match(mock_node) | |||||
| assert res is result | |||||
| @pytest.mark.parametrize("name, node_type, result", [ | |||||
| ('Default/mock/node_name.bias', "Parameter", False), | |||||
| ('Gradients/mock/node_name.bias', "Parameter", False), | |||||
| ('Gradients/mock-mock/node_name.bias', "ReluGrad", True), | |||||
| ]) | |||||
| def test_gradient_node(self, name, node_type, result): | |||||
| """Test gradient node.""" | |||||
| identifier = NodeTypeIdentifier('gradient') | |||||
| mock_node = MagicMock(type=node_type) | |||||
| mock_node.name = name | |||||
| res = identifier.is_match(mock_node) | |||||
| assert res is result | |||||
| @pytest.mark.parametrize("name, node_type, condition, result", [ | |||||
| ('Default/mock/relu_ReLU-op11', "ReLU", None, True), | |||||
| ('Gradients/mock/relu_ReLU-op11', "ReLU", None, False), | |||||
| ('Default/mock/relu_ReLU-op11', "Parameter", None, False), | |||||
| ('Default/mock/relu_ReLU-op11', "ReLU", {'activation_func': 'Softmax'}, False), | |||||
| ('Default/mock/relu_ReLU-op11', "Softmax", {'activation_func': ['ReLU', 'Softmax']}, True) | |||||
| ]) | |||||
| def test_activate_node(self, name, node_type, condition, result): | |||||
| """Test activate node.""" | |||||
| identifier = NodeTypeIdentifier('activation') | |||||
| mock_node = MagicMock(type=node_type) | |||||
| mock_node.name = name | |||||
| res = identifier.is_match(mock_node, condition) | |||||
| assert res is result | |||||
| def test_invalid_func(self): | |||||
| """Test invalid func.""" | |||||
| with pytest.raises(DebuggerParamValueError, match='Invalid identify type.'): | |||||
| NodeTypeIdentifier('invalid_type') | |||||
| @@ -22,7 +22,7 @@ import os | |||||
| import pytest | import pytest | ||||
| from tests.ut.debugger.configurations import init_graph_handler | |||||
| from tests.ut.debugger.configurations import init_graph_handler, compare_debugger_result_with_file | |||||
| from tests.utils.tools import compare_result_with_file | from tests.utils.tools import compare_result_with_file | ||||
| @@ -46,11 +46,12 @@ class TestGraphHandler: | |||||
| """Test get.""" | """Test get.""" | ||||
| result = self.graph_handler.get(filter_condition) | result = self.graph_handler.get(filter_condition) | ||||
| file_path = os.path.join(self.graph_results_dir, result_file) | file_path = os.path.join(self.graph_results_dir, result_file) | ||||
| compare_debugger_result_with_file(result, file_path, True) | |||||
| compare_result_with_file(result, file_path) | compare_result_with_file(result, file_path) | ||||
| @pytest.mark.parametrize("node_name, result_file", [ | @pytest.mark.parametrize("node_name, result_file", [ | ||||
| ("Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | ("Default/network-WithLossCell/_backbone-LeNet5/conv1-Conv2d/Cast-op190", | ||||
| "tenor_hist_0.json"), | |||||
| "tensor_hist_0.json"), | |||||
| ("Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22", | ("Default/optimizer-Momentum/ApplyMomentum[8]_1/ApplyMomentum-op22", | ||||
| "tensor_hist_1.json") | "tensor_hist_1.json") | ||||
| ]) | ]) | ||||
| @@ -66,10 +67,22 @@ class TestGraphHandler: | |||||
| ]) | ]) | ||||
| def test_search_nodes(self, pattern, result_file): | def test_search_nodes(self, pattern, result_file): | ||||
| """Test search nodes.""" | """Test search nodes.""" | ||||
| result = self.graph_handler.search_nodes(pattern) | |||||
| result = self.graph_handler.search_nodes({'name': pattern}) | |||||
| file_path = os.path.join(self.graph_results_dir, result_file) | file_path = os.path.join(self.graph_results_dir, result_file) | ||||
| compare_result_with_file(result, file_path) | compare_result_with_file(result, file_path) | ||||
| @pytest.mark.parametrize("node_type, condition, result_file", [ | |||||
| ("weight", None, "search_nodes_by_type_0.json"), | |||||
| ("activation", {'activation_func': ['ReLU', 'Softmax']}, "search_nodes_by_type_1.json") | |||||
| ]) | |||||
| def test_search_nodes_by_type(self, node_type, condition, result_file): | |||||
| """Test search nodes by type.""" | |||||
| search_nodes = self.graph_handler.get_searched_node_list( | |||||
| {'node_category': node_type, 'condition': condition}, 'kernel_graph_0') | |||||
| file_path = os.path.join(self.graph_results_dir, result_file) | |||||
| result = {'node_names': [node.name for node in search_nodes]} | |||||
| compare_result_with_file(result, file_path) | |||||
| @pytest.mark.parametrize("node_name, expect_type", [ | @pytest.mark.parametrize("node_name, expect_type", [ | ||||
| ("Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", 'Const'), | ("Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/cst1", 'Const'), | ||||
| ("Default/TransData-op99", "TransData") | ("Default/TransData-op99", "TransData") | ||||
| @@ -96,7 +109,7 @@ class TestGraphHandler: | |||||
| ]) | ]) | ||||
| def test_get_node_name_by_full_name(self, full_name, expect_node_name): | def test_get_node_name_by_full_name(self, full_name, expect_node_name): | ||||
| """Test get node name by full name.""" | """Test get node name by full name.""" | ||||
| node_name = self.graph_handler.get_node_name_by_full_name(full_name) | |||||
| node_name = self.graph_handler.get_node_name_by_full_name(full_name, 'kernel_graph_0') | |||||
| assert node_name == expect_node_name | assert node_name == expect_node_name | ||||
| @pytest.mark.parametrize("node_name, ascend, expect_next", [ | @pytest.mark.parametrize("node_name, ascend, expect_next", [ | ||||
| @@ -112,3 +125,13 @@ class TestGraphHandler: | |||||
| """Test get node by BFS order.""" | """Test get node by BFS order.""" | ||||
| next_node = self.graph_handler.get_node_by_bfs_order(node_name, ascend) | next_node = self.graph_handler.get_node_by_bfs_order(node_name, ascend) | ||||
| assert next_node == expect_next | assert next_node == expect_next | ||||
| @pytest.mark.parametrize("tensor_name, expect_file", [ | |||||
| ("Default/network-WithLossCell/_loss_fn-SoftmaxCrossEntropyWithLogits/OneHot-op0:0", "get_tensor_graph-0.json"), | |||||
| ("Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/ReLUV2-op89:1", "get_tensor_graph-1.json"), | |||||
| ("Default/tuple_getitem[10]_0/tuple_getitem-op206:1", "get_tensor_graph-2.json"), | |||||
| ]) | |||||
| def test_get_tensor_graph(self, tensor_name, expect_file): | |||||
| """Test get tensor graph.""" | |||||
| res = self.graph_handler.get_tensor_graph(tensor_name, None) | |||||
| compare_debugger_result_with_file(res, expect_file=os.path.join('graph', expect_file)) | |||||
| @@ -14,11 +14,10 @@ | |||||
| # ============================================================================ | # ============================================================================ | ||||
| """Test tensor_handler.py""" | """Test tensor_handler.py""" | ||||
| from unittest import mock | from unittest import mock | ||||
| from unittest.mock import MagicMock | |||||
| import pytest | import pytest | ||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.stream_handler.tensor_handler import TensorHandler | from mindinsight.debugger.stream_handler.tensor_handler import TensorHandler | ||||
| @@ -41,34 +40,6 @@ class TestTensorHandler: | |||||
| self.tensor_handler.get(filter_condition) | self.tensor_handler.get(filter_condition) | ||||
| assert "No tensor named {}".format(None) in str(ex.value) | assert "No tensor named {}".format(None) in str(ex.value) | ||||
| @mock.patch.object(TensorHandler, '_get_prev_tensor_value_status') | |||||
| @pytest.mark.parametrize( | |||||
| "node_type, tensor_name, tensor_info", [('Parameter', 'name', {'full_name': 'name', 'step': 1})]) | |||||
| def test_update_has_prev_step_field(self, mock_get_pre, node_type, tensor_name, tensor_info): | |||||
| """Test update has_prev_step field in tensor info.""" | |||||
| mock_get_pre.return_value = True | |||||
| res = self.tensor_handler._update_has_prev_step_field(tensor_info, tensor_name, node_type) | |||||
| assert res | |||||
| def test_get_prev_tensor_value_status_none(self): | |||||
| """ | |||||
| test _get_prev_tensor_value_status. | |||||
| """ | |||||
| res = self.tensor_handler._get_prev_tensor_value_status('tensor_name') | |||||
| assert res is None | |||||
| @mock.patch.object(TensorHandler, '_get_tensor') | |||||
| def test_get_prev_tensor_value_status_false(self, mock_get_tensor): | |||||
| """ | |||||
| test _get_prev_tensor_value_status. | |||||
| """ | |||||
| self.tensor_handler._cur_step = 1 | |||||
| mock_tensor = MagicMock() | |||||
| mock_tensor.value = None | |||||
| mock_get_tensor.return_value = mock_tensor | |||||
| res = self.tensor_handler._get_prev_tensor_value_status('tensor_name') | |||||
| assert not res | |||||
| def test_get_tensor_value_by_name_none(self): | def test_get_tensor_value_by_name_none(self): | ||||
| """Test get_tensor_value_by_name.""" | """Test get_tensor_value_by_name.""" | ||||
| res = self.tensor_handler.get_tensor_value_by_name('tensor_name', True) | res = self.tensor_handler.get_tensor_value_by_name('tensor_name', True) | ||||
| @@ -22,47 +22,56 @@ import json | |||||
| import os | import os | ||||
| from unittest import mock, TestCase | from unittest import mock, TestCase | ||||
| from google.protobuf import json_format | |||||
| import pytest | import pytest | ||||
| from google.protobuf import json_format | |||||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | ||||
| DebuggerParamTypeError | DebuggerParamTypeError | ||||
| from mindinsight.debugger.common.log import logger as log | |||||
| from mindinsight.debugger.common.log import LOGGER as log | |||||
| from mindinsight.debugger.stream_cache.watchpoint import Watchpoint | from mindinsight.debugger.stream_cache.watchpoint import Watchpoint | ||||
| from mindinsight.debugger.stream_handler.watchpoint_handler import WatchpointHandler, \ | from mindinsight.debugger.stream_handler.watchpoint_handler import WatchpointHandler, \ | ||||
| WatchpointHitHandler, validate_watch_condition, validate_watch_condition_params | WatchpointHitHandler, validate_watch_condition, validate_watch_condition_params | ||||
| from tests.ut.debugger.configurations import init_graph_handler, mock_tensor_proto, \ | from tests.ut.debugger.configurations import init_graph_handler, mock_tensor_proto, \ | ||||
| mock_tensor_history, get_node_basic_infos, get_watch_nodes_by_search, \ | |||||
| mock_tensor_history, get_node_basic_infos, \ | |||||
| init_watchpoint_hit_handler | init_watchpoint_hit_handler | ||||
| from tests.utils.tools import compare_result_with_file | from tests.utils.tools import compare_result_with_file | ||||
| class TestWatchpointHandler: | class TestWatchpointHandler: | ||||
| """Test WatchpointHandler.""" | """Test WatchpointHandler.""" | ||||
| @classmethod | @classmethod | ||||
| def setup_class(cls): | def setup_class(cls): | ||||
| """Init WatchpointHandler for watchpoint unittest.""" | """Init WatchpointHandler for watchpoint unittest.""" | ||||
| cls.handler = WatchpointHandler() | |||||
| cls.results_dir = os.path.join(os.path.dirname(__file__), | cls.results_dir = os.path.join(os.path.dirname(__file__), | ||||
| '../expected_results/watchpoint') | '../expected_results/watchpoint') | ||||
| cls.graph_results_dir = os.path.join(os.path.dirname(__file__), | cls.graph_results_dir = os.path.join(os.path.dirname(__file__), | ||||
| '../expected_results/graph') | '../expected_results/graph') | ||||
| cls.graph_stream = init_graph_handler() | cls.graph_stream = init_graph_handler() | ||||
| cls.conditionmgr = None | |||||
| cls.handler = None | |||||
| @pytest.mark.parametrize( | |||||
| "watch_condition, watch_nodes, watch_point_id, expect_new_id", [ | |||||
| ({'condition': 'INF'}, None, None, 1), | |||||
| ({'condition': 'INF'}, ["Default"], None, 2), | |||||
| ({'condition': 'MAX_GT', 'param': 1}, | |||||
| ["Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92"], None, 3) | |||||
| ]) | |||||
| def test_create_watchpoint(self, watch_condition, watch_nodes, | |||||
| watch_point_id, expect_new_id): | |||||
| def setup_method(self): | |||||
| """Init watchpoint for each unittest.""" | |||||
| self.conditionmgr = ConditionMgr() | |||||
| self.handler = WatchpointHandler() | |||||
| self._create_watchpoint() | |||||
| def _create_watchpoint(self): | |||||
| """Test create_watchpoint.""" | """Test create_watchpoint.""" | ||||
| watch_nodes = get_node_basic_infos(watch_nodes) | |||||
| watch_point_id = self.handler.create_watchpoint(watch_condition, watch_nodes, watch_point_id) | |||||
| assert watch_point_id == expect_new_id | |||||
| watchpoints = [ | |||||
| ({'id': 'inf', 'params': []}, None, None, 1), | |||||
| ({'id': 'inf', 'params': []}, ["Default"], None, 2), | |||||
| ({'id': 'max_gt', 'params': [{'name': 'param', 'value': 1, 'disable': False}]}, | |||||
| ["Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92"], | |||||
| None, 3) | |||||
| ] | |||||
| for watch_condition, watch_nodes, watch_point_id, expect_new_id in watchpoints: | |||||
| watch_nodes = get_node_basic_infos(watch_nodes) | |||||
| watch_point_id = self.handler.create_watchpoint(self.conditionmgr, watch_condition, watch_nodes, | |||||
| watch_point_id) | |||||
| assert watch_point_id == expect_new_id | |||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "watch_point_id, watch_nodes, watched, expect_updated_id", [ | "watch_point_id, watch_nodes, watched, expect_updated_id", [ | ||||
| @@ -84,31 +93,28 @@ class TestWatchpointHandler: | |||||
| ]) | ]) | ||||
| def test_update_watchpoint_delete(self, watch_point_id, watch_nodes, watched, expect_updated_id): | def test_update_watchpoint_delete(self, watch_point_id, watch_nodes, watched, expect_updated_id): | ||||
| """Test update_watchpoint on deletion.""" | """Test update_watchpoint on deletion.""" | ||||
| watch_nodes = get_watch_nodes_by_search(watch_nodes) | |||||
| watch_nodes = get_node_basic_infos(watch_nodes) | |||||
| with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | ||||
| self.handler.update_watchpoint(watch_point_id, watch_nodes, watched) | self.handler.update_watchpoint(watch_point_id, watch_nodes, watched) | ||||
| TestCase().assertIn(f"DEBUG:debugger.debugger:Update watchpoint {expect_updated_id} in cache.", | TestCase().assertIn(f"DEBUG:debugger.debugger:Update watchpoint {expect_updated_id} in cache.", | ||||
| log_content.output) | log_content.output) | ||||
| @pytest.mark.parametrize("filter_condition, result_file", [ | |||||
| (True, 'watchpoint_handler_get_0.json') | |||||
| ]) | |||||
| def test_get_filter_true(self, filter_condition, result_file): | |||||
| def test_get_pending_commands(self): | |||||
| """Test get with filter_condition is True.""" | """Test get with filter_condition is True.""" | ||||
| result_file = 'watchpoint_handler_get_0.json' | |||||
| file_path = os.path.join(self.results_dir, result_file) | file_path = os.path.join(self.results_dir, result_file) | ||||
| with open(file_path, 'r') as f: | |||||
| contents = json.load(f) | |||||
| reply = self.handler.get(filter_condition) | |||||
| protos = reply.get('watch_points') | |||||
| with open(file_path, 'r') as file_handler: | |||||
| contents = json.load(file_handler) | |||||
| protos = self.handler.get_pending_commands(self.graph_stream) | |||||
| for proto in protos: | for proto in protos: | ||||
| msg_dict = json_format.MessageToDict(proto) | msg_dict = json_format.MessageToDict(proto) | ||||
| msg_dict['watch_nodes_num'] = len(msg_dict.pop('watchNodes', [])) | |||||
| assert msg_dict in contents | assert msg_dict in contents | ||||
| @pytest.mark.parametrize("filter_condition, result_file", [ | @pytest.mark.parametrize("filter_condition, result_file", [ | ||||
| (False, 'watchpoint_handler_get_1.json') | |||||
| (None, 'watchpoint_handler_get_1.json') | |||||
| ]) | ]) | ||||
| def test_get_filter_false(self, filter_condition, result_file): | |||||
| def test_get_without_filter(self, filter_condition, result_file): | |||||
| """Test get with filer_condition is False.""" | """Test get with filer_condition is False.""" | ||||
| file_path = os.path.join(self.results_dir, result_file) | file_path = os.path.join(self.results_dir, result_file) | ||||
| reply = self.handler.get(filter_condition) | reply = self.handler.get(filter_condition) | ||||
| @@ -121,7 +127,7 @@ class TestWatchpointHandler: | |||||
| with pytest.raises(DebuggerParamValueError) as err: | with pytest.raises(DebuggerParamValueError) as err: | ||||
| self.handler.get_watchpoint_by_id(watchpoint_id) | self.handler.get_watchpoint_by_id(watchpoint_id) | ||||
| assert err.value.error_code == '5054B081' | assert err.value.error_code == '5054B081' | ||||
| assert err.value.message == f"ValueError. Invalid watchpoint id {watchpoint_id}" | |||||
| assert err.value.message == f"ValueError. Invalid watchpoint id: {watchpoint_id}" | |||||
| @pytest.mark.parametrize("graph_file, watch_point_id", [ | @pytest.mark.parametrize("graph_file, watch_point_id", [ | ||||
| ('graph_handler_get_3_single_node.json', 4) | ('graph_handler_get_3_single_node.json', 4) | ||||
| @@ -129,20 +135,37 @@ class TestWatchpointHandler: | |||||
| def test_set_watch_nodes(self, graph_file, watch_point_id): | def test_set_watch_nodes(self, graph_file, watch_point_id): | ||||
| """Test set_watch_nodes.""" | """Test set_watch_nodes.""" | ||||
| path = os.path.join(self.graph_results_dir, graph_file) | path = os.path.join(self.graph_results_dir, graph_file) | ||||
| with open(path, 'r') as f: | |||||
| graph = json.load(f) | |||||
| with open(path, 'r') as file_handler: | |||||
| graph = json.load(file_handler) | |||||
| self.handler.set_watch_nodes(graph, self.graph_stream, watch_point_id) | self.handler.set_watch_nodes(graph, self.graph_stream, watch_point_id) | ||||
| @pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
| "watch_point_id, expect_deleted_ids", [ | "watch_point_id, expect_deleted_ids", [ | ||||
| (3, 3), (2, 2) | |||||
| (3, 3), (None, 2) | |||||
| ]) | ]) | ||||
| def test_delete_watchpoint(self, watch_point_id, expect_deleted_ids): | def test_delete_watchpoint(self, watch_point_id, expect_deleted_ids): | ||||
| """Test delete_watchpoint.""" | """Test delete_watchpoint.""" | ||||
| self.handler.sync_set_cmd({}) | |||||
| with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | ||||
| self.handler.delete_watchpoint(watch_point_id) | self.handler.delete_watchpoint(watch_point_id) | ||||
| TestCase().assertIn(f"DEBUG:debugger.debugger:Delete watchpoint {expect_deleted_ids} in cache.", | |||||
| log_content.output) | |||||
| TestCase().assertIn( | |||||
| f"DEBUG:debugger.debugger:Delete watchpoint {expect_deleted_ids} in cache.", | |||||
| log_content.output) | |||||
| @pytest.mark.parametrize( | |||||
| "watch_point_id, expect_deleted_ids", [ | |||||
| (3, 3), (2, 2) | |||||
| ]) | |||||
| def test_delete_watchpoint_in_cache(self, watch_point_id, | |||||
| expect_deleted_ids): | |||||
| """Test delete_watchpoint.""" | |||||
| for _ in range(watch_point_id): | |||||
| self.handler.create_watchpoint(self.conditionmgr, {'id': 'inf', 'param': []}) | |||||
| with TestCase().assertLogs(logger=log, level='DEBUG') as log_content: | |||||
| self.handler.delete_watchpoint(watch_point_id) | |||||
| TestCase().assertIn( | |||||
| f"DEBUG:debugger.debugger:Cancel create watchpoint {expect_deleted_ids} in cache.", | |||||
| log_content.output) | |||||
| class TestWatchpointHitHandler: | class TestWatchpointHitHandler: | ||||
| @@ -155,8 +178,7 @@ class TestWatchpointHitHandler: | |||||
| 'tensor_proto': mock_tensor_proto(), | 'tensor_proto': mock_tensor_proto(), | ||||
| 'watchpoint': watchpoint, | 'watchpoint': watchpoint, | ||||
| 'node_name': 'Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92', | 'node_name': 'Gradients/Default/network-WithLossCell/_backbone-LeNet5/relu-ReLU/gradReLU/ReluGradV2-op92', | ||||
| 'finished': True, | |||||
| 'slot': 0 | |||||
| 'graph_name': 'kernel_graph_0', | |||||
| } | } | ||||
| @classmethod | @classmethod | ||||
| @@ -198,24 +220,26 @@ class TestWatchpointHitHandler: | |||||
| def test_validate_watch_condition_type_error(): | def test_validate_watch_condition_type_error(): | ||||
| """Test validate_watch_condition.""" | """Test validate_watch_condition.""" | ||||
| watch_condition = [] | watch_condition = [] | ||||
| conditionmgr = ConditionMgr() | |||||
| with pytest.raises(DebuggerParamTypeError) as err: | with pytest.raises(DebuggerParamTypeError) as err: | ||||
| validate_watch_condition(watch_condition) | |||||
| validate_watch_condition(conditionmgr, watch_condition) | |||||
| assert err.value.error_code == '5054B080' | assert err.value.error_code == '5054B080' | ||||
| watch_condition = {'watch_condition': {'condition': 'MAXIMUM'}} | watch_condition = {'watch_condition': {'condition': 'MAXIMUM'}} | ||||
| with pytest.raises(DebuggerParamValueError) as err: | with pytest.raises(DebuggerParamValueError) as err: | ||||
| validate_watch_condition(watch_condition) | |||||
| validate_watch_condition(conditionmgr, watch_condition) | |||||
| assert err.value.error_code == '5054B081' | assert err.value.error_code == '5054B081' | ||||
| def test_validate_watch_condition_params_except(): | def test_validate_watch_condition_params_except(): | ||||
| """Test validate_watch_condition_params.""" | """Test validate_watch_condition_params.""" | ||||
| watch_condition = {'watch_condition': {'condition': 'NAN', 'param': 1}} | |||||
| watch_condition = {'id': 'inf', 'params': [{'name': 'param', 'value': 0, 'disable': False}]} | |||||
| conditionmgr = ConditionMgr() | |||||
| with pytest.raises(DebuggerParamValueError) as err: | with pytest.raises(DebuggerParamValueError) as err: | ||||
| validate_watch_condition_params(watch_condition) | |||||
| validate_watch_condition_params(conditionmgr, watch_condition) | |||||
| assert err.value.error_code == '5054B081' | assert err.value.error_code == '5054B081' | ||||
| watch_condition = {'watch_condition': {'condition': 'MAX_GT', 'param': '0'}} | |||||
| watch_condition = {'id': 'max_gt', 'params': [{'name': 'param', 'value': '0', 'disable': False}]} | |||||
| with pytest.raises(DebuggerParamValueError) as err: | with pytest.raises(DebuggerParamValueError) as err: | ||||
| validate_watch_condition_params(watch_condition) | |||||
| validate_watch_condition_params(conditionmgr, watch_condition) | |||||
| assert err.value.error_code == '5054B081' | assert err.value.error_code == '5054B081' | ||||
| @@ -23,6 +23,7 @@ from unittest.mock import MagicMock | |||||
| import numpy as np | import numpy as np | ||||
| from mindinsight.conditionmgr.conditionmgr import ConditionMgr | |||||
| from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus | from mindinsight.debugger.common.utils import get_ack_reply, ServerStatus | ||||
| from mindinsight.debugger.debugger_cache import DebuggerCache | from mindinsight.debugger.debugger_cache import DebuggerCache | ||||
| from mindinsight.debugger.debugger_grpc_server import DebuggerGrpcServer | from mindinsight.debugger.debugger_grpc_server import DebuggerGrpcServer | ||||
| @@ -117,7 +118,7 @@ class TestDebuggerGrpcServer: | |||||
| def setup_method(self): | def setup_method(self): | ||||
| """Initialize for each testcase.""" | """Initialize for each testcase.""" | ||||
| cache_store = DebuggerCache() | cache_store = DebuggerCache() | ||||
| self._server = DebuggerGrpcServer(cache_store) | |||||
| self._server = DebuggerGrpcServer(cache_store, condition_mgr=ConditionMgr()) | |||||
| def test_waitcmd_with_pending_status(self): | def test_waitcmd_with_pending_status(self): | ||||
| """Test wait command interface when status is pending.""" | """Test wait command interface when status is pending.""" | ||||
| @@ -125,6 +126,7 @@ class TestDebuggerGrpcServer: | |||||
| assert res.status == EventReply.Status.FAILED | assert res.status == EventReply.Status.FAILED | ||||
| @mock.patch.object(WatchpointHitHandler, 'empty', False) | @mock.patch.object(WatchpointHitHandler, 'empty', False) | ||||
| @mock.patch.object(WatchpointHitHandler, 'put') | |||||
| @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command') | @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command') | ||||
| def test_waitcmd_with_old_command(self, *args): | def test_waitcmd_with_old_command(self, *args): | ||||
| """Test wait command interface with old command.""" | """Test wait command interface with old command.""" | ||||
| @@ -132,8 +134,8 @@ class TestDebuggerGrpcServer: | |||||
| args[0].return_value = old_command | args[0].return_value = old_command | ||||
| setattr(self._server, '_status', ServerStatus.WAITING) | setattr(self._server, '_status', ServerStatus.WAITING) | ||||
| setattr(self._server, '_received_view_cmd', {'node_name': 'mock_node_name'}) | setattr(self._server, '_received_view_cmd', {'node_name': 'mock_node_name'}) | ||||
| setattr(self._server, '_received_hit', True) | |||||
| res = self._server.WaitCMD(MagicMock(cur_step=1), MagicMock()) | |||||
| setattr(self._server, '_received_hit', [MagicMock()]) | |||||
| res = self._server.WaitCMD(MagicMock(cur_step=1, cur_node=''), MagicMock()) | |||||
| assert res == old_command | assert res == old_command | ||||
| @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command', return_value=None) | @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command', return_value=None) | ||||
| @@ -143,7 +145,7 @@ class TestDebuggerGrpcServer: | |||||
| old_command = MockDataGenerator.get_run_cmd(steps=1) | old_command = MockDataGenerator.get_run_cmd(steps=1) | ||||
| args[0].return_value = old_command | args[0].return_value = old_command | ||||
| setattr(self._server, '_status', ServerStatus.WAITING) | setattr(self._server, '_status', ServerStatus.WAITING) | ||||
| res = self._server.WaitCMD(MagicMock(cur_step=1), MagicMock()) | |||||
| res = self._server.WaitCMD(MagicMock(cur_step=1, cur_node=''), MagicMock()) | |||||
| assert res == old_command | assert res == old_command | ||||
| @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command', return_value=None) | @mock.patch.object(DebuggerGrpcServer, '_deal_with_old_command', return_value=None) | ||||
| @@ -152,7 +154,7 @@ class TestDebuggerGrpcServer: | |||||
| """Test wait command interface with next command is None.""" | """Test wait command interface with next command is None.""" | ||||
| args[0].return_value = None | args[0].return_value = None | ||||
| setattr(self._server, '_status', ServerStatus.RECEIVE_GRAPH) | setattr(self._server, '_status', ServerStatus.RECEIVE_GRAPH) | ||||
| res = self._server.WaitCMD(MagicMock(cur_step=1), MagicMock()) | |||||
| res = self._server.WaitCMD(MagicMock(cur_step=1, cur_node=''), MagicMock()) | |||||
| assert res == get_ack_reply(1) | assert res == get_ack_reply(1) | ||||
| @mock.patch.object(DebuggerCache, 'get_command', return_value=(0, None)) | @mock.patch.object(DebuggerCache, 'get_command', return_value=(0, None)) | ||||
| @@ -228,6 +230,7 @@ class TestDebuggerGrpcServer: | |||||
| assert res == get_ack_reply() | assert res == get_ack_reply() | ||||
| @mock.patch.object(WatchpointHandler, 'get_watchpoint_by_id') | @mock.patch.object(WatchpointHandler, 'get_watchpoint_by_id') | ||||
| @mock.patch.object(GraphHandler, 'get_graph_id_by_full_name', return_value='mock_graph_name') | |||||
| @mock.patch.object(GraphHandler, 'get_node_name_by_full_name') | @mock.patch.object(GraphHandler, 'get_node_name_by_full_name') | ||||
| def test_send_watchpoint_hit(self, *args): | def test_send_watchpoint_hit(self, *args): | ||||
| """Test SendWatchpointHits interface.""" | """Test SendWatchpointHits interface.""" | ||||
| @@ -28,6 +28,7 @@ import pytest | |||||
| from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | from mindinsight.debugger.common.exceptions.exceptions import DebuggerParamValueError, \ | ||||
| DebuggerCompareTensorError, DebuggerCreateWatchPointError, DebuggerDeleteWatchPointError | DebuggerCompareTensorError, DebuggerCreateWatchPointError, DebuggerDeleteWatchPointError | ||||
| from mindinsight.debugger.common.utils import Streams | |||||
| from mindinsight.debugger.debugger_cache import DebuggerCache | from mindinsight.debugger.debugger_cache import DebuggerCache | ||||
| from mindinsight.debugger.debugger_server import DebuggerServer | from mindinsight.debugger.debugger_server import DebuggerServer | ||||
| from mindinsight.debugger.debugger_server import grpc_server_base | from mindinsight.debugger.debugger_server import grpc_server_base | ||||
| @@ -81,7 +82,7 @@ class TestDebuggerServer: | |||||
| """Test search node.""" | """Test search node.""" | ||||
| mock_graph = {'nodes': ['mock_nodes']} | mock_graph = {'nodes': ['mock_nodes']} | ||||
| args[0].return_value = mock_graph | args[0].return_value = mock_graph | ||||
| res = self._server.search('mock_name') | |||||
| res = self._server.search({'name': 'mock_name'}) | |||||
| assert res == mock_graph | assert res == mock_graph | ||||
| def test_tensor_comparision_with_wrong_status(self): | def test_tensor_comparision_with_wrong_status(self): | ||||
| @@ -93,6 +94,7 @@ class TestDebuggerServer: | |||||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | @mock.patch.object(MetadataHandler, 'state', 'waiting') | ||||
| @mock.patch.object(GraphHandler, 'get_node_type') | @mock.patch.object(GraphHandler, 'get_node_type') | ||||
| @mock.patch.object(GraphHandler, 'get_graph_id_by_name') | |||||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_node_name') | @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_node_name') | ||||
| def test_tensor_comparision_with_wrong_type(self, *args): | def test_tensor_comparision_with_wrong_type(self, *args): | ||||
| """Test tensor comparison with wrong type.""" | """Test tensor comparison with wrong type.""" | ||||
| @@ -101,6 +103,7 @@ class TestDebuggerServer: | |||||
| self._server.tensor_comparisons(name='mock_node_name:0', shape='[:, :]') | self._server.tensor_comparisons(name='mock_node_name:0', shape='[:, :]') | ||||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | @mock.patch.object(MetadataHandler, 'state', 'waiting') | ||||
| @mock.patch.object(GraphHandler, 'get_graph_id_by_name') | |||||
| @mock.patch.object(GraphHandler, 'get_node_type', return_value='Parameter') | @mock.patch.object(GraphHandler, 'get_node_type', return_value='Parameter') | ||||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_node_name') | @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_node_name') | ||||
| @mock.patch.object(TensorHandler, 'get_tensors_diff') | @mock.patch.object(TensorHandler, 'get_tensors_diff') | ||||
| @@ -156,7 +159,7 @@ class TestDebuggerServer: | |||||
| """Test validate leaf name.""" | """Test validate leaf name.""" | ||||
| args[0].return_value = 'name_scope' | args[0].return_value = 'name_scope' | ||||
| with pytest.raises(DebuggerParamValueError, match='Invalid leaf node name.'): | with pytest.raises(DebuggerParamValueError, match='Invalid leaf node name.'): | ||||
| self._server._validate_leaf_name(node_name='mock_node_name') | |||||
| self._server._validate_leaf_name(node_name='mock_node_name', graph_name='mock_graph_name') | |||||
| @mock.patch.object(TensorHandler, 'get') | @mock.patch.object(TensorHandler, 'get') | ||||
| @mock.patch.object(DebuggerServer, '_get_tensor_name_and_type_by_ui_name') | @mock.patch.object(DebuggerServer, '_get_tensor_name_and_type_by_ui_name') | ||||
| @@ -199,40 +202,42 @@ class TestDebuggerServer: | |||||
| self._server.create_watchpoint(watch_condition={'condition': 'INF'}) | self._server.create_watchpoint(watch_condition={'condition': 'INF'}) | ||||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | @mock.patch.object(MetadataHandler, 'state', 'waiting') | ||||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_full_name') | |||||
| @mock.patch.object(GraphHandler, 'get_full_name', return_value='mock_full_name') | |||||
| @mock.patch.object(GraphHandler, 'get_nodes_by_scope', return_value=[MagicMock()]) | |||||
| @mock.patch.object(GraphHandler, 'get_node_basic_info', return_value=[MagicMock()]) | |||||
| @mock.patch.object(GraphHandler, 'get_node_type', return_value='aggregation_scope') | @mock.patch.object(GraphHandler, 'get_node_type', return_value='aggregation_scope') | ||||
| @mock.patch.object(WatchpointHandler, 'create_watchpoint') | @mock.patch.object(WatchpointHandler, 'create_watchpoint') | ||||
| def test_create_watchpoint(self, *args): | def test_create_watchpoint(self, *args): | ||||
| """Test create watchpoint.""" | """Test create watchpoint.""" | ||||
| args[0].return_value = 1 | args[0].return_value = 1 | ||||
| res = self._server.create_watchpoint({'condition': 'INF'}, ['watch_node_name']) | res = self._server.create_watchpoint({'condition': 'INF'}, ['watch_node_name']) | ||||
| assert res == {'id': 1} | |||||
| assert res == {'id': 1, 'metadata': {'enable_recheck': False, 'state': 'waiting'}} | |||||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | @mock.patch.object(MetadataHandler, 'state', 'waiting') | ||||
| @mock.patch.object(GraphHandler, 'validate_graph_name', return_value='kernel_graph_0') | |||||
| @mock.patch.object(GraphHandler, 'get_searched_node_list') | @mock.patch.object(GraphHandler, 'get_searched_node_list') | ||||
| @mock.patch.object(WatchpointHandler, 'validate_watchpoint_id') | @mock.patch.object(WatchpointHandler, 'validate_watchpoint_id') | ||||
| @mock.patch.object(WatchpointHandler, 'update_watchpoint') | @mock.patch.object(WatchpointHandler, 'update_watchpoint') | ||||
| def test_update_watchpoint(self, *args): | def test_update_watchpoint(self, *args): | ||||
| """Test update watchpoint.""" | """Test update watchpoint.""" | ||||
| args[2].return_value = [MagicMock(name='seatch_name/op_name')] | |||||
| args[2].return_value = [MagicMock(name='search_name/op_name')] | |||||
| res = self._server.update_watchpoint( | res = self._server.update_watchpoint( | ||||
| watch_point_id=1, watch_nodes=['search_name'], mode=1, name='search_name') | |||||
| assert res == {} | |||||
| watch_point_id=1, watch_nodes=['search_name'], | |||||
| mode=1, search_pattern={'name': 'search_name'}, graph_name='kernel_graph_0') | |||||
| assert res == {'metadata': {'enable_recheck': False, 'state': 'waiting'}} | |||||
| def test_delete_watchpoint_with_wrong_state(self): | def test_delete_watchpoint_with_wrong_state(self): | ||||
| """Test delete watchpoint with wrong state.""" | """Test delete watchpoint with wrong state.""" | ||||
| with pytest.raises(DebuggerDeleteWatchPointError, match='Failed to delete watchpoint'): | with pytest.raises(DebuggerDeleteWatchPointError, match='Failed to delete watchpoint'): | ||||
| self._server.delete_watchpoint(watch_point_id=1) | self._server.delete_watchpoint(watch_point_id=1) | ||||
| @mock.patch.object(MetadataHandler, 'state', 'waiting') | |||||
| @mock.patch.object(MetadataHandler, 'enable_recheck', True) | |||||
| @mock.patch.object(WatchpointHandler, 'is_recheckable', return_value=True) | |||||
| @mock.patch.object(WatchpointHandler, 'delete_watchpoint') | @mock.patch.object(WatchpointHandler, 'delete_watchpoint') | ||||
| def test_delete_watchpoint(self, *args): | def test_delete_watchpoint(self, *args): | ||||
| """Test delete watchpoint with wrong state.""" | """Test delete watchpoint with wrong state.""" | ||||
| self._server.cache_store.get_stream_handler(Streams.METADATA).state = 'waiting' | |||||
| args[0].return_value = None | args[0].return_value = None | ||||
| res = self._server.delete_watchpoint(1) | res = self._server.delete_watchpoint(1) | ||||
| assert res == {} | |||||
| assert res == {'metadata': {'enable_recheck': True, 'state': 'waiting'}} | |||||
| @pytest.mark.parametrize('mode, cur_state, state', [ | @pytest.mark.parametrize('mode, cur_state, state', [ | ||||
| ('continue', 'waiting', 'running'), | ('continue', 'waiting', 'running'), | ||||
| @@ -242,7 +247,7 @@ class TestDebuggerServer: | |||||
| """Test control request.""" | """Test control request.""" | ||||
| with mock.patch.object(MetadataHandler, 'state', cur_state): | with mock.patch.object(MetadataHandler, 'state', cur_state): | ||||
| res = self._server.control({'mode': mode}) | res = self._server.control({'mode': mode}) | ||||
| assert res == {'metadata': {'state': state}} | |||||
| assert res == {'metadata': {'enable_recheck': False, 'state': state}} | |||||
| def test_construct_run_event(self): | def test_construct_run_event(self): | ||||
| """Test construct run event.""" | """Test construct run event.""" | ||||
| @@ -34,11 +34,10 @@ def get_url(url, params): | |||||
| Args: | Args: | ||||
| url (str): A link requested. For example, http://example.com. | url (str): A link requested. For example, http://example.com. | ||||
| params (dict): A dict consists of params. For example, {'offset': 1, 'limit':'100}. | |||||
| params (dict): A dict consists of params. For example, {'offset': 1, 'limit': 100}. | |||||
| Returns: | Returns: | ||||
| str, like http://example.com?offset=1&limit=100 | str, like http://example.com?offset=1&limit=100 | ||||
| """ | """ | ||||
| return url + '?' + urlencode(params) | return url + '?' + urlencode(params) | ||||