Browse Source

profiler mem usage: added new rest api for memory breakdowns

tags/v1.2.0-rc1
zhangyunshu 4 years ago
parent
commit
5b0bc69c95
2 changed files with 77 additions and 24 deletions
  1. +34
    -2
      mindinsight/backend/profiler/profile_api.py
  2. +43
    -22
      mindinsight/profiler/analyser/memory_usage_analyser.py

+ 34
- 2
mindinsight/backend/profiler/profile_api.py View File

@@ -509,7 +509,7 @@ def get_memory_usage_summary():
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')
to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
if device_type not in ['ascend']:
logger.info("Invalid device_type, Memory Usage only supports Ascend for now.")
@@ -538,7 +538,7 @@ def get_memory_usage_graphics():
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
_ = to_int(device_id, 'device_id')
to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
if device_type not in ['ascend']:
logger.info("Invalid device_type, Memory Usage only supports Ascend for now.")
@@ -551,6 +551,38 @@ def get_memory_usage_graphics():
return graphics


@BLUEPRINT.route("/profile/memory-breakdowns", methods=["GET"])
def get_memory_usage_breakdowns():
"""
Get memory breakdowns of each node.

Returns:
Response, the memory breakdowns for each node.

Examples:
>>> GET http://xxxx/v1/mindinsight/profile/memory-breakdowns
"""
summary_dir = request.args.get("dir")
profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
check_train_job_and_profiler_dir(profiler_dir_abs)

device_id = request.args.get("device_id", default='0')
to_int(device_id, 'device_id')
device_type = request.args.get("device_type", default='ascend')
graph_id = request.args.get("graph_id", default='0')
node_id = request.args.get("node_id", default='0')
node_id = to_int(node_id, 'node_id')
if device_type not in ['ascend']:
logger.error("Invalid device_type, Memory Usage only supports Ascend for now.")
raise ParamValueError("Invalid device_type.")

analyser = AnalyserFactory.instance().get_analyser(
'memory_usage', profiler_dir_abs, device_id)
breakdowns = analyser.get_memory_usage_breakdowns(device_type, graph_id, node_id)

return breakdowns


def init_module(app):
"""
Init module entry.


+ 43
- 22
mindinsight/profiler/analyser/memory_usage_analyser.py View File

@@ -18,7 +18,8 @@ import json
import os

from mindinsight.profiler.analyser.base_analyser import BaseAnalyser
from mindinsight.profiler.common.exceptions.exceptions import ProfilerIOException
from mindinsight.profiler.common.exceptions.exceptions import ProfilerIOException, \
ProfilerFileNotFoundException
from mindinsight.profiler.common.log import logger
from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_path
from mindinsight.utils.exceptions import ParamValueError
@@ -72,10 +73,38 @@ class MemoryUsageAnalyser(BaseAnalyser):
json, the content of memory usage data.
"""
memory_details = self._get_file_content(device_type, FileType.DETAILS.value)
self._process_memory_details(memory_details)
for graph_id in memory_details.keys():
if 'breakdowns' in memory_details[graph_id]:
memory_details[graph_id].pop('breakdowns')

return memory_details

def get_memory_usage_breakdowns(self, device_type, graph_id, node_id):
"""
Get memory usage breakdowns for each node.

Args:
device_type (str): Device type, e.g., GPU, Ascend.
graph_id (int): Graph id.
node_id (int): Node id.

Returns:
json, the content of memory usage breakdowns.
"""
memory_details = self._get_file_content(device_type, FileType.DETAILS.value)
if graph_id not in memory_details:
logger.error('Invalid graph id: %s', graph_id)
raise ParamValueError('Invalid graph id.')

graph = memory_details[graph_id]
if not ('breakdowns' in graph and node_id < len(graph['breakdowns'])):
logger.error('Invalid node id: %s', node_id)
raise ParamValueError('Invalid node id.')

memory_breakdowns = graph.get('breakdowns')[node_id]

return {'breakdowns': memory_breakdowns}

def _get_file_content(self, device_type, file_type):
"""
Get file content for different types of memory usage files.
@@ -88,26 +117,18 @@ class MemoryUsageAnalyser(BaseAnalyser):
dict, file content corresponding to file_type.
"""
file_path = self._get_file_path(device_type, file_type)
file_content = {}
if os.path.exists(file_path):
try:
with open(file_path, 'r') as f_obj:
file_content = json.load(f_obj)
except (IOError, OSError, json.JSONDecodeError) as err:
logger.error('Error occurred when read memory file: %s', err)
raise ProfilerIOException
else:
logger.info('Invalid file path. Please check the output path: %s', file_path)
if not os.path.exists(file_path):
logger.error('Invalid file path. Please check the output path: %s', file_path)
raise ProfilerFileNotFoundException(msg='Invalid memory file path.')

return file_content
try:
with open(file_path, 'r') as f_obj:
file_content = json.load(f_obj)
except (IOError, OSError, json.JSONDecodeError) as err:
logger.error('Error occurred when read memory file: %s', err)
raise ProfilerIOException

@staticmethod
def _process_memory_details(memory_details):
"""Process memory details, change the node dict to node list."""
for key in memory_details.keys():
if 'nodes' in memory_details[key]:
nodes = list(memory_details[key]['nodes'].values())
memory_details[key]['nodes'] = nodes
return file_content

def _get_file_path(self, device_type, file_type):
"""
@@ -127,8 +148,8 @@ class MemoryUsageAnalyser(BaseAnalyser):
elif file_type is FileType.DETAILS.value:
filename = self._details_filename.format(self._device_id)
else:
logger.info('Memory Usage only supports Ascend for now. Please check the device type.')
raise ParamValueError("Invalid device_type.")
logger.error('Memory Usage only supports Ascend for now. Please check the device type.')
raise ParamValueError("Invalid device type.")

file_path = os.path.join(self._profiling_dir, filename)
file_path = validate_and_normalize_path(


Loading…
Cancel
Save