You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

profile_api.py 18 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Profile api.
  17. This module provides the interfaces to profile functions.
  18. """
  19. import json
  20. import os
  21. from flask import Blueprint
  22. from flask import jsonify
  23. from flask import request
  24. from marshmallow import ValidationError
  25. from mindinsight.conf import settings
  26. from mindinsight.datavisual.utils.tools import get_train_id, get_profiler_dir, to_int, get_device_id
  27. from mindinsight.datavisual.utils.tools import unquote_args
  28. from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
  29. from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser
  30. from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
  31. from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir, \
  32. check_train_job_and_profiler_dir
  33. from mindinsight.profiler.common.validator.validate import validate_condition, validate_ui_proc
  34. from mindinsight.profiler.common.validator.validate import validate_minddata_pipeline_condition
  35. from mindinsight.profiler.common.validator.validate_path import \
  36. validate_and_normalize_path
  37. from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_profiler_path
  38. from mindinsight.profiler.proposer.compose_proposer import ComposeProposal
  39. from mindinsight.profiler.common.log import logger
  40. from mindinsight.utils.exceptions import ParamValueError
  41. from mindinsight.backend.application import CustomResponse
  42. BLUEPRINT = Blueprint("profile", __name__, url_prefix=settings.URL_PATH_PREFIX+settings.API_PREFIX)
  43. @BLUEPRINT.route("/profile/ops/search", methods=["POST"])
  44. def get_profile_op_info():
  45. """
  46. Get operation profiling info.
  47. Returns:
  48. str, the operation profiling information.
  49. Raises:
  50. ParamValueError: If the search condition contains some errors.
  51. Examples:
  52. >>> POST http://xxxx/v1/mindinsight/profile/ops/search
  53. """
  54. profiler_dir = get_profiler_dir(request)
  55. train_id = get_train_id(request)
  56. if not profiler_dir or not train_id:
  57. raise ParamValueError("No profiler_dir or train_id.")
  58. search_condition = request.stream.read()
  59. try:
  60. search_condition = json.loads(search_condition if search_condition else "{}")
  61. except (json.JSONDecodeError, ValueError):
  62. raise ParamValueError("Json data parse failed.")
  63. validate_condition(search_condition)
  64. device_id = search_condition.get("device_id", "0")
  65. to_int(device_id, 'device_id')
  66. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  67. try:
  68. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  69. except ValidationError:
  70. raise ParamValueError("Invalid profiler dir")
  71. check_train_job_and_profiler_dir(profiler_dir_abs)
  72. op_type = search_condition.get("op_type")
  73. analyser = AnalyserFactory.instance().get_analyser(
  74. op_type, profiler_dir_abs, device_id
  75. )
  76. op_info = analyser.query(search_condition)
  77. return jsonify(op_info)
  78. @BLUEPRINT.route("/profile/devices", methods=["GET"])
  79. def get_profile_device_list():
  80. """
  81. Get profile device list.
  82. Returns:
  83. list, the available device list.
  84. Raises:
  85. ParamValueError: If the search condition contains some errors.
  86. Examples:
  87. >>> POST http://xxxx/v1/mindinsight/profile/devices
  88. """
  89. profiler_dir = get_profiler_dir(request)
  90. train_id = get_train_id(request)
  91. if not profiler_dir or not train_id:
  92. raise ParamValueError("No profiler_dir or train_id.")
  93. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  94. try:
  95. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  96. except ValidationError:
  97. raise ParamValueError("Invalid profiler dir")
  98. check_train_job_and_profiler_dir(profiler_dir_abs)
  99. device_list, _ = analyse_device_list_from_profiler_dir(profiler_dir_abs)
  100. return jsonify(device_list)
  101. @BLUEPRINT.route("/profile/training-trace/graph", methods=["GET"])
  102. def get_training_trace_graph():
  103. """
  104. Get training trace info of one step.
  105. Returns:
  106. Response, the training trace info of one step.
  107. Examples:
  108. >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
  109. """
  110. summary_dir = request.args.get("dir")
  111. profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  112. check_train_job_and_profiler_dir(profiler_dir_abs)
  113. graph_type = request.args.get("type", default='0')
  114. graph_type = to_int(graph_type, 'graph_type')
  115. device_id = request.args.get("device_id", default='0')
  116. _ = to_int(device_id, 'device_id')
  117. graph_info = {}
  118. try:
  119. analyser = AnalyserFactory.instance().get_analyser(
  120. 'step_trace', profiler_dir_abs, device_id)
  121. except ProfilerFileNotFoundException:
  122. return jsonify(graph_info)
  123. graph_info = analyser.query({
  124. 'filter_condition': {
  125. 'mode': 'step',
  126. 'step_id': graph_type
  127. }})
  128. graph_info['summary'] = analyser.summary
  129. graph_info['point_info'] = analyser.point_info
  130. return jsonify(graph_info)
  131. @BLUEPRINT.route("/profile/training-trace/target-time-info", methods=["GET"])
  132. def get_target_time_info():
  133. """
  134. Get all the time information of the specified column.
  135. Returns:
  136. Response, all the time information of the specified column.
  137. Examples:
  138. >>> GET http://xxxx/v1/mindinsight/profile/training-trace/target-time-info
  139. """
  140. summary_dir = request.args.get("dir")
  141. profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  142. check_train_job_and_profiler_dir(profiler_dir_abs)
  143. proc_name = request.args.get("type")
  144. validate_ui_proc(proc_name)
  145. device_id = request.args.get("device_id", default='0')
  146. _ = to_int(device_id, 'device_id')
  147. analyser = AnalyserFactory.instance().get_analyser(
  148. 'step_trace', profiler_dir_abs, device_id)
  149. target_time_info = analyser.query({
  150. 'filter_condition': {
  151. 'mode': 'proc',
  152. 'proc_name': proc_name
  153. }})
  154. target_time_info['summary'] = analyser.summary
  155. return jsonify(target_time_info)
  156. @BLUEPRINT.route("/profile/queue_info", methods=["GET"])
  157. def get_queue_info():
  158. """
  159. Get each type queue info.
  160. Returns:
  161. Response, the queue info.
  162. Examples:
  163. >>> GET http://xxxx/v1/mindinsight/profile/queue_info
  164. """
  165. profiler_dir_abs = get_profiler_abs_dir(request)
  166. check_train_job_and_profiler_dir(profiler_dir_abs)
  167. device_id = unquote_args(request, "device_id")
  168. to_int(device_id, 'device_id')
  169. queue_type = unquote_args(request, "type")
  170. queue_info = {}
  171. minddata_analyser = AnalyserFactory.instance().get_analyser(
  172. 'minddata', profiler_dir_abs, device_id)
  173. if queue_type == "get_next":
  174. queue_info, _ = minddata_analyser.analyse_get_next_info(info_type="queue")
  175. elif queue_type == "device_queue":
  176. queue_info, _ = minddata_analyser.analyse_device_queue_info(info_type="queue")
  177. return jsonify(queue_info)
  178. @BLUEPRINT.route("/profile/minddata_op", methods=["GET"])
  179. def get_time_info():
  180. """
  181. Get minddata operation info.
  182. Returns:
  183. Response, the minddata operation info.
  184. Examples:
  185. >>> GET http://xxxx/v1/mindinsight/profile/minddata_op
  186. """
  187. profiler_dir_abs = get_profiler_abs_dir(request)
  188. check_train_job_and_profiler_dir(profiler_dir_abs)
  189. device_id = unquote_args(request, "device_id")
  190. to_int(device_id, 'device_id')
  191. op_type = unquote_args(request, "type")
  192. time_info = {
  193. 'size': 0,
  194. 'info': [],
  195. "summary": {"time_summary": {}},
  196. "advise": {}
  197. }
  198. minddata_analyser = AnalyserFactory.instance().get_analyser(
  199. 'minddata', profiler_dir_abs, device_id)
  200. if op_type == "get_next":
  201. _, time_info = minddata_analyser.analyse_get_next_info(info_type="time")
  202. elif op_type == "device_queue":
  203. _, time_info = minddata_analyser.analyse_device_queue_info(info_type="time")
  204. return jsonify(time_info)
  205. @BLUEPRINT.route("/profile/process_summary", methods=["GET"])
  206. def get_process_summary():
  207. """
  208. Get interval process summary.
  209. Returns:
  210. Response, the process summary.
  211. Examples:
  212. >>> GET http://xxxx/v1/mindinsight/profile/process_summary
  213. """
  214. profiler_dir_abs = get_profiler_abs_dir(request)
  215. check_train_job_and_profiler_dir(profiler_dir_abs)
  216. device_id = unquote_args(request, "device_id")
  217. to_int(device_id, 'device_id')
  218. minddata_analyser = AnalyserFactory.instance().get_analyser(
  219. 'minddata', profiler_dir_abs, device_id)
  220. get_next_queue_info, _ = minddata_analyser.analyse_get_next_info(info_type="queue")
  221. device_queue_info, _ = minddata_analyser.analyse_device_queue_info(info_type="queue")
  222. result = MinddataAnalyser.analyse_queue_summary(get_next_queue_info, device_queue_info)
  223. return jsonify(result)
  224. def get_profiler_abs_dir(requests):
  225. """
  226. Get interval process summary.
  227. Args:
  228. requests (LocalProxy): The requests.
  229. Returns:
  230. str, the profiler abs dir.
  231. """
  232. profiler_dir = get_profiler_dir(requests)
  233. train_id = get_train_id(requests)
  234. if not profiler_dir or not train_id:
  235. raise ParamValueError("No profiler_dir or train_id.")
  236. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  237. try:
  238. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  239. except ValidationError:
  240. raise ParamValueError("Invalid profiler dir")
  241. return profiler_dir_abs
  242. @BLUEPRINT.route("/profile/summary/propose", methods=["GET"])
  243. def get_profile_summary_proposal():
  244. """
  245. Get summary profiling proposal.
  246. Returns:
  247. str, the summary profiling proposal.
  248. Raises:
  249. ParamValueError: If the parameters contain some errors.
  250. Examples:
  251. >>> GET http://xxxx/v1/mindinsight/profile/summary/propose
  252. """
  253. profiler_dir = get_profiler_dir(request)
  254. train_id = get_train_id(request)
  255. device_id = get_device_id(request)
  256. if not profiler_dir or not train_id:
  257. raise ParamValueError("No profiler_dir or train_id.")
  258. to_int(device_id, 'device_id')
  259. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  260. try:
  261. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  262. except ValidationError:
  263. raise ParamValueError("Invalid profiler dir")
  264. check_train_job_and_profiler_dir(profiler_dir_abs)
  265. step_trace_condition = {"filter_condition": {"mode": "proc",
  266. "proc_name": "iteration_interval",
  267. "step_id": 0}}
  268. options = {'step_trace': {"iter_interval": step_trace_condition}}
  269. proposal_type_list = ['step_trace', 'minddata', 'minddata_pipeline', 'common']
  270. proposal_obj = ComposeProposal(profiler_dir_abs, device_id, proposal_type_list)
  271. proposal_info = proposal_obj.get_proposal(options)
  272. # Use json.dumps for orderly return
  273. return CustomResponse(json.dumps(proposal_info), mimetype='application/json')
  274. @BLUEPRINT.route("/profile/minddata-pipeline/op-queue", methods=["POST"])
  275. def get_minddata_pipeline_op_queue_info():
  276. """
  277. Get minddata pipeline operator info and queue info.
  278. Returns:
  279. str, the operation information and queue information.
  280. Raises:
  281. ParamValueError: If the search condition contains some errors.
  282. Examples:
  283. >>> POST http://xxxx/v1/mindinsight/profile/minddata-pipeline/op-queue
  284. """
  285. profiler_dir = get_profiler_dir(request)
  286. train_id = get_train_id(request)
  287. if not profiler_dir or not train_id:
  288. raise ParamValueError("No profiler_dir or train_id.")
  289. profiler_dir_abs = os.path.join(
  290. settings.SUMMARY_BASE_DIR, train_id, profiler_dir
  291. )
  292. try:
  293. profiler_dir_abs = validate_and_normalize_path(
  294. profiler_dir_abs, "profiler"
  295. )
  296. except ValidationError:
  297. raise ParamValueError("Invalid profiler dir.")
  298. check_train_job_and_profiler_dir(profiler_dir_abs)
  299. condition = request.stream.read()
  300. try:
  301. condition = json.loads(condition) if condition else {}
  302. except Exception:
  303. raise ParamValueError("Json data parse failed.")
  304. validate_minddata_pipeline_condition(condition)
  305. device_id = condition.get("device_id", "0")
  306. to_int(device_id, 'device_id')
  307. analyser = AnalyserFactory.instance().get_analyser(
  308. 'minddata_pipeline', profiler_dir_abs, device_id
  309. )
  310. op_info = analyser.query(condition)
  311. return jsonify(op_info)
  312. @BLUEPRINT.route("/profile/minddata-pipeline/queue", methods=["GET"])
  313. def get_minddata_pipeline_queue_info():
  314. """
  315. Get the special minddata pipeline queue info.
  316. Returns:
  317. str, the queue information.
  318. Raises:
  319. ParamValueError: If the search condition contains some errors.
  320. Examples:
  321. >>> GET http://xxxx/v1/mindinsight/profile/minddata-pipeline/queue
  322. """
  323. profiler_dir = get_profiler_dir(request)
  324. train_id = get_train_id(request)
  325. if not profiler_dir or not train_id:
  326. raise ParamValueError("No profiler_dir or train_id.")
  327. profiler_dir_abs = os.path.join(
  328. settings.SUMMARY_BASE_DIR, train_id, profiler_dir
  329. )
  330. try:
  331. profiler_dir_abs = validate_and_normalize_path(
  332. profiler_dir_abs, "profiler"
  333. )
  334. except ValidationError:
  335. raise ParamValueError("Invalid profiler dir.")
  336. check_train_job_and_profiler_dir(profiler_dir_abs)
  337. device_id = request.args.get('device_id', default='0')
  338. to_int(device_id, 'device_id')
  339. op_id = request.args.get('op_id', type=int)
  340. if op_id is None:
  341. raise ParamValueError("Invalid operator id or operator id does not exist.")
  342. analyser = AnalyserFactory.instance().get_analyser(
  343. 'minddata_pipeline', profiler_dir_abs, device_id
  344. )
  345. op_queue_info = analyser.get_op_and_parent_op_info(op_id)
  346. return jsonify(op_queue_info)
  347. @BLUEPRINT.route("/profile/timeline-summary", methods=["GET"])
  348. def get_timeline_summary():
  349. """
  350. Get timeline summary info.
  351. Returns:
  352. Response, the timeline summary info.
  353. Examples:
  354. >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
  355. """
  356. summary_dir = request.args.get("dir")
  357. profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  358. check_train_job_and_profiler_dir(profiler_dir_abs)
  359. device_id = request.args.get("device_id", default='0')
  360. _ = to_int(device_id, 'device_id')
  361. device_type = request.args.get("device_type", default='ascend')
  362. if device_type not in ['gpu', 'ascend']:
  363. logger.info("Invalid device_type, device_type should be gpu or ascend.")
  364. raise ParamValueError("Invalid device_type.")
  365. analyser = AnalyserFactory.instance().get_analyser(
  366. 'timeline', profiler_dir_abs, device_id)
  367. summary = analyser.get_timeline_summary(device_type)
  368. return summary
  369. @BLUEPRINT.route("/profile/timeline", methods=["GET"])
  370. def get_timeline_detail():
  371. """
  372. Get timeline detail.
  373. Returns:
  374. Response, the detail information of timeline.
  375. Examples:
  376. >>> GET http://xxxx/v1/mindinsight/profile/timeline
  377. """
  378. summary_dir = request.args.get("dir")
  379. profiler_dir_abs = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  380. check_train_job_and_profiler_dir(profiler_dir_abs)
  381. device_id = request.args.get("device_id", default='0')
  382. _ = to_int(device_id, 'device_id')
  383. device_type = request.args.get("device_type", default='ascend')
  384. if device_type not in ['gpu', 'ascend']:
  385. logger.info("Invalid device_type, device_type should be gpu or ascend.")
  386. raise ParamValueError("Invalid device_type.")
  387. analyser = AnalyserFactory.instance().get_analyser(
  388. 'timeline', profiler_dir_abs, device_id)
  389. timeline = analyser.get_display_timeline(device_type)
  390. return jsonify(timeline)
  391. def init_module(app):
  392. """
  393. Init module entry.
  394. Args:
  395. app: the application obj.
  396. """
  397. app.register_blueprint(BLUEPRINT)