You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

profile_api.py 16 kB

5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """
  16. Profile api.
  17. This module provides the interfaces to profile functions.
  18. """
  19. import json
  20. import os
  21. from flask import Blueprint
  22. from flask import Response
  23. from flask import jsonify
  24. from flask import request
  25. from marshmallow import ValidationError
  26. from mindinsight.conf import settings
  27. from mindinsight.datavisual.utils.tools import get_train_id, get_profiler_dir, to_int, get_device_id
  28. from mindinsight.datavisual.utils.tools import unquote_args
  29. from mindinsight.profiler.analyser.analyser_factory import AnalyserFactory
  30. from mindinsight.profiler.analyser.minddata_analyser import MinddataAnalyser
  31. from mindinsight.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException
  32. from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir
  33. from mindinsight.profiler.common.validator.validate import validate_condition, validate_ui_proc
  34. from mindinsight.profiler.common.validator.validate import validate_minddata_pipeline_condition
  35. from mindinsight.profiler.common.validator.validate_path import \
  36. validate_and_normalize_path
  37. from mindinsight.profiler.common.validator.validate_path import validate_and_normalize_profiler_path
  38. from mindinsight.profiler.proposer.compose_proposer import ComposeProposal
  39. from mindinsight.utils.exceptions import ParamValueError
  40. BLUEPRINT = Blueprint("profile", __name__, url_prefix=settings.URL_PREFIX)
  41. @BLUEPRINT.route("/profile/ops/search", methods=["POST"])
  42. def get_profile_op_info():
  43. """
  44. Get operation profiling info.
  45. Returns:
  46. str, the operation profiling information.
  47. Raises:
  48. ParamValueError: If the search condition contains some errors.
  49. Examples:
  50. >>> POST http://xxxx/v1/mindinsight/profile/ops/search
  51. """
  52. profiler_dir = get_profiler_dir(request)
  53. train_id = get_train_id(request)
  54. if not profiler_dir or not train_id:
  55. raise ParamValueError("No profiler_dir or train_id.")
  56. search_condition = request.stream.read()
  57. try:
  58. search_condition = json.loads(search_condition if search_condition else "{}")
  59. except Exception:
  60. raise ParamValueError("Json data parse failed.")
  61. validate_condition(search_condition)
  62. device_id = search_condition.get("device_id", "0")
  63. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  64. try:
  65. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  66. except ValidationError:
  67. raise ParamValueError("Invalid profiler dir")
  68. op_type = search_condition.get("op_type")
  69. analyser = AnalyserFactory.instance().get_analyser(
  70. op_type, profiler_dir_abs, device_id
  71. )
  72. op_info = analyser.query(search_condition)
  73. return jsonify(op_info)
  74. @BLUEPRINT.route("/profile/devices", methods=["GET"])
  75. def get_profile_device_list():
  76. """
  77. Get profile device list.
  78. Returns:
  79. list, the available device list.
  80. Raises:
  81. ParamValueError: If the search condition contains some errors.
  82. Examples:
  83. >>> POST http://xxxx/v1/mindinsight/profile/devices
  84. """
  85. profiler_dir = get_profiler_dir(request)
  86. train_id = get_train_id(request)
  87. if not profiler_dir or not train_id:
  88. raise ParamValueError("No profiler_dir or train_id.")
  89. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  90. try:
  91. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  92. except ValidationError:
  93. raise ParamValueError("Invalid profiler dir")
  94. device_list = analyse_device_list_from_profiler_dir(profiler_dir_abs)
  95. return jsonify(device_list)
  96. @BLUEPRINT.route("/profile/training-trace/graph", methods=["GET"])
  97. def get_training_trace_graph():
  98. """
  99. Get training trace info of one step.
  100. Returns:
  101. Response, the training trace info of one step.
  102. Examples:
  103. >>> GET http://xxxx/v1/mindinsight/profile/training-trace/graph
  104. """
  105. summary_dir = request.args.get("dir")
  106. profiler_dir = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  107. graph_type = request.args.get("type", default='0')
  108. graph_type = to_int(graph_type, 'graph_type')
  109. device_id = request.args.get("device_id", default='0')
  110. _ = to_int(device_id, 'device_id')
  111. graph_info = {}
  112. try:
  113. analyser = AnalyserFactory.instance().get_analyser(
  114. 'step_trace', profiler_dir, device_id)
  115. except ProfilerFileNotFoundException:
  116. return jsonify(graph_info)
  117. graph_info = analyser.query({
  118. 'filter_condition': {
  119. 'mode': 'step',
  120. 'step_id': graph_type
  121. }})
  122. graph_info['summary'] = analyser.summary
  123. graph_info['point_info'] = analyser.point_info
  124. return jsonify(graph_info)
  125. @BLUEPRINT.route("/profile/training-trace/target-time-info", methods=["GET"])
  126. def get_target_time_info():
  127. """
  128. Get all the time information of the specified column.
  129. Returns:
  130. Response, all the time information of the specified column.
  131. Examples:
  132. >>> GET http://xxxx/v1/mindinsight/profile/training-trace/target-time-info
  133. """
  134. summary_dir = request.args.get("dir")
  135. profiler_dir = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  136. proc_name = request.args.get("type")
  137. validate_ui_proc(proc_name)
  138. device_id = request.args.get("device_id", default='0')
  139. _ = to_int(device_id, 'device_id')
  140. analyser = AnalyserFactory.instance().get_analyser(
  141. 'step_trace', profiler_dir, device_id)
  142. target_time_info = analyser.query({
  143. 'filter_condition': {
  144. 'mode': 'proc',
  145. 'proc_name': proc_name
  146. }})
  147. target_time_info['summary'] = analyser.summary
  148. return jsonify(target_time_info)
  149. @BLUEPRINT.route("/profile/queue_info", methods=["GET"])
  150. def get_queue_info():
  151. """
  152. Get each type queue info.
  153. Returns:
  154. Response, the queue info.
  155. Examples:
  156. >>> GET http://xxxx/v1/mindinsight/profile/queue_info
  157. """
  158. profile_dir = get_profiler_abs_dir(request)
  159. device_id = unquote_args(request, "device_id")
  160. queue_type = unquote_args(request, "type")
  161. queue_info = {}
  162. minddata_analyser = AnalyserFactory.instance().get_analyser(
  163. 'minddata', profile_dir, device_id)
  164. if queue_type == "get_next":
  165. queue_info, _ = minddata_analyser.analyse_get_next_info(info_type="queue")
  166. elif queue_type == "device_queue":
  167. queue_info, _ = minddata_analyser.analyse_device_queue_info(info_type="queue")
  168. return jsonify(queue_info)
  169. @BLUEPRINT.route("/profile/minddata_op", methods=["GET"])
  170. def get_time_info():
  171. """
  172. Get minddata operation info.
  173. Returns:
  174. Response, the minddata operation info.
  175. Examples:
  176. >>> GET http://xxxx/v1/mindinsight/profile/minddata_op
  177. """
  178. profile_dir = get_profiler_abs_dir(request)
  179. device_id = unquote_args(request, "device_id")
  180. op_type = unquote_args(request, "type")
  181. time_info = {
  182. 'size': 0,
  183. 'info': [],
  184. "summary": {"time_summary": {}},
  185. "advise": {}
  186. }
  187. minddata_analyser = AnalyserFactory.instance().get_analyser(
  188. 'minddata', profile_dir, device_id)
  189. if op_type == "get_next":
  190. _, time_info = minddata_analyser.analyse_get_next_info(info_type="time")
  191. elif op_type == "device_queue":
  192. _, time_info = minddata_analyser.analyse_device_queue_info(info_type="time")
  193. return jsonify(time_info)
  194. @BLUEPRINT.route("/profile/process_summary", methods=["GET"])
  195. def get_process_summary():
  196. """
  197. Get interval process summary.
  198. Returns:
  199. Response, the process summary.
  200. Examples:
  201. >>> GET http://xxxx/v1/mindinsight/profile/process_summary
  202. """
  203. profile_dir = get_profiler_abs_dir(request)
  204. device_id = unquote_args(request, "device_id")
  205. minddata_analyser = AnalyserFactory.instance().get_analyser(
  206. 'minddata', profile_dir, device_id)
  207. get_next_queue_info, _ = minddata_analyser.analyse_get_next_info(info_type="queue")
  208. device_queue_info, _ = minddata_analyser.analyse_device_queue_info(info_type="queue")
  209. result = MinddataAnalyser.analyse_queue_summary(get_next_queue_info, device_queue_info)
  210. return jsonify(result)
  211. def get_profiler_abs_dir(requests):
  212. """
  213. Get interval process summary.
  214. Args:
  215. requests (LocalProxy): The requests.
  216. Returns:
  217. str, the profiler abs dir.
  218. """
  219. profiler_dir = get_profiler_dir(requests)
  220. train_id = get_train_id(requests)
  221. if not profiler_dir or not train_id:
  222. raise ParamValueError("No profiler_dir or train_id.")
  223. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  224. try:
  225. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  226. except ValidationError:
  227. raise ParamValueError("Invalid profiler dir")
  228. return profiler_dir_abs
  229. @BLUEPRINT.route("/profile/summary/propose", methods=["GET"])
  230. def get_profile_summary_proposal():
  231. """
  232. Get summary profiling proposal.
  233. Returns:
  234. str, the summary profiling proposal.
  235. Raises:
  236. ParamValueError: If the parameters contain some errors.
  237. Examples:
  238. >>> GET http://xxxx/v1/mindinsight/profile/summary/propose
  239. """
  240. profiler_dir = get_profiler_dir(request)
  241. train_id = get_train_id(request)
  242. device_id = get_device_id(request)
  243. if not profiler_dir or not train_id:
  244. raise ParamValueError("No profiler_dir or train_id.")
  245. profiler_dir_abs = os.path.join(settings.SUMMARY_BASE_DIR, train_id, profiler_dir)
  246. try:
  247. profiler_dir_abs = validate_and_normalize_path(profiler_dir_abs, "profiler")
  248. except ValidationError:
  249. raise ParamValueError("Invalid profiler dir")
  250. step_trace_condition = {"filter_condition": {"mode": "proc",
  251. "proc_name": "iteration_interval",
  252. "step_id": 0}}
  253. options = {'step_trace': {"iter_interval": step_trace_condition}}
  254. proposal_type_list = ['step_trace', 'minddata', 'minddata_pipeline', 'common']
  255. proposal_obj = ComposeProposal(profiler_dir_abs, device_id, proposal_type_list)
  256. proposal_info = proposal_obj.get_proposal(options)
  257. # Use json.dumps for orderly return
  258. return Response(json.dumps(proposal_info), mimetype='application/json')
  259. @BLUEPRINT.route("/profile/minddata-pipeline/op-queue", methods=["POST"])
  260. def get_minddata_pipeline_op_queue_info():
  261. """
  262. Get minddata pipeline operator info and queue info.
  263. Returns:
  264. str, the operation information and queue information.
  265. Raises:
  266. ParamValueError: If the search condition contains some errors.
  267. Examples:
  268. >>> POST http://xxxx/v1/mindinsight/profile/minddata-pipeline/op-queue
  269. """
  270. profiler_dir = get_profiler_dir(request)
  271. train_id = get_train_id(request)
  272. if not profiler_dir or not train_id:
  273. raise ParamValueError("No profiler_dir or train_id.")
  274. profiler_dir_abs = os.path.join(
  275. settings.SUMMARY_BASE_DIR, train_id, profiler_dir
  276. )
  277. try:
  278. profiler_dir_abs = validate_and_normalize_path(
  279. profiler_dir_abs, "profiler"
  280. )
  281. except ValidationError:
  282. raise ParamValueError("Invalid profiler dir.")
  283. condition = request.stream.read()
  284. try:
  285. condition = json.loads(condition) if condition else {}
  286. except Exception:
  287. raise ParamValueError("Json data parse failed.")
  288. validate_minddata_pipeline_condition(condition)
  289. device_id = condition.get("device_id", "0")
  290. analyser = AnalyserFactory.instance().get_analyser(
  291. 'minddata_pipeline', profiler_dir_abs, device_id
  292. )
  293. op_info = analyser.query(condition)
  294. return jsonify(op_info)
  295. @BLUEPRINT.route("/profile/minddata-pipeline/queue", methods=["GET"])
  296. def get_minddata_pipeline_queue_info():
  297. """
  298. Get the special minddata pipeline queue info.
  299. Returns:
  300. str, the queue information.
  301. Raises:
  302. ParamValueError: If the search condition contains some errors.
  303. Examples:
  304. >>> GET http://xxxx/v1/mindinsight/profile/minddata-pipeline/queue
  305. """
  306. profiler_dir = get_profiler_dir(request)
  307. train_id = get_train_id(request)
  308. if not profiler_dir or not train_id:
  309. raise ParamValueError("No profiler_dir or train_id.")
  310. profiler_dir_abs = os.path.join(
  311. settings.SUMMARY_BASE_DIR, train_id, profiler_dir
  312. )
  313. try:
  314. profiler_dir_abs = validate_and_normalize_path(
  315. profiler_dir_abs, "profiler"
  316. )
  317. except ValidationError:
  318. raise ParamValueError("Invalid profiler dir.")
  319. device_id = request.args.get('device_id', default='0')
  320. op_id = request.args.get('op_id', type=int)
  321. if op_id is None:
  322. raise ParamValueError("Invalid operator id or operator id does not exist.")
  323. analyser = AnalyserFactory.instance().get_analyser(
  324. 'minddata_pipeline', profiler_dir_abs, device_id
  325. )
  326. op_queue_info = analyser.get_op_and_parent_op_info(op_id)
  327. return jsonify(op_queue_info)
  328. @BLUEPRINT.route("/profile/timeline-summary", methods=["GET"])
  329. def get_timeline_summary():
  330. """
  331. Get timeline summary info.
  332. Returns:
  333. Response, the timeline summary info.
  334. Examples:
  335. >>> GET http://xxxx/v1/mindinsight/profile/timeline-summary
  336. """
  337. summary_dir = request.args.get("dir")
  338. profiler_dir = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  339. device_id = request.args.get("device_id", default='0')
  340. _ = to_int(device_id, 'device_id')
  341. analyser = AnalyserFactory.instance().get_analyser(
  342. 'timeline', profiler_dir, device_id)
  343. summary = analyser.get_timeline_summary()
  344. return summary
  345. @BLUEPRINT.route("/profile/timeline", methods=["GET"])
  346. def get_timeline_detail():
  347. """
  348. Get timeline detail.
  349. Returns:
  350. Response, the detail information of timeline.
  351. Examples:
  352. >>> GET http://xxxx/v1/mindinsight/profile/timeline
  353. """
  354. summary_dir = request.args.get("dir")
  355. profiler_dir = validate_and_normalize_profiler_path(summary_dir, settings.SUMMARY_BASE_DIR)
  356. device_id = request.args.get("device_id", default='0')
  357. _ = to_int(device_id, 'device_id')
  358. analyser = AnalyserFactory.instance().get_analyser(
  359. 'timeline', profiler_dir, device_id)
  360. timeline = analyser.get_display_timeline()
  361. return jsonify(timeline)
  362. def init_module(app):
  363. """
  364. Init module entry.
  365. Args:
  366. app: the application obj.
  367. """
  368. app.register_blueprint(BLUEPRINT)