You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 21 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. # Copyright 2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Parse utils module."""
  16. import os
  17. import xlsxwriter
  18. from mindinsight.domain.graph.base import InputType, OutputType
  19. class Toolkit:
  20. """Toolkit."""
  21. PLACEHOLDER = '-'
  22. def __init__(self, dump_dir, constants, parameters, operators):
  23. self.dump_dir = dump_dir
  24. self.constants = constants
  25. self.parameters = parameters
  26. self.operators = operators
  27. def export_xlsx(self, file_path):
  28. """
  29. Export graph data to Excel file.
  30. Args:
  31. file_path (str) : Excel file path.
  32. """
  33. target_dir = os.path.dirname(file_path)
  34. if not os.path.isdir(target_dir):
  35. print(f'Directory {target_dir} not exists')
  36. return
  37. workbook = xlsxwriter.Workbook(file_path)
  38. # text_v_align: 1-top, 2-middle, 3-bottom
  39. # text_h_align: 1-left, 2-center, 3-right
  40. styles = dict(
  41. header_left_fmt=workbook.add_format(dict(
  42. text_v_align=2, text_h_align=1,
  43. font_color='#000000', bg_color='#d9d9d9',
  44. bold=True,
  45. )),
  46. header_center_fmt=workbook.add_format(dict(
  47. text_v_align=2, text_h_align=2,
  48. font_color='#000000', bg_color='#d9d9d9',
  49. bold=True,
  50. )),
  51. content_left_fmt=workbook.add_format(dict(
  52. text_v_align=2, text_h_align=1,
  53. text_wrap=False,
  54. )),
  55. content_center_fmt=workbook.add_format(dict(
  56. text_v_align=2, text_h_align=2,
  57. text_wrap=False,
  58. )),
  59. content_wrapped_fmt=workbook.add_format(dict(
  60. text_v_align=2, text_h_align=1,
  61. text_wrap=True,
  62. )),
  63. )
  64. self._add_info_worksheet(workbook, styles)
  65. self._add_constant_worksheet(workbook, styles)
  66. self._add_parameter_worksheet(workbook, styles)
  67. self._add_operator_worksheet(workbook, styles)
  68. self._add_statistics_worksheet(workbook, styles)
  69. self._add_source_worksheet(workbook, styles)
  70. for worksheet in workbook.sheetnames.values():
  71. worksheet.freeze_panes(1, 0)
  72. worksheet.freeze_panes(1, 1)
  73. workbook.close()
  74. def _convert_column_indices(self, metas):
  75. """
  76. Convert column metas into indices mapping.
  77. Args:
  78. metas (list): Column metas.
  79. Returns:
  80. dict, holds the indicess of columns.
  81. """
  82. mapping = {}
  83. for index, (name, _, _) in enumerate(metas):
  84. mapping[name] = index
  85. return mapping
  86. def _add_info_worksheet(self, workbook, styles):
  87. """
  88. Add info worksheet.
  89. Args:
  90. workbook (WorkBook): Excel workbook.
  91. styles (dict): Workbook styles.
  92. """
  93. worksheet = workbook.add_worksheet('info')
  94. # column metas contain column names, styles and widths
  95. column_metas = [
  96. ('argument', styles['header_center_fmt'], 20),
  97. ('value', styles['header_left_fmt'], 150),
  98. ]
  99. for index, (column, fmt, width) in enumerate(column_metas):
  100. worksheet.set_column(index, index, width)
  101. worksheet.write(0, index, column, fmt)
  102. worksheet.autofilter(0, 0, 0, len(column_metas) - 1)
  103. indices = self._convert_column_indices(column_metas)
  104. worksheet.write(1, indices.get('argument'), 'dump-dir', styles['content_center_fmt'])
  105. worksheet.write(1, indices.get('value'), self.dump_dir or '', styles['content_left_fmt'])
  106. def _get_operator_input_info(self, operator, input_types):
  107. """
  108. Add operator worksheet.
  109. Args:
  110. operator (Operator): Operator.
  111. input_types (dict): Input types.
  112. Returns:
  113. dict, input info content.
  114. """
  115. input_content = ''
  116. input_dtype_content = ''
  117. input_shape_content = ''
  118. for op_input in operator.inputs:
  119. if op_input.type == InputType.OPERATOR:
  120. op = input_types[InputType.OPERATOR][op_input.op_id]
  121. if op.type == 'Load':
  122. input_content += f'{op.type}_{op.name}' + '\n'
  123. else:
  124. input_content += f'{op.type}_{op.op_id}' + '\n'
  125. if op_input.info:
  126. input_dtype_content += str(op_input.info['dtype']) + '\n'
  127. input_shape_content += str(op_input.info.get('shape') or Toolkit.PLACEHOLDER) + '\n'
  128. else:
  129. input_dtype_content += Toolkit.PLACEHOLDER + '\n'
  130. input_shape_content += Toolkit.PLACEHOLDER + '\n'
  131. elif op_input.type == InputType.PARAMETER:
  132. input_content += op_input.name + '\n'
  133. param = input_types[InputType.PARAMETER][op_input.name]
  134. if param.output:
  135. input_dtype_content += param.output.info['dtype'] + '\n'
  136. input_shape_content += str(param.output.info.get('shape') or Toolkit.PLACEHOLDER) + '\n'
  137. else:
  138. input_dtype_content += Toolkit.PLACEHOLDER + '\n'
  139. input_shape_content += Toolkit.PLACEHOLDER + '\n'
  140. elif op_input.type == InputType.CONSTANT:
  141. input_content += op_input.name + '\n'
  142. cst = input_types[InputType.CONSTANT][op_input.name]
  143. if cst.output.type == OutputType.TENSOR:
  144. input_dtype_content += cst.output.info.get('dtype') or Toolkit.PLACEHOLDER + '\n'
  145. input_shape_content += str(cst.output.info.get('shape') or Toolkit.PLACEHOLDER) + '\n'
  146. else:
  147. input_dtype_content += Toolkit.PLACEHOLDER + '\n'
  148. input_shape_content += Toolkit.PLACEHOLDER + '\n'
  149. else:
  150. input_content += op_input.name + '\n'
  151. input_dtype_content += Toolkit.PLACEHOLDER + '\n'
  152. input_shape_content += Toolkit.PLACEHOLDER + '\n'
  153. return {
  154. 'input': input_content.strip(),
  155. 'input_dtype': input_dtype_content.strip(),
  156. 'input_shape': input_shape_content.strip(),
  157. }
  158. def _add_operator_worksheet(self, workbook, styles):
  159. """
  160. Add operator worksheet.
  161. Args:
  162. workbook (WorkBook): Excel workbook.
  163. styles (dict): Workbook styles.
  164. """
  165. constant_mapping = dict((constant.name, constant) for constant in self.constants)
  166. parameter_mapping = dict((parameter.name, parameter) for parameter in self.parameters)
  167. operator_mapping = dict((operator.op_id, operator) for operator in self.operators)
  168. input_types = {
  169. InputType.CONSTANT: constant_mapping,
  170. InputType.PARAMETER: parameter_mapping,
  171. InputType.OPERATOR: operator_mapping,
  172. }
  173. worksheet = workbook.add_worksheet('operator')
  174. # column metas contain column names, styles and widths
  175. column_metas = [
  176. ('operator', styles['header_left_fmt'], 30),
  177. ('input', styles['header_left_fmt'], 30),
  178. ('input_dtype', styles['header_left_fmt'], 20),
  179. ('input_shape', styles['header_left_fmt'], 25),
  180. ('output_dtype', styles['header_left_fmt'], 20),
  181. ('output_shape', styles['header_left_fmt'], 25),
  182. ('downstream', styles['header_left_fmt'], 30),
  183. ('name', styles['header_center_fmt'], 10),
  184. ('attrs', styles['header_left_fmt'], 30),
  185. ('full_name', styles['header_left_fmt'], 20),
  186. ('device_id', styles['header_left_fmt'], 20),
  187. ('graph_name', styles['header_left_fmt'], 30),
  188. ('stack', styles['header_left_fmt'], 150),
  189. ]
  190. for index, (column, fmt, width) in enumerate(column_metas):
  191. worksheet.set_column(index, index, width)
  192. worksheet.write(0, index, column, fmt)
  193. worksheet.autofilter(0, 0, 0, len(column_metas) - 1)
  194. indices = self._convert_column_indices(column_metas)
  195. for index, operator in enumerate(self.operators):
  196. if operator.type == 'Load':
  197. operator_content = f'{operator.type}_{operator.name}'
  198. else:
  199. operator_content = f'{operator.type}_{operator.op_id}'
  200. worksheet.write(index + 1, indices.get('operator'), operator_content, styles['content_left_fmt'])
  201. if operator.type == 'make_tuple':
  202. worksheet.write(index + 1, indices.get('device_id'), operator.device_id, styles['content_left_fmt'])
  203. worksheet.write(index + 1, indices.get('graph_name'), operator.graph_name, styles['content_left_fmt'])
  204. continue
  205. input_info = self._get_operator_input_info(operator, input_types)
  206. worksheet.write(index + 1, indices.get('input'), input_info['input'], styles['content_wrapped_fmt'])
  207. worksheet.write(
  208. index + 1, indices.get('input_dtype'),
  209. input_info['input_dtype'], styles['content_wrapped_fmt'])
  210. worksheet.write(
  211. index + 1, indices.get('input_shape'),
  212. input_info['input_shape'], styles['content_wrapped_fmt'])
  213. output_dtype_content = ''
  214. output_shape_content = ''
  215. if operator.output and operator.output.type == OutputType.TENSOR:
  216. output_dtype_content = operator.output.info['dtype']
  217. output_shape_content = str(operator.output.info['shape'])
  218. elif operator.output and operator.output.type == OutputType.TUPLE:
  219. output_dtype_content = '\n'.join([
  220. Toolkit.PLACEHOLDER if dtype is None else dtype
  221. for dtype in operator.output.info['dtypes']
  222. ])
  223. output_shape_content = '\n'.join([
  224. Toolkit.PLACEHOLDER if shape is None else str(shape)
  225. for shape in operator.output.info['shapes']
  226. ])
  227. worksheet.write(
  228. index + 1, indices.get('output_dtype'),
  229. output_dtype_content, styles['content_wrapped_fmt'])
  230. worksheet.write(
  231. index + 1, indices.get('output_shape'),
  232. output_shape_content, styles['content_wrapped_fmt'])
  233. downstream_content = ''
  234. for op_id in operator.downstream:
  235. op = operator_mapping[op_id]
  236. downstream_content += f'{op.type}_{op.op_id}' + '\n'
  237. worksheet.write(
  238. index + 1, indices.get('downstream'),
  239. downstream_content.strip(), styles['content_wrapped_fmt'])
  240. worksheet.write(index + 1, indices.get('name'), operator.name, styles['content_center_fmt'])
  241. worksheet.write(index + 1, indices.get('attrs'), str(operator.attrs), styles['content_left_fmt'])
  242. worksheet.write(index + 1, indices.get('full_name'), operator.full_name, styles['content_left_fmt'])
  243. worksheet.write(index + 1, indices.get('device_id'), operator.device_id, styles['content_left_fmt'])
  244. worksheet.write(index + 1, indices.get('graph_name'), operator.graph_name, styles['content_left_fmt'])
  245. stack_content = ''
  246. for source in operator.stack:
  247. stack_content += f'{source.file_path}:{source.line_no}\n{source.code_line}\n'
  248. worksheet.write(index + 1, indices.get('stack'), stack_content.strip(), styles['content_wrapped_fmt'])
  249. def _add_parameter_worksheet(self, workbook, styles):
  250. """
  251. Add parameter worksheet.
  252. Args:
  253. workbook (WorkBook): Excel workbook.
  254. styles (dict): Workbook styles.
  255. """
  256. worksheet = workbook.add_worksheet('parameter')
  257. # column metas contain column names, styles and widths
  258. column_metas = [
  259. ('name', styles['header_left_fmt'], 50),
  260. ('output_dtype', styles['header_left_fmt'], 20),
  261. ('output_shape', styles['header_left_fmt'], 25),
  262. ('downstream', styles['header_left_fmt'], 30),
  263. ('device_id', styles['header_left_fmt'], 20),
  264. ('graph_name', styles['header_left_fmt'], 30),
  265. ]
  266. for index, (column, fmt, width) in enumerate(column_metas):
  267. worksheet.set_column(index, index, width)
  268. worksheet.write(0, index, column, fmt)
  269. worksheet.autofilter(0, 0, 0, len(column_metas) - 1)
  270. indices = self._convert_column_indices(column_metas)
  271. operator_mapping = dict((operator.op_id, operator) for operator in self.operators)
  272. for index, parameter in enumerate(self.parameters):
  273. worksheet.write(index + 1, indices.get('name'), parameter.name, styles['content_left_fmt'])
  274. worksheet.write(
  275. index + 1, indices.get('output_dtype'),
  276. parameter.output.info['dtype'], styles['content_left_fmt'])
  277. worksheet.write(
  278. index + 1, indices.get('output_shape'),
  279. str(parameter.output.info['shape']), styles['content_left_fmt'])
  280. downstream_nodes = [operator_mapping[op_id] for op_id in parameter.downstream]
  281. downstream_content = ''
  282. for op in downstream_nodes:
  283. if op.type == 'Load':
  284. downstream_content += f'{op.type}_{op.name}' + '\n'
  285. else:
  286. downstream_content += f'{op.type}_{op.op_id}' + '\n'
  287. worksheet.write(
  288. index + 1, indices.get('downstream'),
  289. downstream_content.strip(), styles['content_wrapped_fmt'])
  290. worksheet.write(index + 1, indices.get('device_id'), parameter.device_id, styles['content_left_fmt'])
  291. worksheet.write(index + 1, indices.get('graph_name'), parameter.graph_name, styles['content_left_fmt'])
  292. def _add_constant_worksheet(self, workbook, styles):
  293. """
  294. Add constant worksheet.
  295. Args:
  296. workbook (WorkBook): Excel workbook.
  297. styles (dict): Workbook styles.
  298. """
  299. worksheet = workbook.add_worksheet('constant')
  300. # column metas contain column names, styles and widths
  301. column_metas = [
  302. ('name', styles['header_left_fmt'], 10),
  303. ('value', styles['header_left_fmt'], 30),
  304. ('downstream', styles['header_left_fmt'], 30),
  305. ('device_id', styles['header_left_fmt'], 20),
  306. ('graph_name', styles['header_left_fmt'], 30),
  307. ]
  308. for index, (column, fmt, width) in enumerate(column_metas):
  309. worksheet.set_column(index, index, width)
  310. worksheet.write(0, index, column, fmt)
  311. worksheet.autofilter(0, 0, 0, len(column_metas) - 1)
  312. indices = self._convert_column_indices(column_metas)
  313. operator_mapping = dict((operator.op_id, operator) for operator in self.operators)
  314. for index, constant in enumerate(self.constants):
  315. worksheet.write(index + 1, indices.get('name'), constant.name, styles['content_left_fmt'])
  316. if constant.output.type == OutputType.NONE:
  317. value_content = 'NONE'
  318. elif constant.output.type == OutputType.TENSOR:
  319. value_content = 'TENSOR'
  320. else:
  321. value_content = constant.output.info['value']
  322. worksheet.write(index + 1, indices.get('value'), value_content, styles['content_left_fmt'])
  323. downstream_nodes = [operator_mapping[op_id] for op_id in constant.downstream]
  324. downstream_content = ''
  325. for op in downstream_nodes:
  326. if op.type == 'Load':
  327. downstream_content += f'{op.type}_{op.name}' + '\n'
  328. else:
  329. downstream_content += f'{op.type}_{op.op_id}' + '\n'
  330. worksheet.write(
  331. index + 1, indices.get('downstream'),
  332. downstream_content.strip(), styles['content_wrapped_fmt'])
  333. worksheet.write(index + 1, indices.get('device_id'), constant.device_id, styles['content_left_fmt'])
  334. worksheet.write(index + 1, indices.get('graph_name'), constant.graph_name, styles['content_left_fmt'])
  335. def _add_statistics_worksheet(self, workbook, styles):
  336. """
  337. Add statistics worksheet.
  338. Args:
  339. workbook (WorkBook): Excel workbook.
  340. styles (dict): Workbook styles.
  341. """
  342. worksheet = workbook.add_worksheet('statistics')
  343. # column metas contain column names, styles and widths
  344. column_metas = [
  345. ('operator', styles['header_left_fmt'], 30),
  346. ('count', styles['header_center_fmt'], 20),
  347. ]
  348. for index, (column, fmt, width) in enumerate(column_metas):
  349. worksheet.set_column(index, index, width)
  350. worksheet.write(0, index, column, fmt)
  351. worksheet.autofilter(0, 0, 0, len(column_metas) - 1)
  352. operator_type_set = set()
  353. for operator in self.operators:
  354. operator_type_set.add(operator.type)
  355. operator_types = sorted(list(operator_type_set))
  356. stats = dict(zip(operator_types, [0]*len(operator_types)))
  357. for operator in self.operators:
  358. stats[operator.type] += 1
  359. indices = self._convert_column_indices(column_metas)
  360. for index, operator_type in enumerate(operator_types):
  361. worksheet.write(index + 1, indices.get('operator'), operator_type, styles['content_left_fmt'])
  362. worksheet.write(index + 1, indices.get('count'), stats[operator_type], styles['content_center_fmt'])
  363. def _add_source_worksheet(self, workbook, styles):
  364. """
  365. Add source worksheet.
  366. Args:
  367. workbook (WorkBook): Excel workbook.
  368. styles (dict): Workbook styles.
  369. """
  370. worksheet = workbook.add_worksheet('source')
  371. # column metas contain column names, styles and widths
  372. column_metas = [
  373. ('stack', styles['header_left_fmt'], 150),
  374. ('operator', styles['header_left_fmt'], 30),
  375. ('full_name', styles['header_left_fmt'], 20),
  376. ('device_id', styles['header_left_fmt'], 20),
  377. ('graph_name', styles['header_left_fmt'], 30),
  378. ]
  379. for index, (column, fmt, width) in enumerate(column_metas):
  380. worksheet.set_column(index, index, width)
  381. worksheet.write(0, index, column, fmt)
  382. worksheet.autofilter(0, 0, 0, len(column_metas) - 1)
  383. source_mapping = {}
  384. for operator in self.operators:
  385. if not operator.stack:
  386. continue
  387. stack = [f'{source.file_path}:{source.line_no}\n{source.code_line}' for source in operator.stack]
  388. key = '\n'.join(stack)
  389. if key in source_mapping:
  390. source_mapping[key].append(operator)
  391. else:
  392. source_mapping[key] = [operator]
  393. row = 0
  394. indices = self._convert_column_indices(column_metas)
  395. for key in source_mapping:
  396. operators = source_mapping[key]
  397. operators.sort(key=lambda x: int(x.op_id))
  398. if len(operators) == 1:
  399. worksheet.write(row + 1, indices.get('stack'), key, styles['content_wrapped_fmt'])
  400. else:
  401. worksheet.merge_range(
  402. row + 1, indices.get('stack'),
  403. row+len(operators), 0, key, styles['content_wrapped_fmt'])
  404. for index, operator in enumerate(operators):
  405. operator_content = f'{operator.type}_{operator.op_id}'
  406. worksheet.write(
  407. row + index + 1, indices.get('operator'),
  408. operator_content, styles['content_left_fmt'])
  409. worksheet.write(
  410. row + index + 1, indices.get('full_name'),
  411. operator.full_name, styles['content_left_fmt'])
  412. worksheet.write(
  413. row + index + 1, indices.get('device_id'),
  414. operator.device_id, styles['content_left_fmt'])
  415. worksheet.write(
  416. row + index + 1, indices.get('graph_name'),
  417. operator.graph_name, styles['content_left_fmt'])
  418. row += len(operators)