You cannot select more than 25 topics. Topics must start with a Chinese character, a letter or a number, can include dashes ('-') and can be up to 35 characters long.

summary_watcher.py 24 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. # Copyright 2020-2021 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Summary watcher module."""
  16. import json
  17. import os
  18. import re
  19. import datetime
  20. from pathlib import Path
  21. from mindinsight.datavisual.common.log import logger
  22. from mindinsight.datavisual.common.validation import Validation
  23. from mindinsight.datavisual.utils.tools import Counter
  24. from mindinsight.datavisual.utils.utils import contains_null_byte
  25. from mindinsight.datavisual.common.exceptions import MaxCountExceededError
  26. from mindinsight.utils.exceptions import FileSystemPermissionError
  27. LINEAGE_SUMMARY_SUFFIX = '_lineage'
  28. EXPLAIN_SUMMARY_SUFFIX = '_explain'
  29. class SummaryWatcher:
  30. """SummaryWatcher class."""
  31. SUMMARY_FILENAME_REGEX = r'summary\.(?P<timestamp>\d+)'
  32. PB_FILENAME_REGEX = r'\.pb$'
  33. PROFILER_DIRECTORY_REGEX = r'^profiler'
  34. CLUSTER_PROFILER_DIRECTORY_REGEX = r'^cluster_profiler$'
  35. MAX_SUMMARY_DIR_COUNT = 999
  36. # scan at most 20000 files/directories (approximately 1 seconds)
  37. # if overall is False in SummaryWatcher.list_summary_directories
  38. # to avoid long-time blocking
  39. MAX_SCAN_COUNT = 20000
  40. def list_summary_directories(self, summary_base_dir, overall=True, list_explain=False):
  41. """
  42. List summary directories within base directory.
  43. Args:
  44. summary_base_dir (str): Path of summary base directory.
  45. overall (bool): Limit the total num of scanning if overall is False.
  46. list_explain (bool): Indicates whether to list only the mindexplain folder.
  47. Default is False, means not to list mindexplain folder.
  48. Returns:
  49. list, list of summary directory info, each of which including the following attributes.
  50. - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
  51. starting with "./".
  52. - create_time (datetime): Creation time of summary file.
  53. - update_time (datetime): Modification time of summary file.
  54. - profiler (dict): profiler info, including profiler subdirectory path, profiler creation time and
  55. profiler modification time.
  56. Examples:
  57. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  58. >>> summary_watcher = SummaryWatcher()
  59. >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
  60. """
  61. if contains_null_byte(summary_base_dir=summary_base_dir):
  62. return []
  63. relative_path = os.path.join('.', '')
  64. if not self._is_valid_summary_directory(summary_base_dir, relative_path):
  65. return []
  66. summary_dict = {}
  67. counter = Counter(max_count=None if overall else self.MAX_SCAN_COUNT)
  68. try:
  69. entries = os.scandir(summary_base_dir)
  70. except PermissionError:
  71. logger.error('Path of summary base directory is not accessible.')
  72. raise FileSystemPermissionError('Path of summary base directory is not accessible.')
  73. for entry in entries:
  74. if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
  75. break
  76. try:
  77. counter.add()
  78. except MaxCountExceededError:
  79. logger.info('Stop further scanning due to overall is False and '
  80. 'number of scanned files exceeds upper limit.')
  81. break
  82. if entry.is_symlink():
  83. pass
  84. elif entry.is_file():
  85. self._update_summary_dict(summary_dict, summary_base_dir, relative_path, entry, list_explain)
  86. elif entry.is_dir():
  87. self._update_summary_dict(summary_dict, summary_base_dir, relative_path, entry, list_explain)
  88. entry_path = os.path.realpath(os.path.join(summary_base_dir, entry.name))
  89. self._scan_subdir_entries(summary_dict, summary_base_dir, entry_path, entry.name, counter, list_explain)
  90. directories = []
  91. for key, value in summary_dict.items():
  92. directory = {
  93. 'relative_path': key,
  94. **value
  95. }
  96. directories.append(directory)
  97. # sort by update time in descending order and relative path in ascending order
  98. directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path']))
  99. return directories
  100. def _scan_subdir_entries(self, summary_dict, summary_base_dir, entry_path, entry_name, counter, list_explain):
  101. """
  102. Scan subdir entries.
  103. Args:
  104. summary_dict (dict): Temporary data structure to hold summary directory info.
  105. summary_base_dir (str): Path of summary base directory.
  106. entry_path(str): Path entry.
  107. entry_name (str): Name of entry.
  108. counter (Counter): An instance of CountLimiter.
  109. list_explain (bool): Indicates whether to list only the mindexplain folder.
  110. """
  111. try:
  112. subdir_entries = os.scandir(entry_path)
  113. except PermissionError:
  114. logger.warning('Path of %s under summary base directory is not accessible.', entry_name)
  115. return
  116. for subdir_entry in subdir_entries:
  117. if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
  118. break
  119. try:
  120. counter.add()
  121. except MaxCountExceededError:
  122. logger.info('Stop further scanning due to overall is False and '
  123. 'number of scanned files exceeds upper limit.')
  124. break
  125. subdir_relative_path = os.path.join('.', entry_name)
  126. if subdir_entry.is_symlink():
  127. pass
  128. self._update_summary_dict(summary_dict, summary_base_dir, subdir_relative_path, subdir_entry, list_explain)
  129. def _is_valid_summary_directory(self, summary_base_dir, relative_path):
  130. """
  131. Check if the given summary directory is valid.
  132. Args:
  133. summary_base_dir (str): Path of summary base directory.
  134. relative_path (str): Relative path of summary directory, referring to summary base directory,
  135. starting with "./" .
  136. Returns:
  137. bool, indicates if summary directory is valid.
  138. """
  139. summary_base_dir = os.path.realpath(summary_base_dir)
  140. summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
  141. if not os.path.exists(summary_directory):
  142. logger.info('Path of summary directory not exists.')
  143. return False
  144. if not os.path.isdir(summary_directory):
  145. logger.warning('Path of summary directory is not a valid directory.')
  146. return False
  147. try:
  148. Path(summary_directory).relative_to(Path(summary_base_dir))
  149. except ValueError:
  150. logger.warning('Relative path %s is not subdirectory of summary_base_dir', relative_path)
  151. return False
  152. return True
  153. def _update_summary_dict(self, summary_dict, summary_base_dir, relative_path, entry, list_explain):
  154. """
  155. Update summary_dict with ctime and mtime.
  156. Args:
  157. summary_dict (dict): Temporary data structure to hold summary directory info.
  158. summary_base_dir (str): Path of summary base directory.
  159. relative_path (str): Relative path of summary directory, referring to summary base directory,
  160. starting with "./" .
  161. entry (DirEntry): Directory entry instance needed to check with regular expression.
  162. list_explain (bool): Indicates whether to list only the mindexplain folder.
  163. """
  164. try:
  165. stat = entry.stat()
  166. except FileNotFoundError:
  167. logger.warning('File %s not found', entry.name)
  168. return
  169. ctime = datetime.datetime.fromtimestamp(stat.st_ctime).astimezone()
  170. mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
  171. if entry.is_file():
  172. summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
  173. pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
  174. if not self._is_valid_pattern_result(summary_pattern, pb_pattern, list_explain, entry):
  175. return
  176. timestamp = None
  177. if summary_pattern is not None:
  178. timestamp = int(summary_pattern.groupdict().get('timestamp'))
  179. try:
  180. # extract created time from filename
  181. ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
  182. except OverflowError:
  183. return
  184. if relative_path not in summary_dict:
  185. summary_dict[relative_path] = _new_entry(ctime, mtime)
  186. job_dict = _get_explain_job_info(summary_base_dir, relative_path, timestamp)
  187. summary_dict[relative_path].update(job_dict)
  188. if summary_dict[relative_path]['create_time'] < ctime:
  189. summary_dict[relative_path].update({'create_time': ctime, 'update_time': mtime})
  190. job_dict = _get_explain_job_info(summary_base_dir, relative_path, timestamp)
  191. summary_dict[relative_path].update(job_dict)
  192. if not summary_pattern:
  193. summary_dict[relative_path]['graph_files'] += 1
  194. elif entry.name.endswith(LINEAGE_SUMMARY_SUFFIX):
  195. summary_dict[relative_path]['lineage_files'] += 1
  196. elif entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
  197. summary_dict[relative_path]['explain_files'] += 1
  198. else:
  199. summary_dict[relative_path]['summary_files'] += 1
  200. elif entry.is_dir():
  201. if list_explain:
  202. return
  203. cluster_profiler_type, is_cluster_profiler = \
  204. self._find_cluster_profiler_dir(entry, summary_base_dir, relative_path)
  205. profiler_type, is_profiler = self._find_profiler_dir(entry, summary_base_dir, relative_path)
  206. if is_cluster_profiler or is_profiler:
  207. if is_cluster_profiler:
  208. profiler_type = cluster_profiler_type
  209. profiler = {
  210. 'directory': os.path.join('.', entry.name),
  211. 'create_time': ctime,
  212. 'update_time': mtime,
  213. "profiler_type": profiler_type
  214. }
  215. if relative_path in summary_dict:
  216. summary_dict[relative_path]['profiler'] = profiler
  217. else:
  218. summary_dict[relative_path] = _new_entry(ctime, mtime, profiler)
  219. def _find_profiler_dir(self, entry, summary_base_dir, relative_path):
  220. """Find profiler dir by the given relative path."""
  221. profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX, entry.name)
  222. full_dir_path = os.path.join(summary_base_dir, relative_path, entry.name)
  223. is_valid_profiler_dir, profiler_type = self._is_valid_profiler_directory(full_dir_path)
  224. if profiler_pattern is None or not is_valid_profiler_dir:
  225. return profiler_type, False
  226. return profiler_type, True
  227. def _find_cluster_profiler_dir(self, entry, summary_base_dir, relative_path):
  228. """Find profiler cluster dir by the given relative path."""
  229. cluster_profiler_pattern = re.search(self.CLUSTER_PROFILER_DIRECTORY_REGEX, entry.name)
  230. full_dir_path = os.path.join(summary_base_dir, relative_path, entry.name)
  231. is_valid_cluster_profiler_dir, profiler_type = self._is_valid_cluster_profiler_directory(full_dir_path)
  232. if cluster_profiler_pattern is None or not is_valid_cluster_profiler_dir:
  233. return profiler_type, False
  234. return profiler_type, True
  235. def _is_valid_pattern_result(self, summary_pattern, pb_pattern, list_explain, entry):
  236. """Check the pattern result is valid."""
  237. if summary_pattern is None and pb_pattern is None:
  238. return False
  239. if list_explain and not entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
  240. return False
  241. if not list_explain and entry.name.endswith(EXPLAIN_SUMMARY_SUFFIX):
  242. return False
  243. return True
  244. def is_summary_directory(self, summary_base_dir, relative_path):
  245. """
  246. Check if the given summary directory is valid.
  247. Args:
  248. summary_base_dir (str): Path of summary base directory.
  249. relative_path (str): Relative path of summary directory, referring to summary base directory,
  250. starting with "./" .
  251. Returns:
  252. bool, indicates if the given summary directory is valid.
  253. Examples:
  254. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  255. >>> summary_watcher = SummaryWatcher()
  256. >>> summaries = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
  257. """
  258. if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
  259. return False
  260. if not self._is_valid_summary_directory(summary_base_dir, relative_path):
  261. return False
  262. summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
  263. try:
  264. entries = os.scandir(summary_directory)
  265. except PermissionError:
  266. logger.error('Path of summary base directory is not accessible.')
  267. raise FileSystemPermissionError('Path of summary base directory is not accessible.')
  268. for entry in entries:
  269. if entry.is_symlink():
  270. continue
  271. summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
  272. if summary_pattern is not None and entry.is_file():
  273. return True
  274. pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
  275. if pb_pattern is not None and entry.is_file():
  276. return True
  277. if entry.is_dir():
  278. profiler_pattern = re.search(self.PROFILER_DIRECTORY_REGEX, entry.name)
  279. cluster_profiler_pattern = re.search(self.CLUSTER_PROFILER_DIRECTORY_REGEX, entry.name)
  280. if profiler_pattern is not None or cluster_profiler_pattern is not None:
  281. full_path = os.path.realpath(os.path.join(summary_directory, entry.name))
  282. if self._is_valid_profiler_directory(full_path)[0] or \
  283. self._is_valid_cluster_profiler_directory(full_path)[0]:
  284. return True
  285. return False
  286. def _is_valid_profiler_directory(self, directory):
  287. profiler_type = ""
  288. try:
  289. from mindinsight.profiler.common.util import analyse_device_list_from_profiler_dir
  290. device_list, profiler_type = analyse_device_list_from_profiler_dir(directory)
  291. except ImportError:
  292. device_list = []
  293. return bool(device_list), profiler_type
  294. def _is_valid_cluster_profiler_directory(self, directory):
  295. """Determine whether it is a valid cluster profiler."""
  296. cluster_profiler_type = 'cluster'
  297. entries = os.scandir(directory)
  298. for entry in entries:
  299. if entry.is_symlink():
  300. continue
  301. if entry.is_dir():
  302. full_path = os.path.join(directory, entry.name, 'profiler')
  303. is_profile, profiler_type = self._is_valid_profiler_directory(full_path)
  304. if is_profile:
  305. return is_profile, cluster_profiler_type + '_' + profiler_type
  306. return False, cluster_profiler_type
  307. def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
  308. """
  309. List summary directories within base directory.
  310. Args:
  311. summary_base_dir (str): Path of summary base directory.
  312. offset (int): An offset for page. Ex, offset is 0, mean current page is 1. Default value is 0.
  313. limit (int): The max data items for per page. Default value is 10.
  314. Returns:
  315. tuple[total, directories], total indicates the overall number of summary directories and directories
  316. indicate list of summary directory info including the following attributes.
  317. - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
  318. starting with "./".
  319. - create_time (datetime): Creation time of summary file.
  320. - update_time (datetime): Modification time of summary file.
  321. Raises:
  322. ParamValueError, if offset < 0 or limit is out of valid value range.
  323. ParamTypeError, if offset or limit is not valid integer.
  324. Examples:
  325. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  326. >>> summary_watcher = SummaryWatcher()
  327. >>> total, directories = summary_watcher.list_summary_directories_by_pagination(
  328. '/summary/base/dir', offset=0, limit=10)
  329. """
  330. offset = Validation.check_offset(offset=offset)
  331. limit = Validation.check_limit(limit, min_value=1, max_value=999)
  332. directories = self.list_summary_directories(summary_base_dir, overall=False)
  333. return len(directories), directories[offset * limit:(offset + 1) * limit]
  334. def list_summaries(self, summary_base_dir, relative_path='./'):
  335. """
  336. Get info of latest summary file within the given summary directory.
  337. Args:
  338. summary_base_dir (str): Path of summary base directory.
  339. relative_path (str): Relative path of summary directory, referring to summary base directory,
  340. starting with "./" .
  341. Returns:
  342. list, list of summary file including the following attributes.
  343. - file_name (str): Summary file name.
  344. - create_time (datetime): Creation time of summary file.
  345. - update_time (datetime): Modification time of summary file.
  346. Examples:
  347. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  348. >>> summary_watcher = SummaryWatcher()
  349. >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
  350. """
  351. if contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
  352. return []
  353. if not self._is_valid_summary_directory(summary_base_dir, relative_path):
  354. return []
  355. summaries = []
  356. summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
  357. try:
  358. entries = os.scandir(summary_directory)
  359. except PermissionError:
  360. logger.error('Path of summary directory is not accessible.')
  361. raise FileSystemPermissionError('Path of summary directory is not accessible.')
  362. for entry in entries:
  363. if entry.is_symlink() or not entry.is_file():
  364. continue
  365. pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
  366. if pattern is None:
  367. continue
  368. timestamp = int(pattern.groupdict().get('timestamp'))
  369. try:
  370. # extract created time from filename
  371. ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
  372. except OverflowError:
  373. continue
  374. try:
  375. stat = entry.stat()
  376. except FileNotFoundError:
  377. logger.warning('File %s not found.', entry.name)
  378. continue
  379. mtime = datetime.datetime.fromtimestamp(stat.st_mtime).astimezone()
  380. summaries.append({
  381. 'file_name': entry.name,
  382. 'create_time': ctime,
  383. 'update_time': mtime,
  384. })
  385. # sort by update time in descending order and filename in ascending order
  386. summaries.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))
  387. return summaries
  388. def list_explain_directories(self, summary_base_dir, offset=0, limit=None):
  389. """
  390. List explain directories within base directory.
  391. Args:
  392. summary_base_dir (str): Path of summary base directory.
  393. offset (int): An offset for page. Ex, offset is 0, mean current page is 1. Default value is 0.
  394. limit (int): The max data items for per page. Default value is 10.
  395. Returns:
  396. tuple[total, directories], total indicates the overall number of explain directories and directories
  397. indicate list of summary directory info including the following attributes.
  398. - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
  399. starting with "./".
  400. - create_time (datetime): Creation time of summary file.
  401. - update_time (datetime): Modification time of summary file.
  402. Raises:
  403. ParamValueError, if offset < 0 or limit is out of valid value range.
  404. ParamTypeError, if offset or limit is not valid integer.
  405. Examples:
  406. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  407. >>> summary_watcher = SummaryWatcher()
  408. >>> total, directories = summary_watcher.list_explain_directories('/summary/base/dir', offset=0, limit=10)
  409. """
  410. offset = Validation.check_offset(offset=offset)
  411. limit = Validation.check_limit(limit, min_value=1, max_value=999, default_value=None)
  412. directories = self.list_summary_directories(summary_base_dir, overall=False, list_explain=True)
  413. if limit is None:
  414. return len(directories), directories
  415. return len(directories), directories[offset * limit:(offset + 1) * limit]
  416. def _new_entry(ctime, mtime, profiler=None):
  417. """Create a new entry."""
  418. return {
  419. 'create_time': ctime,
  420. 'update_time': mtime,
  421. 'summary_files': 0,
  422. 'lineage_files': 0,
  423. 'explain_files': 0,
  424. 'graph_files': 0,
  425. 'profiler': profiler
  426. }
  427. def _get_explain_job_info(summary_base_dir, relative_path, timestamp):
  428. """Get explain job info."""
  429. if timestamp is None:
  430. job_dict = {"saliency_map": False, "hierarchical_occlusion": False}
  431. return job_dict
  432. json_path = os.path.join(summary_base_dir, relative_path.lstrip("./"), f"_explain_{timestamp}",
  433. "manifest.json")
  434. if os.path.exists(json_path):
  435. with open(json_path, "r") as f:
  436. job_dict = json.load(f)
  437. return job_dict
  438. # Set default value to make it compatible with previous version
  439. job_dict = {"saliency_map": True, "hierarchical_occlusion": False}
  440. return job_dict