You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

summary_watcher.py 15 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. # Copyright 2020 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Summary watcher module."""
  16. import os
  17. import re
  18. import datetime
  19. from pathlib import Path
  20. from mindinsight.datavisual.common.log import logger
  21. from mindinsight.datavisual.common.validation import Validation
  22. from mindinsight.utils.exceptions import FileSystemPermissionError
  23. class SummaryWatcher:
  24. """SummaryWatcher class."""
  25. SUMMARY_FILENAME_REGEX = r'summary\.(?P<timestamp>\d+)'
  26. PB_FILENAME_REGEX = r'\.pb$'
  27. MAX_SUMMARY_DIR_COUNT = 999
  28. # scan at most 20000 files/directories (approximately 1 seconds)
  29. # if overall=False in SummaryWatcher.list_summary_directories
  30. # to avoid long-time blocking
  31. MAX_SCAN_COUNT = 20000
  32. def list_summary_directories(self, summary_base_dir, overall=True):
  33. """
  34. List summary directories within base directory.
  35. Args:
  36. summary_base_dir (str): Path of summary base directory.
  37. Returns:
  38. list, list of summary directory info, each of which including the following attributes.
  39. - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
  40. starting with "./".
  41. - create_time (datetime): Creation time of summary file.
  42. - update_time (datetime): Modification time of summary file.
  43. Examples:
  44. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  45. >>> summary_watcher = SummaryWatcher()
  46. >>> directories = summary_watcher.list_summary_directories('/summary/base/dir')
  47. """
  48. if self._contains_null_byte(summary_base_dir=summary_base_dir):
  49. return []
  50. if not os.path.exists(summary_base_dir):
  51. logger.warning('Path of summary base directory not exists.')
  52. return []
  53. if not os.path.isdir(summary_base_dir):
  54. logger.warning('Path of summary base directory is not a valid directory.')
  55. return []
  56. summary_dict = {}
  57. scan_count = 0
  58. try:
  59. entries = os.scandir(summary_base_dir)
  60. except PermissionError:
  61. logger.error('Path of summary base directory is not accessible.')
  62. raise FileSystemPermissionError('Path of summary base directory is not accessible.')
  63. for entry in entries:
  64. if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
  65. break
  66. relative_path = os.path.join('.', '')
  67. if entry.is_symlink():
  68. pass
  69. elif entry.is_file():
  70. self._update_summary_dict(summary_dict, relative_path, entry)
  71. elif entry.is_dir():
  72. full_path = os.path.realpath(os.path.join(summary_base_dir, entry.name))
  73. try:
  74. subdir_entries = os.scandir(full_path)
  75. except PermissionError:
  76. logger.warning('Path of %s under summary base directory is not accessible.', entry.name)
  77. else:
  78. for subdir_entry in subdir_entries:
  79. if len(summary_dict) == self.MAX_SUMMARY_DIR_COUNT:
  80. break
  81. subdir_relative_path = os.path.join('.', entry.name)
  82. if subdir_entry.is_symlink():
  83. pass
  84. elif subdir_entry.is_file():
  85. self._update_summary_dict(summary_dict, subdir_relative_path, subdir_entry)
  86. scan_count += 1
  87. if not overall and scan_count >= self.MAX_SCAN_COUNT:
  88. break
  89. scan_count += 1
  90. if not overall and scan_count >= self.MAX_SCAN_COUNT:
  91. logger.info('Stop further scanning due to overall is False and '
  92. 'number of scanned files exceeds upper limit.')
  93. break
  94. directories = [{
  95. 'relative_path': key,
  96. 'create_time': value['ctime'],
  97. 'update_time': value['mtime'],
  98. } for key, value in summary_dict.items()]
  99. # sort by update time in descending order and relative path in ascending order
  100. directories.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['relative_path']))
  101. return directories
  102. def _contains_null_byte(self, **kwargs):
  103. """
  104. Check if arg contains null byte.
  105. Args:
  106. kwargs (Any): Check if arg contains null byte.
  107. Returns:
  108. bool, indicates if any arg contains null byte.
  109. """
  110. for key, value in kwargs.items():
  111. if not isinstance(value, str):
  112. continue
  113. if '\x00' in value:
  114. logger.warning('%s contains null byte \\x00.', key)
  115. return True
  116. return False
  117. def _is_valid_summary_directory(self, summary_base_dir, relative_path):
  118. """
  119. Check if the given summary directory is valid.
  120. Args:
  121. summary_base_dir (str): Path of summary base directory.
  122. relative_path (str): Relative path of summary directory, referring to summary base directory,
  123. starting with "./" .
  124. Returns:
  125. bool, indicates if summary directory is valid.
  126. """
  127. summary_base_dir = os.path.realpath(summary_base_dir)
  128. summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
  129. if summary_base_dir == summary_directory:
  130. return True
  131. if not os.path.exists(summary_directory):
  132. logger.warning('Path of summary directory not exists.')
  133. return False
  134. if not os.path.isdir(summary_directory):
  135. logger.warning('Path of summary directory is not a valid directory.')
  136. return False
  137. try:
  138. Path(summary_directory).relative_to(Path(summary_base_dir))
  139. except ValueError:
  140. logger.warning('Relative path %s is not subdirectory of summary_base_dir', relative_path)
  141. return False
  142. return True
  143. def _update_summary_dict(self, summary_dict, relative_path, entry):
  144. """
  145. Update summary_dict with ctime and mtime.
  146. Args:
  147. summary_dict (dict): Temporary data structure to hold summary directory info.
  148. relative_path (str): Relative path of summary directory, referring to summary base directory,
  149. starting with "./" .
  150. entry (DirEntry): Directory entry instance needed to check with regular expression.
  151. """
  152. summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
  153. pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
  154. if summary_pattern is None and pb_pattern is None:
  155. return
  156. if summary_pattern is not None:
  157. timestamp = int(summary_pattern.groupdict().get('timestamp'))
  158. try:
  159. # extract created time from filename
  160. ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
  161. except OverflowError:
  162. return
  163. else:
  164. ctime = datetime.datetime.fromtimestamp(entry.stat().st_ctime).astimezone()
  165. # extract modified time from filesystem
  166. mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
  167. if relative_path not in summary_dict or summary_dict[relative_path]['ctime'] < ctime:
  168. summary_dict[relative_path] = {
  169. 'ctime': ctime,
  170. 'mtime': mtime,
  171. }
  172. def is_summary_directory(self, summary_base_dir, relative_path):
  173. """
  174. Check if the given summary directory is valid.
  175. Args:
  176. summary_base_dir (str): Path of summary base directory.
  177. relative_path (str): Relative path of summary directory, referring to summary base directory,
  178. starting with "./" .
  179. Returns:
  180. bool, indicates if the given summary directory is valid.
  181. Examples:
  182. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  183. >>> summary_watcher = SummaryWatcher()
  184. >>> summaries = summary_watcher.is_summary_directory('/summary/base/dir', './job-01')
  185. """
  186. if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
  187. return False
  188. if not self._is_valid_summary_directory(summary_base_dir, relative_path):
  189. return False
  190. summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
  191. try:
  192. entries = os.scandir(summary_directory)
  193. except PermissionError:
  194. logger.error('Path of summary base directory is not accessible.')
  195. raise FileSystemPermissionError('Path of summary base directory is not accessible.')
  196. for entry in entries:
  197. if entry.is_symlink() or not entry.is_file():
  198. continue
  199. summary_pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
  200. pb_pattern = re.search(self.PB_FILENAME_REGEX, entry.name)
  201. if summary_pattern or pb_pattern:
  202. return True
  203. return False
  204. def list_summary_directories_by_pagination(self, summary_base_dir, offset=0, limit=10):
  205. """
  206. List summary directories within base directory.
  207. Args:
  208. summary_base_dir (str): Path of summary base directory.
  209. offset (int): An offset for page. Ex, offset is 0, mean current page is 1. Default value is 0.
  210. limit (int): The max data items for per page. Default value is 10.
  211. Returns:
  212. tuple[total, directories], total indicates the overall number of summary directories and directories
  213. indicate list of summary directory info including the following attributes.
  214. - relative_path (str): Relative path of summary directory, referring to settings.SUMMARY_BASE_DIR,
  215. starting with "./".
  216. - create_time (datetime): Creation time of summary file.
  217. - update_time (datetime): Modification time of summary file.
  218. Raises:
  219. ParamValueError, if offset < 0 or limit is out of valid value range.
  220. ParamTypeError, if offset or limit is not valid integer.
  221. Examples:
  222. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  223. >>> summary_watcher = SummaryWatcher()
  224. >>> total, directories = summary_watcher.list_summary_directories_by_pagination(
  225. '/summary/base/dir', offset=0, limit=10)
  226. """
  227. offset = Validation.check_offset(offset=offset)
  228. limit = Validation.check_limit(limit, min_value=1, max_value=999)
  229. directories = self.list_summary_directories(summary_base_dir, overall=False)
  230. return len(directories), directories[offset * limit:(offset + 1) * limit]
  231. def list_summaries(self, summary_base_dir, relative_path='./'):
  232. """
  233. Get info of latest summary file within the given summary directory.
  234. Args:
  235. summary_base_dir (str): Path of summary base directory.
  236. relative_path (str): Relative path of summary directory, referring to summary base directory,
  237. starting with "./" .
  238. Returns:
  239. list, list of summary file including the following attributes.
  240. - file_name (str): Summary file name.
  241. - create_time (datetime): Creation time of summary file.
  242. - update_time (datetime): Modification time of summary file.
  243. Examples:
  244. >>> from mindinsight.datavisual.data_transform.summary_watcher import SummaryWatcher
  245. >>> summary_watcher = SummaryWatcher()
  246. >>> summaries = summary_watcher.list_summaries('/summary/base/dir', './job-01')
  247. """
  248. if self._contains_null_byte(summary_base_dir=summary_base_dir, relative_path=relative_path):
  249. return []
  250. if not self._is_valid_summary_directory(summary_base_dir, relative_path):
  251. return []
  252. summaries = []
  253. summary_directory = os.path.realpath(os.path.join(summary_base_dir, relative_path))
  254. try:
  255. entries = os.scandir(summary_directory)
  256. except PermissionError:
  257. logger.error('Path of summary directory is not accessible.')
  258. raise FileSystemPermissionError('Path of summary directory is not accessible.')
  259. for entry in entries:
  260. if entry.is_symlink() or not entry.is_file():
  261. continue
  262. pattern = re.search(self.SUMMARY_FILENAME_REGEX, entry.name)
  263. if pattern is None:
  264. continue
  265. timestamp = int(pattern.groupdict().get('timestamp'))
  266. try:
  267. # extract created time from filename
  268. ctime = datetime.datetime.fromtimestamp(timestamp).astimezone()
  269. except OverflowError:
  270. continue
  271. # extract modified time from filesystem
  272. mtime = datetime.datetime.fromtimestamp(entry.stat().st_mtime).astimezone()
  273. summaries.append({
  274. 'file_name': entry.name,
  275. 'create_time': ctime,
  276. 'update_time': mtime,
  277. })
  278. # sort by update time in descending order and filename in ascending order
  279. summaries.sort(key=lambda x: (-int(x['update_time'].timestamp()), x['file_name']))
  280. return summaries

MindInsight为MindSpore提供了简单易用的调优调试能力。在训练过程中,可以将标量、张量、图像、计算图、模型超参、训练耗时等数据记录到文件中,通过MindInsight可视化页面进行查看及分析。

Contributors (1)