You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

file_handler.py 8.9 kB

5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """File handler for file operations."""
  16. from mindinsight.utils.exceptions import PathNotExistError
  17. from mindinsight.datavisual.common.log import logger
  18. from mindinsight.datavisual.utils.tools import to_str
  19. from mindinsight.datavisual.data_access.local_file_system import LocalFileSystem
  20. _DEFAULT_BUFFER_SIZE = 24 * 1024 * 1024
  21. # _FILE_SYSTEMS, key: FileProtocolHead, value: FileSystem
  22. _FILE_SYSTEMS = dict()
  23. _FILE_SYSTEMS[""] = LocalFileSystem()
  24. class FileHandler:
  25. """File handler."""
  26. def __init__(self, file_path, mode='rb'):
  27. """
  28. Init FileHandler.
  29. Args:
  30. file_path (str): File path.
  31. mode (Literal['r', 'rb', 'br', 'w', 'wb', 'bw']): It must be
  32. in ['r', 'rb', 'br', 'w', 'wb', 'bw'].
  33. """
  34. logger.debug("The __init__ method enter, param: file_path=%s"
  35. "mode=%s", file_path, mode)
  36. if mode not in ('r', 'rb', 'br', 'w', 'wb', 'bw'):
  37. raise ValueError("mode %s is not supported by FileHandler." % mode)
  38. self._file_path = to_str(file_path)
  39. self._file_system = self.get_file_system(self._file_path)
  40. self._buff_chunk_size = _DEFAULT_BUFFER_SIZE
  41. self._buff = None
  42. self._buff_offset = 0
  43. self._offset = 0
  44. self._binary_mode = 'b' in mode
  45. @staticmethod
  46. def get_file_system(path):
  47. """
  48. Get file system object from path.
  49. Args:
  50. path (str): Directory path or file path.
  51. Returns:
  52. BaseFileSystem, a file system object.
  53. """
  54. path = to_str(path)
  55. prefix_index = path.find("://")
  56. prefix = path[:prefix_index] if prefix_index >= 0 else ""
  57. file_system = _FILE_SYSTEMS.get(prefix, None)
  58. if file_system is None:
  59. raise ValueError("No filesystem can be found for prefix %s" % prefix)
  60. return file_system
  61. @staticmethod
  62. def walk(node, forward=True, onerror=None):
  63. """
  64. Traverse path for directory and file tree.
  65. Read from the buffer first.If there is not enough data in the buffer,
  66. data will be read from the file system.
  67. Args:
  68. node (str): Current path.
  69. forward (bool): If True, it will return the sub-directories and files in the top-level
  70. directory first and then iterate the files in the sub-directories. Default: True.
  71. onerror (Optional[Callable]): If None, it indicates that errors during file traversal
  72. will be ignored. Default: None.
  73. Yields:
  74. Tuple, (node, sub_dirs, files).
  75. """
  76. logger.debug("The walk method enter, param: node=%s, "
  77. "forward=%s, onerror=%s.", node, forward, type(onerror))
  78. file_system = FileHandler.get_file_system(node)
  79. node = to_str(node)
  80. dirs = []
  81. try:
  82. dirs = file_system.list_dir(node)
  83. except PathNotExistError as err:
  84. if onerror:
  85. onerror(err)
  86. else:
  87. logger.warning("Get dir list error, dir_path=%s error=%s.", node, str(err))
  88. return
  89. sub_dirs, files = [], []
  90. for item in dirs:
  91. full_path = file_system.join(node, to_str(item))
  92. if file_system.is_dir(full_path):
  93. sub_dirs.append(item)
  94. else:
  95. files.append(item)
  96. result = (node, sub_dirs, files)
  97. if forward:
  98. logger.debug("The walk method return, pre result=%s.", result)
  99. yield result
  100. for subdir in sub_dirs:
  101. joined_subdir = file_system.join(node, to_str(subdir))
  102. for sub_results in FileHandler.walk(joined_subdir, forward, onerror):
  103. yield sub_results
  104. if not forward:
  105. logger.debug("The walk method return, post result=%s.", result)
  106. yield result
  107. def read(self, size=None):
  108. """
  109. Read bytes from buffer or file by size.
  110. Args:
  111. size (Union[None, int]): Number of bytes to read, If set None, read the whole file. Default: None.
  112. Returns:
  113. str, a certain number of bytes.
  114. """
  115. if size is None:
  116. result = self._file_system.read(self._file_path, self._binary_mode)
  117. self._offset = len(result)
  118. return result
  119. result = None
  120. if self._buff and len(self._buff) > self._buff_offset:
  121. read_offset = self._buff_offset + size if size is not None else len(self._buff)
  122. result = self._read_buffer_by_offset(read_offset)
  123. if size is not None:
  124. if len(result) == size:
  125. return result
  126. size -= len(result)
  127. read_size = max(self._buff_chunk_size, size) if size is not None else None
  128. self._buff = self._file_system.read(self._file_path, self._binary_mode,
  129. read_size, self._offset)
  130. self._buff_offset = 0
  131. read_offset = size if size is not None else len(self._buff)
  132. chunk = self._read_buffer_by_offset(read_offset)
  133. result = result + chunk if result else chunk
  134. return result
  135. def _read_buffer_by_offset(self, new_buff_offset):
  136. """
  137. Read buffer by offset.
  138. Args:
  139. new_buff_offset (int): Ending offset to read.
  140. Returns:
  141. str, bytes from old offset to new offset.
  142. """
  143. old_buff_offset = self._buff_offset
  144. read_size = min(len(self._buff), new_buff_offset) - old_buff_offset
  145. self._offset += read_size
  146. self._buff_offset += read_size
  147. return self._buff[old_buff_offset:old_buff_offset + read_size]
  148. def reset_offset(self, offset):
  149. """
  150. Reset offset and buff_offset, clean buff.
  151. Args:
  152. offset (int): Offset.
  153. """
  154. self._offset = offset
  155. self._buff = None
  156. self._buff_offset = 0
  157. @staticmethod
  158. def list_dir(path):
  159. """
  160. List directories by path.
  161. Args:
  162. path (str): Directory path or file path.
  163. Returns:
  164. list[str], directories.
  165. """
  166. file_system = FileHandler.get_file_system(path)
  167. return file_system.list_dir(path)
  168. @staticmethod
  169. def is_dir(path):
  170. """
  171. Determine if it is a directory.
  172. Args:
  173. path (str): Directory path or file path.
  174. Returns:
  175. bool, if it is a directory path, return True.
  176. """
  177. file_system = FileHandler.get_file_system(path)
  178. return file_system.is_dir(path)
  179. @staticmethod
  180. def is_file(path):
  181. """
  182. Determine if it is a file.
  183. Args:
  184. path (str): Directory path or file path.
  185. Returns:
  186. bool, if it is a file path, return True.
  187. """
  188. file_system = FileHandler.get_file_system(path)
  189. return file_system.is_file(path)
  190. @staticmethod
  191. def exists(path):
  192. """
  193. Determine if it exists.
  194. Args:
  195. path (str): Directory path or file path.
  196. Returns:
  197. bool, if it exists, return True.
  198. """
  199. file_system = FileHandler.get_file_system(path)
  200. return file_system.exists(path)
  201. @staticmethod
  202. def file_stat(file_path):
  203. """
  204. Get file stat information.
  205. Args:
  206. file_path (str): File path.
  207. Returns:
  208. Nametuple, the (size, mtime) of file.
  209. """
  210. file_system = FileHandler.get_file_system(file_path)
  211. return file_system.file_stat(file_path)
  212. @staticmethod
  213. def join(path, *paths):
  214. """
  215. Join paths.
  216. Args:
  217. path (str): Directory path.
  218. paths (str): Path or paths.
  219. Returns:
  220. str, the joined path.
  221. """
  222. file_system = FileHandler.get_file_system(path)
  223. return file_system.join(path, *paths)
  224. @property
  225. def offset(self):
  226. """Get offset."""
  227. return self._offset
  228. @property
  229. def file_path(self):
  230. """Get file path."""
  231. return self._file_path

MindInsight为MindSpore提供了简单易用的调优调试能力。在训练过程中,可以将标量、张量、图像、计算图、模型超参、训练耗时等数据记录到文件中,通过MindInsight可视化页面进行查看及分析。

Contributors (1)