|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- # Copyright 2019 Huawei Technologies Co., Ltd
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ============================================================================
- """File handler for file operations."""
- from mindinsight.utils.exceptions import PathNotExistError
- from mindinsight.datavisual.common.log import logger
- from mindinsight.datavisual.utils.tools import to_str
- from mindinsight.datavisual.data_access.local_file_system import LocalFileSystem
-
# Number of bytes requested from the file system per buffered read (24 MiB).
_DEFAULT_BUFFER_SIZE = 24 * 1024 ** 2

# Registry of file systems keyed by protocol head, i.e. the text before "://"
# in a path. The empty key serves paths that carry no protocol head and maps
# to the local file system.
_FILE_SYSTEMS = {"": LocalFileSystem()}
-
-
class FileHandler:
    """
    Buffered reader for one file, plus static helpers for path operations.

    Every file system operation is delegated to the file system object that is
    registered in `_FILE_SYSTEMS` for the protocol head of the path (the text
    before "://", or the empty string when the path has no protocol head).
    """

    def __init__(self, file_path, mode='rb'):
        """
        Init FileHandler.

        Args:
            file_path (str): File path.
            mode (Literal['r', 'rb', 'br', 'w', 'wb', 'bw']): It must be
                in ['r', 'rb', 'br', 'w', 'wb', 'bw'].

        Raises:
            ValueError: If `mode` is not supported, or if no file system is
                registered for the prefix of `file_path`.
        """
        logger.debug("The __init__ method enter, param: file_path=%s, "
                     "mode=%s", file_path, mode)

        if mode not in ('r', 'rb', 'br', 'w', 'wb', 'bw'):
            # Wrap in a tuple so a tuple-valued `mode` is formatted into the
            # message instead of making the % operator raise TypeError.
            raise ValueError("mode %s is not supported by FileHandler." % (mode,))

        self._file_path = to_str(file_path)
        self._file_system = self.get_file_system(self._file_path)
        # Minimum amount requested from the file system on each buffer refill.
        self._buff_chunk_size = _DEFAULT_BUFFER_SIZE
        # Most recently fetched chunk and the position of the next unread item in it.
        self._buff = None
        self._buff_offset = 0
        # Position in the file of the next item to hand out.
        self._offset = 0
        self._binary_mode = 'b' in mode

    @staticmethod
    def get_file_system(path):
        """
        Get file system object from path.

        Args:
            path (str): Directory path or file path.

        Returns:
            BaseFileSystem, a file system object.

        Raises:
            ValueError: If no file system is registered for the path's prefix.
        """
        path = to_str(path)
        # The prefix is everything before the first "://"; paths without one
        # use the empty prefix.
        prefix_index = path.find("://")
        prefix = path[:prefix_index] if prefix_index >= 0 else ""
        file_system = _FILE_SYSTEMS.get(prefix)

        if file_system is None:
            raise ValueError("No filesystem can be found for prefix %s" % prefix)
        return file_system

    @staticmethod
    def walk(node, forward=True, onerror=None):
        """
        Traverse path for directory and file tree.

        Args:
            node (str): Current path.
            forward (bool): If True, it will return the sub-directories and files in the top-level
                directory first and then iterate the files in the sub-directories. If False, the
                sub-directories are iterated first. Default: True.
            onerror (Optional[Callable]): Called with the PathNotExistError raised while listing
                a directory. If None, the error is logged as a warning instead. In both cases
                nothing is yielded for that directory. Default: None.

        Yields:
            Tuple, (node, sub_dirs, files).

        """
        logger.debug("The walk method enter, param: node=%s, "
                     "forward=%s, onerror=%s.", node, forward, type(onerror))

        file_system = FileHandler.get_file_system(node)
        node = to_str(node)

        try:
            entries = file_system.list_dir(node)
        except PathNotExistError as err:
            if onerror:
                onerror(err)
            else:
                logger.warning("Get dir list error, dir_path=%s error=%s.", node, str(err))
            return

        sub_dirs, files = [], []
        for item in entries:
            full_path = file_system.join(node, to_str(item))
            if file_system.is_dir(full_path):
                sub_dirs.append(item)
            else:
                files.append(item)

        result = (node, sub_dirs, files)

        if forward:
            logger.debug("The walk method return, pre result=%s.", result)
            yield result

        # `sub_dirs` is iterated after the forward-mode yield, so a consumer that
        # mutates the yielded list changes which sub-directories are visited.
        for subdir in sub_dirs:
            joined_subdir = file_system.join(node, to_str(subdir))
            yield from FileHandler.walk(joined_subdir, forward, onerror)

        if not forward:
            logger.debug("The walk method return, post result=%s.", result)
            yield result

    def read(self, size=None):
        """
        Read data from buffer or file by size.

        Buffered data is consumed first. If the buffer cannot satisfy the
        request, a new chunk of at least `_buff_chunk_size` is fetched from the
        file system at the current offset and the remainder is taken from it.

        Args:
            size (Union[None, int]): Amount of data to read. If set None, read the whole file.
                Negative values are not validated. Default: None.

        Returns:
            Union[str, bytes], the data read. It may be shorter than `size`
            when the file system returns less data than requested.
        """
        if size is None:
            result = self._file_system.read(self._file_path, self._binary_mode)
            self._offset = len(result)
            # The buffer described the previous position. Drop it, as
            # reset_offset does, so later sized reads do not return stale data.
            self._buff = None
            self._buff_offset = 0
            return result

        result = None
        if self._buff and len(self._buff) > self._buff_offset:
            result = self._read_buffer_by_offset(self._buff_offset + size)
            if len(result) == size:
                return result
            # Only part of the request was buffered; fetch the rest below.
            size -= len(result)

        read_size = max(self._buff_chunk_size, size)
        self._buff = self._file_system.read(self._file_path, self._binary_mode,
                                            read_size, self._offset)
        self._buff_offset = 0

        chunk = self._read_buffer_by_offset(size)
        return result + chunk if result else chunk

    def _read_buffer_by_offset(self, new_buff_offset):
        """
        Read buffer by offset.

        Advances both the buffer offset and the file offset by the amount read.

        Args:
            new_buff_offset (int): Ending offset to read. It is clamped to the
                length of the buffer.

        Returns:
            Union[str, bytes], buffer content from old offset to new offset.

        """
        old_buff_offset = self._buff_offset
        read_size = min(len(self._buff), new_buff_offset) - old_buff_offset
        self._offset += read_size
        self._buff_offset += read_size
        return self._buff[old_buff_offset:old_buff_offset + read_size]

    def reset_offset(self, offset):
        """
        Reset offset and buff_offset, clean buff.

        Args:
            offset (int): Offset.

        """
        self._offset = offset
        self._buff = None
        self._buff_offset = 0

    @staticmethod
    def list_dir(path):
        """
        List directories by path.

        Args:
            path (str): Directory path or file path.

        Returns:
            list[str], directories.
        """
        file_system = FileHandler.get_file_system(path)
        return file_system.list_dir(path)

    @staticmethod
    def is_dir(path):
        """
        Determine if it is a directory.

        Args:
            path (str): Directory path or file path.

        Returns:
            bool, if it is a directory path, return True.
        """
        file_system = FileHandler.get_file_system(path)
        return file_system.is_dir(path)

    @staticmethod
    def is_file(path):
        """
        Determine if it is a file.

        Args:
            path (str): Directory path or file path.

        Returns:
            bool, if it is a file path, return True.
        """
        file_system = FileHandler.get_file_system(path)
        return file_system.is_file(path)

    @staticmethod
    def exists(path):
        """
        Determine if it exists.

        Args:
            path (str): Directory path or file path.

        Returns:
            bool, if it exists, return True.
        """
        file_system = FileHandler.get_file_system(path)
        return file_system.exists(path)

    @staticmethod
    def file_stat(file_path):
        """
        Get file stat information.

        Args:
            file_path (str): File path.

        Returns:
            NamedTuple, the (size, mtime) of file.
        """
        file_system = FileHandler.get_file_system(file_path)
        return file_system.file_stat(file_path)

    @staticmethod
    def join(path, *paths):
        """
        Join paths.

        The file system is chosen from `path`, the first component.

        Args:
            path (str): Directory path.
            paths (str): Path or paths.

        Returns:
            str, the joined path.
        """
        file_system = FileHandler.get_file_system(path)
        return file_system.join(path, *paths)

    @property
    def offset(self):
        """Get offset."""
        return self._offset

    @property
    def file_path(self):
        """Get file path."""
        return self._file_path
|