You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

file.py 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. # Copyright (c) Alibaba, Inc. and its affiliates.
  2. import contextlib
  3. import os
  4. import tempfile
  5. from abc import ABCMeta, abstractmethod
  6. from pathlib import Path
  7. from typing import Generator, Union
  8. import requests
  class Storage(metaclass=ABCMeta):
      """Abstract base class for storage backends.

      A concrete backend must implement four methods: ``read()`` and
      ``read_text()`` return the content of a file as bytes and as text,
      while ``write()`` and ``write_text()`` store bytes and text.
      """

      @abstractmethod
      def read(self, filepath: str):
          """Return the content found at ``filepath`` as a byte stream."""

      @abstractmethod
      def read_text(self, filepath: str):
          """Return the content found at ``filepath`` as text."""

      @abstractmethod
      def write(self, obj: bytes, filepath: Union[str, Path]) -> None:
          """Store the bytes ``obj`` at ``filepath``."""

      @abstractmethod
      def write_text(self,
                     obj: str,
                     filepath: Union[str, Path],
                     encoding: str = 'utf-8') -> None:
          """Store the text ``obj`` at ``filepath`` using ``encoding``."""
  class LocalStorage(Storage):
      """Storage backend for files on the local file system."""

      def read(self, filepath: Union[str, Path]) -> bytes:
          """Read data from a given ``filepath`` with 'rb' mode.

          Args:
              filepath (str or Path): Path to read data.

          Returns:
              bytes: The raw content of the file.
          """
          with open(filepath, 'rb') as f:
              return f.read()

      def read_text(self,
                    filepath: Union[str, Path],
                    encoding: str = 'utf-8') -> str:
          """Read data from a given ``filepath`` with 'r' mode.

          Args:
              filepath (str or Path): Path to read data.
              encoding (str): The encoding format used to open the
                  ``filepath``. Default: 'utf-8'.

          Returns:
              str: The decoded text content of the file.
          """
          with open(filepath, 'r', encoding=encoding) as f:
              return f.read()

      def write(self, obj: bytes, filepath: Union[str, Path]) -> None:
          """Write data to a given ``filepath`` with 'wb' mode.

          Note:
              ``write`` will create the directory of ``filepath`` if it
              does not exist.

          Args:
              obj (bytes): Data to be written.
              filepath (str or Path): Path to write data.
          """
          dirname = os.path.dirname(filepath)
          if dirname:
              # exist_ok=True avoids the check-then-create race: another
              # writer may create the directory between an existence test
              # and the makedirs call.
              os.makedirs(dirname, exist_ok=True)
          with open(filepath, 'wb') as f:
              f.write(obj)

      def write_text(self,
                     obj: str,
                     filepath: Union[str, Path],
                     encoding: str = 'utf-8') -> None:
          """Write data to a given ``filepath`` with 'w' mode.

          Note:
              ``write_text`` will create the directory of ``filepath`` if
              it does not exist.

          Args:
              obj (str): Data to be written.
              filepath (str or Path): Path to write data.
              encoding (str): The encoding format used to open the
                  ``filepath``. Default: 'utf-8'.
          """
          dirname = os.path.dirname(filepath)
          if dirname:
              # See ``write``: tolerate a directory created concurrently.
              os.makedirs(dirname, exist_ok=True)
          with open(filepath, 'w', encoding=encoding) as f:
              f.write(obj)

      @contextlib.contextmanager
      def as_local_path(
              self,
              filepath: Union[str,
                              Path]) -> Generator[Union[str, Path], None, None]:
          """Yield ``filepath`` unchanged.

          The file is already local, so nothing is copied or cleaned up;
          the method exists so all backends share the same API.
          """
          yield filepath
  class HTTPStorage(Storage):
      """HTTP and HTTPS storage (read-only)."""

      def read(self, url):
          """Fetch ``url`` with a GET request and return the body as bytes.

          Raises:
              requests.HTTPError: If the response status indicates an error.
          """
          # TODO @wenmeng.zwm add progress bar if file is too large
          # NOTE(review): no timeout is passed to requests.get, so a stalled
          # server may block the caller indefinitely -- confirm if intended.
          r = requests.get(url)
          r.raise_for_status()
          return r.content

      def read_text(self, url):
          """Fetch ``url`` with a GET request and return the body as text.

          The text is decoded by ``requests`` (``Response.text``).

          Raises:
              requests.HTTPError: If the response status indicates an error.
          """
          r = requests.get(url)
          r.raise_for_status()
          return r.text

      @contextlib.contextmanager
      def as_local_path(
              self, filepath: str) -> Generator[Union[str, Path], None, None]:
          """Download a file from ``filepath`` to a temporary local path.

          ``as_local_path`` is decorated by
          :meth:`contextlib.contextmanager`. It can be called with a ``with``
          statement, and when exiting from the ``with`` statement, the
          temporary path will be removed.

          Args:
              filepath (str): URL to download the file from.

          Examples:
              >>> storage = HTTPStorage()
              >>> # After exiting from the ``with`` clause,
              >>> # the path will be removed
              >>> with storage.as_local_path('http://path/to/file') as path:
              ...     # do something here
          """
          # Download before creating the temporary file so that a failed
          # request leaves nothing to clean up.
          content = self.read(filepath)
          # Create the file outside the ``try`` so that ``f`` is always
          # bound when the ``finally`` clause runs.
          f = tempfile.NamedTemporaryFile(delete=False)
          try:
              # Close the handle even if the write fails, before removal.
              with f:
                  f.write(content)
              yield f.name
          finally:
              os.remove(f.name)

      def write(self, obj: bytes, url: Union[str, Path]) -> None:
          """Not supported: always raises ``NotImplementedError``."""
          raise NotImplementedError('write is not supported by HTTP Storage')

      def write_text(self,
                     obj: str,
                     url: Union[str, Path],
                     encoding: str = 'utf-8') -> None:
          """Not supported: always raises ``NotImplementedError``."""
          raise NotImplementedError(
              'write_text is not supported by HTTP Storage')
  class OSSStorage(Storage):
      """OSS storage (placeholder; every operation is not implemented yet)."""

      def __init__(self, oss_config_file=None):
          """Not implemented yet: always raises ``NotImplementedError``."""
          # read from config file or env var
          raise NotImplementedError(
              'OSSStorage.__init__ to be implemented in the future')

      def read(self, filepath):
          """Not implemented yet: always raises ``NotImplementedError``."""
          raise NotImplementedError(
              'OSSStorage.read to be implemented in the future')

      def read_text(self, filepath, encoding='utf-8'):
          """Not implemented yet: always raises ``NotImplementedError``."""
          raise NotImplementedError(
              'OSSStorage.read_text to be implemented in the future')

      @contextlib.contextmanager
      def as_local_path(
              self, filepath: str) -> Generator[Union[str, Path], None, None]:
          """Download a file from ``filepath`` to a temporary local path.

          ``as_local_path`` is decorated by
          :meth:`contextlib.contextmanager`. It can be called with a ``with``
          statement, and when exiting from the ``with`` statement, the
          temporary path will be removed.

          Args:
              filepath (str): Location to download the file from.

          Examples:
              >>> storage = OSSStorage()
              >>> # After exiting from the ``with`` clause,
              >>> # the path will be removed
              >>> with storage.as_local_path('oss://path/to/file') as path:
              ...     # do something here
          """
          # Read first so that a failing read leaves no temporary file.
          content = self.read(filepath)
          # Create the file outside the ``try`` so that ``f`` is always
          # bound when the ``finally`` clause runs.
          f = tempfile.NamedTemporaryFile(delete=False)
          try:
              # Close the handle even if the write fails, before removal.
              with f:
                  f.write(content)
              yield f.name
          finally:
              os.remove(f.name)

      def write(self, obj: bytes, filepath: Union[str, Path]) -> None:
          """Not implemented yet: always raises ``NotImplementedError``."""
          raise NotImplementedError(
              'OSSStorage.write to be implemented in the future')

      def write_text(self,
                     obj: str,
                     filepath: Union[str, Path],
                     encoding: str = 'utf-8') -> None:
          """Not implemented yet: always raises ``NotImplementedError``."""
          raise NotImplementedError(
              'OSSStorage.write_text to be implemented in the future')
  # Module-level cache of storage backend instances, keyed by storage type
  # (e.g. 'local', 'http'); filled lazily by ``File._get_storage``.
  G_STORAGES = dict()
  class File(object):
      """Facade that routes file operations to a storage backend.

      The backend is chosen from the scheme of the URI (the text before the
      first ``'://'``); a URI without a scheme is treated as a local path.
      """

      # Maps a URI scheme to the backend class that handles it.
      _prefix_to_storage: dict = {
          'oss': OSSStorage,
          'http': HTTPStorage,
          'https': HTTPStorage,
          'local': LocalStorage,
      }

      @staticmethod
      def _get_storage(uri):
          """Return the (cached) storage backend instance for ``uri``.

          Args:
              uri (str or Path): URI or local path to resolve.

          Returns:
              Storage: Backend instance shared through ``G_STORAGES``.

          Raises:
              AssertionError: If ``uri`` is neither a str nor a path-like
                  object resolving to a str, or its scheme is unsupported.
          """
          # The public methods advertise ``Path`` support, so normalise
          # path-like objects to str before inspecting the scheme.
          if isinstance(uri, os.PathLike):
              uri = os.fspath(uri)
          assert isinstance(uri,
                            str), f'uri should be str type, buf got {type(uri)}'
          if '://' not in uri:
              # local path
              storage_type = 'local'
          else:
              # Split only on the first '://' so that URIs embedding another
              # '://' (e.g. in a query string) do not fail to unpack.
              storage_type, _ = uri.split('://', 1)
          assert storage_type in File._prefix_to_storage, \
              f'Unsupported uri {uri}, valid prefixs: '\
              f'{list(File._prefix_to_storage.keys())}'
          # Backends are instantiated lazily, once per storage type.
          if storage_type not in G_STORAGES:
              G_STORAGES[storage_type] = File._prefix_to_storage[storage_type]()
          return G_STORAGES[storage_type]

      @staticmethod
      def read(uri: str) -> bytes:
          """Read data from a given ``uri`` as bytes.

          Args:
              uri (str): URI or local path to read data from.

          Returns:
              bytes: Content returned by the selected backend.
          """
          storage = File._get_storage(uri)
          return storage.read(uri)

      @staticmethod
      def read_text(uri: Union[str, Path], encoding: str = 'utf-8') -> str:
          """Read data from a given ``uri`` as text.

          Args:
              uri (str or Path): URI or local path to read data from.
              encoding (str): The encoding used to decode the content.
                  Default: 'utf-8'. Not forwarded to ``HTTPStorage``,
                  whose ``read_text`` takes no encoding argument.

          Returns:
              str: Text returned by the selected backend.
          """
          storage = File._get_storage(uri)
          if isinstance(storage, HTTPStorage):
              # HTTPStorage.read_text(url) accepts no ``encoding``.
              return storage.read_text(uri)
          return storage.read_text(uri, encoding=encoding)

      @staticmethod
      def write(obj: bytes, uri: Union[str, Path]) -> None:
          """Write bytes to a given ``uri``.

          Args:
              obj (bytes): Data to be written.
              uri (str or Path): URI or local path to write data to.
          """
          storage = File._get_storage(uri)
          return storage.write(obj, uri)

      @staticmethod
      def write_text(obj: str, uri: str, encoding: str = 'utf-8') -> None:
          """Write text to a given ``uri``.

          Args:
              obj (str): Data to be written.
              uri (str): URI or local path to write data to.
              encoding (str): The encoding passed to the backend's
                  ``write_text``. Default: 'utf-8'.
          """
          storage = File._get_storage(uri)
          return storage.write_text(obj, uri, encoding=encoding)

      @staticmethod
      @contextlib.contextmanager
      def as_local_path(uri: str) -> Generator[Union[str, Path], None, None]:
          """Yield a local path for ``uri`` via the selected backend.

          Delegates to the backend's ``as_local_path`` context manager and
          yields whatever path it provides for the duration of the ``with``
          block.

          Args:
              uri (str): URI or local path to expose as a local file.
          """
          storage = File._get_storage(uri)
          with storage.as_local_path(uri) as local_path:
              yield local_path

致力于通过开放的社区合作,开源AI模型以及相关创新技术,推动基于模型即服务的生态繁荣发展