You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

validate_path.py 6.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Validate the input path."""
  16. import os
  17. import re
  18. from typing import Union, List
  19. from urllib.parse import unquote
  20. from marshmallow import ValidationError
  21. from mindinsight.profiler.common.exceptions.exceptions import \
  22. ProfilerParamValueErrorException, ProfilerDirNotFoundException
  23. from mindinsight.datavisual.common.exceptions import TrainJobNotExistError
  24. from mindinsight.profiler.common.log import logger as log
  25. def safe_normalize_path(
  26. path,
  27. raise_key,
  28. safe_prefixes: Union[None, List[str]],
  29. check_absolute_path=False,
  30. allow_parent_dir=False,
  31. ):
  32. """
  33. Returns safe normalized path.
  34. This func validates given path, and returns its normalized form. If
  35. safe_prefixes is given, this func will check whether the path is safe.
  36. Note:
  37. This func is not compatible with windows.
  38. Caller should check returned path to ensure safety according to
  39. business logic.
  40. File scheme (rfc8089) is currently not supported.
  41. Args:
  42. path (str): Path to be normalized.
  43. raise_key (str): The exception raise key
  44. safe_prefixes (list[str]): If not none, path must startswith one of the
  45. safe_prefixes. Set this arg to [] will cause all paths considered
  46. unsafe. Normally, prefix in this arg should end with "/".
  47. check_absolute_path (bool): Whether check path is absolute.
  48. allow_parent_dir (bool): Whether allow parent dir in path.
  49. Returns:
  50. str, normalized path.
  51. """
  52. normalized_path = validate_and_normalize_path(
  53. path,
  54. raise_key=raise_key,
  55. check_absolute_path=check_absolute_path,
  56. allow_parent_dir=allow_parent_dir,
  57. )
  58. if safe_prefixes is None:
  59. return normalized_path
  60. normalized_str = str(normalized_path)
  61. for prefix in safe_prefixes:
  62. if normalized_str.startswith(prefix):
  63. return normalized_path
  64. raise ValidationError({raise_key: {"The path is invalid!"}})
  65. def validate_and_normalize_path(
  66. path,
  67. raise_key,
  68. check_absolute_path=False,
  69. allow_parent_dir=False,
  70. ):
  71. """
  72. Validates path and returns its normalized form.
  73. If path has a valid scheme, treat path as url, otherwise consider path a
  74. unix local path.
  75. Note:
  76. File scheme (rfc8089) is currently not supported.
  77. Args:
  78. path (str): Path to be normalized.
  79. raise_key (str): The exception raise key.
  80. check_absolute_path (bool): Whether check path scheme is supported.
  81. allow_parent_dir (bool): Whether allow parent dir in path.
  82. Returns:
  83. str, normalized path.
  84. """
  85. if not path:
  86. raise ValidationError({raise_key: {"The path is invalid!"}})
  87. path_str = str(path)
  88. if not allow_parent_dir:
  89. path_components = path_str.split("/")
  90. if ".." in path_components:
  91. raise ValidationError({raise_key: {"The path is invalid!"}})
  92. # path does not have valid schema, treat it as unix local path.
  93. if check_absolute_path:
  94. if not path_str.startswith("/"):
  95. raise ValidationError({raise_key: {"The path is invalid!"}})
  96. try:
  97. # most unix systems allow
  98. normalized_path = os.path.realpath(path)
  99. except ValueError:
  100. raise ValidationError({raise_key: {"The path is invalid!"}})
  101. return normalized_path
  102. def validate_and_normalize_profiler_path(summary_dir, summary_base_dir):
  103. """
  104. Validate and normalize profiler path.
  105. Args:
  106. summary_dir (str): The relative path of summary directory.
  107. summary_base_dir (str): The summary base directory.
  108. Returns:
  109. str, normalized path of profiler directory.
  110. """
  111. profiler_directory_pattern = r'^profiler.*'
  112. if not summary_dir:
  113. raise ProfilerParamValueErrorException('The file dir does not exist.')
  114. try:
  115. unquote_path = unquote(summary_dir, errors='strict')
  116. except UnicodeDecodeError:
  117. raise ProfilerParamValueErrorException('Unquote error with strict mode')
  118. train_job_dir = os.path.join(summary_base_dir, unquote_path)
  119. try:
  120. train_job_dir_abs = validate_and_normalize_path(train_job_dir, 'train_job_dir')
  121. except ValidationError:
  122. log.error('train_job dir <%s> is invalid', train_job_dir)
  123. raise ProfilerParamValueErrorException('train_job dir is invalid.')
  124. if not os.path.exists(train_job_dir_abs):
  125. raise TrainJobNotExistError(error_detail=train_job_dir_abs)
  126. try:
  127. profiler_name_list = []
  128. for dir_name in os.listdir(train_job_dir_abs):
  129. search_res = re.search(profiler_directory_pattern, dir_name)
  130. if search_res:
  131. profiler_name_list.append(search_res[0])
  132. profiler_name_list.sort()
  133. profiler_name_newest = profiler_name_list[-1]
  134. profiler_dir = os.path.join(summary_base_dir, unquote_path, profiler_name_newest)
  135. except ValidationError:
  136. log.error('no valid profiler dir under <%s>', train_job_dir_abs)
  137. raise ProfilerDirNotFoundException('Profiler dir not found.')
  138. try:
  139. profiler_dir = validate_and_normalize_path(profiler_dir, 'profiler')
  140. except ValidationError:
  141. log.error('profiler dir <%s> is invalid', profiler_dir)
  142. raise ProfilerParamValueErrorException('Profiler dir is invalid.')
  143. return profiler_dir