You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

run.py 8.0 kB

5 years ago
5 years ago
5 years ago
5 years ago
5 years ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. # Copyright 2019 Huawei Technologies Co., Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ============================================================================
  15. """Web service entrance."""
  16. import os
  17. import stat
  18. import re
  19. import subprocess
  20. import time
  21. import shlex
  22. from gunicorn.glogging import Logger
  23. from mindinsight.backend.config import gunicorn_conf
  24. from mindinsight.backend.config import WEB_CONFIG_DIR
  25. from mindinsight.conf import settings
  26. from mindinsight.utils.log import setup_logger
# Application object reference passed to gunicorn as the app module in start().
MINDBOARD_APP_MODULE = "mindinsight.backend.application:APP"
# Dotted path passed to gunicorn's --logger-class option in start().
GUNICORN_LOGGER = "mindinsight.backend.run.GunicornLogger"
# NOTE(review): presumably the bounds of the valid port range; not referenced
# in the visible part of this file -- confirm usage before relying on them.
_MIN_PORT = 1
_MAX_PORT = 65535
  31. def _get_file_size(file_path):
  32. """
  33. Get the file size.
  34. Args:
  35. file_path (str): The file path.
  36. Returns:
  37. int, the file size. If file is not existed, then return 0.
  38. """
  39. try:
  40. file_size = os.path.getsize(file_path)
  41. except FileNotFoundError:
  42. file_size = 0
  43. return file_size
  44. def _is_match_one(sub_string_list, src_string):
  45. """
  46. Whether the sub-string in the list can match the source string.
  47. Args:
  48. sub_string_list (list): The sub-string list.
  49. src_string (str): The source string.
  50. Returns:
  51. bool, if matched return True, else return False.
  52. """
  53. for match_info in sub_string_list:
  54. if match_info in src_string:
  55. return True
  56. return False
  57. def _check_stat_from_log(log_info):
  58. """
  59. Determine the service startup status based on the log information.
  60. Args:
  61. log_info (str): The output log of service startup.
  62. Returns:
  63. str, the state value that is one of the follows: "unknown", "failed" and "success".
  64. """
  65. server_state = "unknown"
  66. match_success_info = "Listening at: http://%s:%d" % \
  67. (settings.HOST, int(settings.PORT))
  68. common_failed_info_list = [
  69. "[ERROR] Retrying in 1 second",
  70. "[INFO] Reason: App failed to load",
  71. "[ERROR] Exception in worker process"
  72. ]
  73. re_pattern = "\\[ERROR\\].+%s.+%d" % \
  74. (settings.HOST, int(settings.PORT))
  75. # matched failed output log by fuzzy match
  76. if re.search(re_pattern, log_info) or \
  77. _is_match_one(common_failed_info_list, log_info):
  78. server_state = "failed"
  79. if match_success_info in log_info:
  80. server_state = "success"
  81. return server_state
  82. def _get_error_log_path():
  83. """
  84. Get gunicorn error log path.
  85. Returns:
  86. str, the path of error log.
  87. """
  88. path = os.path.join(settings.WORKSPACE, 'log/gunicorn/error.log')
  89. errorlog_abspath = os.path.realpath(path)
  90. return errorlog_abspath
  91. def _get_access_log_path():
  92. """Get gunicorn access log path."""
  93. access_log_path = os.path.join(settings.WORKSPACE, 'log/gunicorn/access.log')
  94. access_log_path = os.path.realpath(access_log_path)
  95. return access_log_path
  96. def _check_state_from_log(log_abspath, start_pos=0):
  97. """
  98. Check the service startup status based on the log file.
  99. Args:
  100. log_abspath (str): Absolute path of the log file.
  101. start_pos (int): Offset position of the log file.
  102. Returns:
  103. dict, a dict with "state" and "prompt_message" key.
  104. The value of the "state" key is as follows:"unknown", "failed" and "success".
  105. The value of the "prompt_message" key is a list of prompt messages.
  106. """
  107. server_is_start = False
  108. state_result = {"state": "unknown", "prompt_message": []}
  109. prompt_messages = []
  110. match_start_log = "Starting gunicorn"
  111. with open(log_abspath) as f_log:
  112. f_log.seek(start_pos)
  113. for line in f_log.readlines():
  114. if match_start_log in line:
  115. if server_is_start:
  116. break
  117. server_is_start = True
  118. continue
  119. if server_is_start:
  120. log_result = _check_stat_from_log(line)
  121. # ignore "unknown" result
  122. if log_result != "unknown":
  123. state_result["state"] = log_result
  124. if log_result == "failed":
  125. prompt_messages.append(line.strip())
  126. prompt_messages.append(
  127. "more failed details in log: %s" % log_abspath)
  128. break
  129. state_result["prompt_message"].append(
  130. "service start state: %s" % state_result["state"])
  131. for prompt_message in prompt_messages:
  132. state_result["prompt_message"].append(prompt_message)
  133. return state_result
  134. def _check_server_start_stat(log_abspath, start_pos=None):
  135. """
  136. Checking the Server Startup Status.
  137. Args:
  138. log_abspath (str): The log file path.
  139. start_pos (int): The log file start position.
  140. Returns:
  141. dict, an dict object that contains the state and prompt_message fields.
  142. The state values are as follows: "unknown", "failed" and "success".
  143. """
  144. state_result = {"state": "unknown", "prompt_message": []}
  145. # return unknown when not config gunicorn error log file
  146. if not log_abspath:
  147. return state_result
  148. log_pos = _get_file_size(log_abspath) if start_pos is None else start_pos
  149. try_cnt = 0
  150. try_cnt_max = 2
  151. while try_cnt < try_cnt_max:
  152. try_cnt += 1
  153. time.sleep(1)
  154. if _get_file_size(log_abspath) > log_pos:
  155. state_result.update(_check_state_from_log(log_abspath, log_pos))
  156. break
  157. return state_result
  158. class GunicornLogger(Logger):
  159. """Rewrite gunicorn default logger."""
  160. def __init__(self, cfg):
  161. self.access_log = setup_logger('gunicorn', 'access')
  162. self.error_log = setup_logger('gunicorn', 'error')
  163. super(GunicornLogger, self).__init__(cfg)
  164. access_log_path = _get_access_log_path()
  165. error_log_path = _get_error_log_path()
  166. os.chmod(access_log_path, stat.S_IREAD | stat.S_IWRITE)
  167. os.chmod(error_log_path, stat.S_IREAD | stat.S_IWRITE)
  168. def start():
  169. """Start web service."""
  170. errorlog_abspath = _get_error_log_path()
  171. gunicorn_conf_file = os.path.join(WEB_CONFIG_DIR, "gunicorn_conf.py")
  172. cmd = "gunicorn " \
  173. "-b {host}:{port} {app_module} " \
  174. "-c {conf_file} " \
  175. "--logger-class {logger_class} " \
  176. "--access-logformat {log_format}"\
  177. .format(host=settings.HOST,
  178. port=settings.PORT,
  179. conf_file=gunicorn_conf_file,
  180. app_module=MINDBOARD_APP_MODULE,
  181. logger_class=GUNICORN_LOGGER,
  182. log_format=settings.GUNICORN_ACCESS_FORMAT
  183. )
  184. log_size = _get_file_size(errorlog_abspath)
  185. console = setup_logger('mindinsight', 'console', console=True, logfile=False, formatter='%(message)s')
  186. # start server
  187. process = subprocess.Popen(
  188. shlex.split(cmd),
  189. shell=False,
  190. stdin=subprocess.PIPE,
  191. stdout=subprocess.PIPE,
  192. stderr=subprocess.PIPE
  193. )
  194. _, stderr = process.communicate()
  195. if stderr:
  196. console.error(stderr.decode())
  197. # wait command success to end when gunicorn running in daemon.
  198. if gunicorn_conf.daemon and process.wait() == 0:
  199. state_result = _check_server_start_stat(errorlog_abspath, log_size)
  200. # print gunicorn start state to stdout
  201. console.info('Web address: http://%s:%s', settings.HOST, settings.PORT)
  202. for line in state_result["prompt_message"]:
  203. console.info(line)
# Start the web service when this module is executed as a script.
if __name__ == '__main__':
    start()

MindInsight为MindSpore提供了简单易用的调优调试能力。在训练过程中,可以将标量、张量、图像、计算图、模型超参、训练耗时等数据记录到文件中,通过MindInsight可视化页面进行查看及分析。