From d1096f376bdaf98fba0c507df492419326723a1f Mon Sep 17 00:00:00 2001 From: wjtest1215 Date: Wed, 28 Sep 2022 17:25:46 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20''?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wjtes2022092616t232775444-log.txt | 233 ++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 wjtes2022092616t232775444-log.txt diff --git a/wjtes2022092616t232775444-log.txt b/wjtes2022092616t232775444-log.txt new file mode 100644 index 0000000..3b487a6 --- /dev/null +++ b/wjtes2022092616t232775444-log.txt @@ -0,0 +1,233 @@ +/home/work + +start loading script + +finish loading script + +2022/09/26 16:24:20 Start to download master.zip + +2022/09/26 16:24:20 Total parts count 1 + +2022/09/26 16:24:21 part(1) finished + +2022/09/26 16:24:21 Download object finished, downloadPath:/cache/code/master.zip + +panic: runtime error: index out of range [4] with length 4 + + + +goroutine 1 [running]: + +main.main() + + /home/houysh/openi/lewis/sync_for_grampus/downloader_for_obs.go:41 +0x4e0 + +unzip finished;start to exec code; + +do nothing + +[Modelarts Service Log]user: uid=1101(work) gid=1101(work) groups=1101(work),1000(HwHiAiUser) + +[Modelarts Service Log]pwd: /home/work + +[Modelarts Service Log]boot_file: /cache/code/npu_test/npu/train_for_c2net.py + +[Modelarts Service Log]log_url: /tmp/log/train.log + +[Modelarts Service Log]command: /cache/code/npu_test/npu/train_for_c2net.py + +[Modelarts Service Log]local_code_dir: + +[Modelarts Service Log]Training start at 2022-09-26-16:24:21 + +[Modelarts Service Log][modelarts_create_log] modelarts-pipe found + +[ModelArts Service Log]modelarts-pipe: will create log file /tmp/log/train.log + +[Modelarts Service Log][modelarts_logger] modelarts-pipe found + +[ModelArts Service Log]modelarts-pipe: will create log file /tmp/log/train.log + +[ModelArts Service Log]modelarts-pipe: will write log file /tmp/log/train.log + +[ModelArts Service Log]modelarts-pipe: param for max log length: 1073741824 + +[ModelArts Service Log]modelarts-pipe: param for whether exit on overflow: 0 + +INFO:root:Using MoXing-v2.0.0.rc2.4b57a67b-4b57a67b + +INFO:root:Using OBS-Python-SDK-3.20.9.1 + +[Modelarts Service Log]2022-09-26 16:24:22,746 - INFO - Ascend Driver: Version=22.0.0.3 + +[Modelarts Service Log]2022-09-26 16:24:22,747 - INFO - you are advised to use ASCEND_DEVICE_ID env instead of DEVICE_ID, as the DEVICE_ID env will be discarded in later versions + +[Modelarts Service Log]2022-09-26 16:24:22,747 - INFO - particularly, ${ASCEND_DEVICE_ID} == ${DEVICE_ID}, it's the logical device id + +[Modelarts Service Log]2022-09-26 16:24:22,747 - INFO - Davinci training command + +[Modelarts Service Log]2022-09-26 16:24:22,747 - INFO - ['/usr/bin/python', '/cache/code/npu_test/npu/train_for_c2net.py'] + +[Modelarts Service Log]2022-09-26 16:24:22,747 - INFO - Wait for Rank table file ready + +[Modelarts Service Log]2022-09-26 16:24:22,748 - INFO - Rank table file (K8S generated) is ready for read + +[Modelarts Service Log]2022-09-26 16:24:22,748 - INFO - + +{ + + "status": "completed", + + "group_count": "1", + + "group_list": [ + + { + + "group_name": "job-wjtes2022092616t2327", + + "device_count": "1", + + "instance_count": "1", + + "instance_list": [ + + { + + "pod_name": "joba57ac677-job-wjtes2022092616t2327-0", + + "server_id": "192.168.0.189", + + "devices": [ + + { + + "device_id": "3", + + "device_ip": "192.4.68.236" + + } + + ] + + } + + ] + + } + + ] + +} + +[Modelarts Service Log]2022-09-26 16:24:22,748 - INFO - Rank table file (C7x) + +[Modelarts Service Log]2022-09-26 16:24:22,748 - INFO - + +{ + + "status": "completed", + + "version": "1.0", + + "server_count": "1", + + "server_list": [ + + { + + "server_id": "192.168.0.189", + + "device": [ + + { + + "device_id": "3", + + "device_ip": "192.4.68.236", + + "rank_id": "0" + + } + + ] + + } + + ] + +} + +[Modelarts Service Log]2022-09-26 16:24:22,749 - INFO - Rank table file (C7x) is generated + +[Modelarts Service Log]2022-09-26 16:24:22,749 - INFO - Current server + +[Modelarts Service Log]2022-09-26 16:24:22,749 - INFO - + +{ + + "server_id": "192.168.0.189", + + "device": [ + + { + + "device_id": "3", + + "device_ip": "192.4.68.236", + + "rank_id": "0" + + } + + ] + +} + +[Modelarts Service Log]2022-09-26 16:24:22,750 - INFO - bootstrap proc-rank-0-device-0 + +args: + +Namespace(device_target='Ascend', epoch_size=5) + +Traceback (most recent call last): + + File "/cache/code/npu_test/npu/train_for_c2net.py", line 50, in + + cfg.batch_size) + + File "/cache/code/npu_test/npu/dataset.py", line 32, in create_dataset + + mnist_ds = ds.MnistDataset(data_path) + + File "/usr/local/ma/python3.7/lib/python3.7/site-packages/mindspore/dataset/engine/validators.py", line 343, in new_method + + check_dir(dataset_dir) + + File "/usr/local/ma/python3.7/lib/python3.7/site-packages/mindspore/dataset/core/validator_helpers.py", line 551, in check_dir + + raise ValueError("The folder {} does not exist or is not a directory or permission denied!".format(dataset_dir)) + +ValueError: The folder /cache/dataset/train does not exist or is not a directory or permission denied! + +[Modelarts Service Log]2022-09-26 16:24:31,765 - ERROR - proc-rank-0-device-0 (pid: 159) has exited with non-zero code: 1 + +[Modelarts Service Log]2022-09-26 16:24:31,765 - INFO - Begin destroy training processes + +[Modelarts Service Log]2022-09-26 16:24:31,765 - INFO - proc-rank-0-device-0 (pid: 159) has exited + +[Modelarts Service Log]2022-09-26 16:24:31,765 - INFO - End destroy training processes + +[ModelArts Service Log]modelarts-pipe: total length: 3763 + +[Modelarts Service Log]Training end with return code: 1 + +[Modelarts Service Log]Training end at 2022-09-26-16:24:31 + +[Modelarts Service Log]Training completed. + +2022/09/26 16:24:51 start uploading model + +2022/09/26 16:24:51 file:train.log + +2022/09/26 16:24:52 finish uploading model \ No newline at end of file