
update

liuzx-patch-1
liuzx 1 year ago
parent commit 8863478571
18 changed files with 393 additions and 467 deletions
  1. +0   -227  OpenI云脑使用教程.ipynb
  2. +3   -3    README.md
  3. +0   -51   gcu_mnist_example/README.md
  4. +1   -1    gcu_mnist_example/model.py
  5. +1   -1    gcu_mnist_example/train.py
  6. +53  -0    gpgpu_mnist_example/README.md
  7. +82  -0    gpgpu_mnist_example/inference.py
  8. +35  -0    gpgpu_mnist_example/model.py
  9. +15  -14   gpu_mnist_example/README.md
  10. +82  -0   gpu_mnist_example/inference.py
  11. +2   -6   gpu_mnist_example/train.py
  12. +17  -22  npu_mnist_example/README.md
  13. +90  -0   npu_mnist_example/inference.py
  14. +3   -2   npu_mnist_example/read_imagenet.py
  15. +6   -1   npu_mnist_example/train.py
  16. +0   -122 npu_mnist_example/train_continue.py
  17. +3   -2   npu_mnist_example/train_multi_card.py
  18. +0   -15  train.py

+ 0  - 227  OpenI云脑使用教程.ipynb

@@ -1,227 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7d112f9b-84ba-420d-a52b-9eb7ba307068",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: http://pip.modelarts.private.com:8888/repository/pypi/simple\n",
"Requirement already satisfied: openi-test==0.7.1 in /home/ma-user/anaconda3/envs/python-3.7.10/lib/python3.7/site-packages (0.7.1)\n",
"Requirement already satisfied: requests in /home/ma-user/modelarts-dev/modelarts-sdk (from openi-test==0.7.1) (2.28.2)\n",
"Requirement already satisfied: tqdm in /home/ma-user/modelarts-dev/modelarts-sdk (from openi-test==0.7.1) (4.64.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/ma-user/modelarts-dev/modelarts-sdk (from requests->openi-test==0.7.1) (3.3.2)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ma-user/anaconda3/envs/python-3.7.10/lib/python3.7/site-packages (from requests->openi-test==0.7.1) (1.26.12)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/ma-user/modelarts-dev/modelarts-sdk (from requests->openi-test==0.7.1) (3.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/ma-user/anaconda3/envs/python-3.7.10/lib/python3.7/site-packages (from requests->openi-test==0.7.1) (2022.6.15)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install openi-test==0.7.1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "02ad2e02-6533-4da0-98c3-c5f238d4d8f7",
"metadata": {},
"outputs": [],
"source": [
"#导入包\n",
"from openi.context import prepare, upload_openi"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "69880626-9320-46cd-ad29-8e5f7be09f32",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Using MoXing-v2.1.0.5d9c87c8-5d9c87c8\n",
"INFO:root:Using OBS-Python-SDK-3.20.9.1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"🎉 Successfully Download s3:///urchincache/attachment/d/d/ddabdf57-a65a-496c-bef0-19d82b9043cd/MNISTData.zip to /home/ma-user/work/dataset/MNISTData.zip\n",
"🎉 Successfully Extracted /home/ma-user/work/dataset/MNISTData.zip\n",
"🎉 Successfully Deleted /home/ma-user/work/dataset/MNISTData.zip\n",
"🎉 Successfully Download s3:///urchincache/attachment/2/c/2c59be66-64ec-41ca-b311-f51a486eabf8/checkpoint_lenet-1_1875.zip to /home/ma-user/work/dataset/checkpoint_lenet-1_1875.zip\n",
"🎉 Successfully Extracted /home/ma-user/work/dataset/checkpoint_lenet-1_1875.zip\n",
"🎉 Successfully Deleted /home/ma-user/work/dataset/checkpoint_lenet-1_1875.zip\n",
"🎉 Successfully Download s3:///urchincache/aimodels/0/c/0cf4367b-5234-4967-a41f-f548d3f69fcf/ to /home/ma-user/work/pretrainmodel/MNIST_Example_model_zjdt\n",
"please set the output location to /home/ma-user/work/output\n"
]
}
],
"source": [
"\n",
"#初始化导入数据集和预训练模型到容器内\n",
"openi_context = prepare()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "c586f98f-bead-4dc9-a22f-173a672d456b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/ma-user/work/dataset\n"
]
},
{
"data": {
"text/plain": [
"['checkpoint_lenet-1_1875', 'MNISTData']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#获取数据集路径,预训练模型路径,输出路径\n",
"dataset_path = openi_context.dataset_path\n",
"print(dataset_path)\n",
"\n",
"import os\n",
"os.listdir(dataset_path)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "7d6617f0-7b86-4b1b-a201-ecdc58db53a5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/ma-user/work/pretrainmodel\n"
]
},
{
"data": {
"text/plain": [
"['MNIST_Example_model_zjdt']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pretrain_model_path = openi_context.pretrain_model_path\n",
"print(pretrain_model_path)\n",
"os.listdir(pretrain_model_path)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "6bc51211-5555-452e-9d83-adcfee1c4f79",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/ma-user/work/output\n"
]
},
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"output_path = openi_context.output_path\n",
"print(output_path)\n",
"os.listdir(output_path)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "48b5da5d-a55f-4781-9056-b886d41779c7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"upload /home/ma-user/work/output to openi\n"
]
},
{
"data": {
"text/plain": [
"'/home/ma-user/work/output'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#回传结果到openi,训练任务才能回传,调试任务回传后也是不支持下载\n",
"upload_openi()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75e7ce04-594e-4e8f-8292-15241709eb5e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python-3.7.10",
"language": "python",
"name": "python-3.7.10"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 3  - 3  README.md

@@ -12,7 +12,7 @@ pip install -U c2net-beta

```
#Import the package
from c2net.context import prepare, upload_output
from c2net.context import prepare

#Initialize: import the dataset and pretrained model into the container
c2net_context = prepare()
@@ -22,12 +22,12 @@ dataset_path = c2net_context.dataset_path
pretrain_model_path = c2net_context.pretrain_model_path
output_path = c2net_context.output_path

#Upload results back to openi; only training tasks can upload, and uploads from debug tasks cannot be downloaded
upload_output()
#You must save outputs to c2net_context.output_path for them to be uploaded back to openi; only training tasks can upload, and uploads from debug tasks cannot be downloaded yet
```
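
As a minimal sketch of the output rule above (assuming a PyTorch workflow like the examples below; the tiny nn.Linear model and the file name are placeholders, not part of the c2net API):

```
import os
import torch
from torch import nn
from c2net.context import prepare

#prepare() mounts the dataset, pretrained model, and output paths into the container
c2net_context = prepare()

#Placeholder network; the real examples use a LeNet-style Model class
model = nn.Linear(10, 2)

#Only files written under c2net_context.output_path are uploaded back to openi
save_path = os.path.join(c2net_context.output_path, "model_final.pkl")
torch.save({"model": model.state_dict()}, save_path)
print(os.listdir(c2net_context.output_path))
```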

## 2. Handwritten digit recognition examples

* For the GPU example, see [gpu_mnist_example](./gpu_mnist_example/README.md)
* For the GPGPU example, see [gpgpu_mnist_example](./gpgpu_mnist_example/README.md)
* For the NPU example, see [npu_mnist_example](./npu_mnist_example/README.md)
* For the GCU example, see [gcu_mnist_example](./gcu_mnist_example/README.md)

+ 0  - 51  gcu_mnist_example/README.md

@@ -1,53 +1,3 @@
<<<<<<< HEAD
# How to train a model on the OpenI platform — GCU example

## 1. GCU training samples for the OpenI cluster and the intelligent computing cluster

###### Example code for the OpenI cluster:

- For the training example, see the code comments in [train_gcu.py](./train_gcu.py)

## 2. Getting the dataset, pretrained model, and output paths on openi

Install the openi package

```
pip install -U openi
```

Use the openi package

```
#Import the package
from openi.context import prepare, upload_openi

#Initialize: import the dataset and pretrained model into the container
openi_context = prepare()

#Get the dataset path, pretrained model path, and output path
dataset_path = openi_context.dataset_path
pretrain_model_path = openi_context.pretrain_model_path
output_path = openi_context.output_path

#Upload results back to openi
upload_openi()
```

## 3. FAQ

### 3.1 About the OpenI platform public library [openi](https://openi.pcl.ac.cn/OpenIOSSG/openi-pypi):

The main methods used are:

```
prepare         Prepare the dataset, model, and output paths
upload_openi    Copy the training image's outputs back to the OpenI platform
```

### 3.2 Resolving argument errors:

Add `args, unknown = parser.parse_known_args()` to your code to ignore errors from arguments such as `--ckpt_url` and `--multi_date_url`
=======
# How to train a model on the OpenI platform - GCU version

- Single-dataset training on the OpenI cluster, multi-dataset training on the OpenI cluster, and single-dataset training on the intelligent computing cluster are used in different ways; please distinguish between them. The data loading and model definition logic are roughly the same as in the [Handwritten Digit Recognition GPU version_PytorchExample](https://openi.pcl.ac.cn/OpenIOSSG/MNIST_PytorchExample_GPU) project:
@@ -166,6 +116,5 @@ upload_openi    Copy the training image's outputs back to the OpenI platform
Currently the training task logs come from print statements in the code; see the related print calls in the example train_for_c2net.py

### 4.2 The model files can be downloaded after training finishes
>>>>>>> origin/liuzx

## If you have any questions about the example code, feel free to open an issue in this project.

+ 1  - 1  gcu_mnist_example/model.py

@@ -32,4 +32,4 @@ class Model(Module):
y = self.relu4(y)
y = self.fc3(y)
y = self.relu5(y)
return y
return y

gcu_mnist_example/train_gcu.py → gcu_mnist_example/train.py

@@ -140,4 +140,4 @@ if __name__ == '__main__':
#The model output location is placed under /tmp/output
state = {'model':model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch':_epoch+1}
torch.save(state, '{}/mnist_epoch{}_{:.2f}.pkl'.format(c2net_context.output_path, _epoch+1, correct / _sum))
print(os.listdir('{}'.format(c2net_context.output_path)))
print(os.listdir('{}'.format(c2net_context.output_path)))

+ 53  - 0  gpgpu_mnist_example/README.md

@@ -0,0 +1,53 @@
# How to train a model on the OpenI platform — GPGPU example

## 1. GPGPU training samples for the OpenI cluster and the intelligent computing cluster

###### Example code for the OpenI cluster:

- For the inference task example, see the code comments in [inference.py](./inference.py)

## 2. Getting the dataset, pretrained model, and output paths on Cloudbrain

Install the c2net package

```
pip install -U c2net-beta
```

Use the c2net package

```
#Import the package
from c2net.context import prepare,upload_output

#Initialize: import the dataset and pretrained model into the container
c2net_context = prepare()

#Get the dataset path, pretrained model path, and output path
dataset_path = c2net_context.dataset_path
pretrain_model_path = c2net_context.pretrain_model_path
output_path = c2net_context.output_path

#Upload results
upload_output()

```

## 3. [UI walkthrough for creating a GPU training example task](./Example_Picture/快速创建GPU训练任务.md)

## 4. FAQ

### 4.1 About the OpenI platform public library [c2net](https://openi.pcl.ac.cn/OpenIOSSG/c2net-pypi):

The main methods used are:

```
prepare          Prepare the dataset, model, and output paths
upload_output    Copy the training image's outputs back to the OpenI platform
```

### 4.2 Resolving argument errors:

Add `args, unknown = parser.parse_known_args()` to your code to ignore errors from arguments such as `--ckpt_url`, `--data_url`, and `--multi_date_url`; a minimal sketch follows.
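
A self-contained sketch of this trick (the flag values below are illustrative only):

```
import argparse

parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--epoch_size', type=int, default=10, help='how many epochs to train')

#parse_known_args() returns (known_args, leftover_list) instead of raising an error,
#so platform-injected flags such as --ckpt_url or --data_url are simply ignored
args, unknown = parser.parse_known_args(['--epoch_size', '3', '--ckpt_url', 's3://placeholder'])
print(args.epoch_size)   #3
print(unknown)           #['--ckpt_url', 's3://placeholder']
```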

## If you have any questions about the example code, feel free to open an issue in this project.

+ 82  - 0  gpgpu_mnist_example/inference.py

@@ -0,0 +1,82 @@
#!/usr/bin/python
#coding=utf-8
'''
If there are Chinese comments in the code, please add at the beginning:
#!/usr/bin/python
#coding=utf-8

1. The dataset structure of the single dataset in this example
MnistDataset_torch.zip
├── test
└── train

'''
from model import Model
import numpy as np
import torch
from torchvision.datasets import mnist
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import argparse
import os
#Import the c2net package
from c2net.context import prepare

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--epoch_size', type=int, default=10, help='how much epoch to train')
parser.add_argument('--batch_size', type=int, default=256, help='how much batch_size in epoch')

# Parameter declarations
WORKERS = 0
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
optimizer = SGD(model.parameters(), lr=1e-1)
cost = CrossEntropyLoss()
# Model evaluation
def test(model, test_loader, data_length):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for i, data in enumerate(test_loader, 0):
x, y = data
x = x.to(device)
y = y.to(device)
y_hat = model(x)
test_loss += cost(y_hat, y).item()
pred = y_hat.max(1, keepdim=True)[1]
correct += pred.eq(y.view_as(pred)).sum().item()
test_loss /= (i+1)

# Write results to the output folder
filename = 'result.txt'
file_path = os.path.join('/tmp/output', filename)
with open(file_path, 'w') as file:
file.write('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, data_length, 100. * correct / data_length))


if __name__ == '__main__':
args, unknown = parser.parse_known_args()
#Initialize: import the dataset and pretrained model into the container
c2net_context = prepare()
#Get the dataset paths
checkpoint_lenet_1_1875_path = c2net_context.dataset_path+"/"+"checkpoint_lenet-1_1875"
MnistDataset_torch = c2net_context.dataset_path+"/"+"MnistDataset_torch"
#Get the pretrained model path
mnist_example_test2_model_djts_path = c2net_context.pretrain_model_path+"/"+"MNIST_Example_test2_model_djts"
#log output
print('cuda is available:{}'.format(torch.cuda.is_available()))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
batch_size = args.batch_size
epochs = args.epoch_size
test_dataset = mnist.MNIST(root=mnist_example_test2_model_djts_path + "/test", train=False, transform=ToTensor(),download=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
model = Model().to(device)
checkpoint = torch.load(mnist_example_test2_model_djts_path + "/mnist_epoch1_0.73.pkl")
model.load_state_dict(checkpoint['model'])
test(model,test_loader,len(test_dataset))

+ 35  - 0  gpgpu_mnist_example/model.py

@@ -0,0 +1,35 @@
from torch.nn import Module
from torch import nn


class Model(Module):
def __init__(self):
super(Model, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.relu1 = nn.ReLU()
self.pool1 = nn.MaxPool2d(2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.relu2 = nn.ReLU()
self.pool2 = nn.MaxPool2d(2)
self.fc1 = nn.Linear(256, 120)
self.relu3 = nn.ReLU()
self.fc2 = nn.Linear(120, 84)
self.relu4 = nn.ReLU()
self.fc3 = nn.Linear(84, 10)
self.relu5 = nn.ReLU()

def forward(self, x):
y = self.conv1(x)
y = self.relu1(y)
y = self.pool1(y)
y = self.conv2(y)
y = self.relu2(y)
y = self.pool2(y)
y = y.view(y.shape[0], -1)
y = self.fc1(y)
y = self.relu3(y)
y = self.fc2(y)
y = self.relu4(y)
y = self.fc3(y)
y = self.relu5(y)
return y

+ 15  - 14  gpu_mnist_example/README.md

@@ -4,46 +4,47 @@

###### Example code for the OpenI cluster:

- For the training example, see the code comments in [train_gpu.py](./train_gpu.py)
- For the training task example, see the code comments in [train.py](./train.py)
- For the inference task example, see the code comments in [inference.py](./inference.py)

## 2. Getting the dataset, pretrained model, and output paths on openi
## 2. Getting the dataset, pretrained model, and output paths on Cloudbrain

Install openi
Install c2net

```
pip install -U openi
pip install -U c2net-beta
```

Use openi
Use c2net

```
#Import the package
from openi.context import prepare, upload_openi
from c2net.context import prepare,upload_output

#Initialize: import the dataset and pretrained model into the container
openi_context = prepare()
c2net_context = prepare()

#Get the dataset path, pretrained model path, and output path
dataset_path = openi_context.dataset_path
pretrain_model_path = openi_context.pretrain_model_path
dataset_path = c2net_context.dataset_path
pretrain_model_path = c2net_context.pretrain_model_path
output_path = openi_context.output_path

#Upload results back to openi
upload_openi()
#Upload results
upload_output()

```

## 3. [UI walkthrough for creating a GPU training example task](./Example_Picture/快速创建GPU训练任务.md)

## 4. FAQ

### 4.1 About the OpenI platform public library [openi](https://openi.pcl.ac.cn/OpenIOSSG/openi-pypi):
### 4.1 About the OpenI platform public library [c2net](https://openi.pcl.ac.cn/OpenIOSSG/c2net-pypi):

The main methods used are:

```
prepare          Prepare the dataset, model, and output paths
upload_openi     Copy the training image's outputs back to the OpenI platform

upload_output    Copy the training image's outputs back to the OpenI platform
```

### 4.2 Resolving argument errors:


+ 82  - 0  gpu_mnist_example/inference.py

@@ -0,0 +1,82 @@
#!/usr/bin/python
#coding=utf-8
'''
If there are Chinese comments in the code, please add at the beginning:
#!/usr/bin/python
#coding=utf-8

1. The dataset structure of the single dataset in this example
MnistDataset_torch.zip
├── test
└── train

'''
from model import Model
import numpy as np
import torch
from torchvision.datasets import mnist
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import argparse
import os
#Import the c2net package
from c2net.context import prepare

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--epoch_size', type=int, default=10, help='how much epoch to train')
parser.add_argument('--batch_size', type=int, default=256, help='how much batch_size in epoch')

# Parameter declarations
WORKERS = 0
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
optimizer = SGD(model.parameters(), lr=1e-1)
cost = CrossEntropyLoss()
# Model evaluation
def test(model, test_loader, data_length):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for i, data in enumerate(test_loader, 0):
x, y = data
x = x.to(device)
y = y.to(device)
y_hat = model(x)
test_loss += cost(y_hat, y).item()
pred = y_hat.max(1, keepdim=True)[1]
correct += pred.eq(y.view_as(pred)).sum().item()
test_loss /= (i+1)

# Write results to the output folder
filename = 'result.txt'
file_path = os.path.join('/tmp/output', filename)
with open(file_path, 'w') as file:
file.write('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, data_length, 100. * correct / data_length))


if __name__ == '__main__':
args, unknown = parser.parse_known_args()
#Initialize: import the dataset and pretrained model into the container
c2net_context = prepare()
#Get the dataset paths
checkpoint_lenet_1_1875_path = c2net_context.dataset_path+"/"+"checkpoint_lenet-1_1875"
MnistDataset_torch = c2net_context.dataset_path+"/"+"MnistDataset_torch"
#Get the pretrained model path
mnist_example_test2_model_djts_path = c2net_context.pretrain_model_path+"/"+"MNIST_Example_test2_model_djts"
#log output
print('cuda is available:{}'.format(torch.cuda.is_available()))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
batch_size = args.batch_size
epochs = args.epoch_size
test_dataset = mnist.MNIST(root=mnist_example_test2_model_djts_path + "/test", train=False, transform=ToTensor(),download=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
model = Model().to(device)
checkpoint = torch.load(mnist_example_test2_model_djts_path + "/mnist_epoch1_0.73.pkl")
model.load_state_dict(checkpoint['model'])
test(model,test_loader,len(test_dataset))

gpu_mnist_example/train_gpu.py → gpu_mnist_example/train.py

@@ -11,9 +11,7 @@ If there are Chinese comments in the code,please add at the beginning:
└── train

'''
import os
os.system("pip install openi-test")
os.system("pip install {}".format(os.getenv("OPENI_SDK_PATH")))


from model import Model
import numpy as np
@@ -26,7 +24,7 @@ from torchvision.transforms import ToTensor
import argparse
import os
#Import the c2net package
from c2net.context import prepare, upload_output
from c2net.context import prepare

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
@@ -114,7 +112,5 @@ if __name__ == '__main__':
# Save the model to c2net_context.output_path
state = {'model':model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch':epoch}
torch.save(state, '{}/mnist_epoch{}.pkl'.format(c2net_context.output_path, epoch))
#Upload results
upload_output()



+ 17  - 22  npu_mnist_example/README.md

@@ -4,49 +4,44 @@

###### Example code for the OpenI cluster:

- For the training example, see the code comments in [train_npu.py](./train_npu.py)
- For the single-card training task example, see the code comments in [train.py](./train.py)
- For the multi-card training task example, see the code comments in [train_multi_card.py](./train_multi_card.py)
- For the inference task example, see the code comments in [inference.py](./inference.py)
- OpenI cluster training tasks already mount the ImageNet-1k dataset into the training image; for usage details, see the code comments in [read_imagenet.py](./read_imagenet.py)
- For the resume-training feature example, see [train_continue.py](./train_continue.py); usage is the same on the OpenI and intelligent computing clusters

## 2. Getting the dataset, pretrained model, and output paths on openi
## 2. Getting the dataset, pretrained model, and output paths on Cloudbrain

Install the openi package

```
pip install -U openi
```

Use the openi package
Use the c2net package

```
#Import the package
from openi.context import prepare, upload_openi
from c2net.context import prepare, upload_openi

#Initialize: import the dataset and pretrained model into the container
openi_context = prepare()
c2net_context = prepare()

#Get the dataset path, pretrained model path, and output path
dataset_path = openi_context.dataset_path
pretrain_model_path = openi_context.pretrain_model_path
output_path = openi_context.output_path
dataset_path = c2net_context.dataset_path
pretrain_model_path = c2net_context.pretrain_model_path
output_path = c2net_context.output_path

#Upload results back to openi
upload_openi()
upload_output()
```

## 3. [UI walkthrough for creating an NPU training example task](./Example_Picture/快速创建NPU训练任务.md)

## 4. FAQ

### 4.1 About the OpenI platform public library [openi](https://openi.pcl.ac.cn/OpenIOSSG/openi-pypi):
### 4.1 About the public library [c2net](https://openi.pcl.ac.cn/OpenIOSSG/c2net-pypi):

The main methods used are:

```
prepare            Prepare the dataset, model, and output paths
upload_openi       Copy the training image's outputs back to the OpenI platform
obs_copy_file      Copy a file via mox (moxing)
obs_copy_folder    Copy a folder via mox (moxing)
prepare            Prepare the dataset, model, and output paths
c2net.context.upload_output                    Copy the training image's outputs back to the OpenI platform
c2net.context.moxing_helper.obs_copy_file      Copy a file via mox (moxing)
c2net.context.moxing_helper.obs_copy_folder    Copy a folder via mox (moxing)
```
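
A hedged sketch of combining these helpers (the moxing_helper import path follows the list above and may differ by c2net version; the bucket path and file name are placeholders):

```
from c2net.context import prepare, upload_output
from c2net.context.moxing_helper import obs_copy_file

c2net_context = prepare()

#Copy one extra file from OBS into the container via mox (placeholder source path)
obs_copy_file("s3://my-bucket/extra/checkpoint_lenet-1_1875.ckpt",
              c2net_context.dataset_path + "/checkpoint_lenet-1_1875.ckpt")

#Copy everything under c2net_context.output_path back to the OpenI platform
upload_output()
```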

### 4.2 Resolving argument errors:
@@ -59,7 +54,7 @@ obs_copy_folder    Copy a folder via mox (moxing)

```
if local_rank%8==0:
The data download code is omitted here...(openi.context.prepare())
The data download code is omitted here...
f = open("/cache/download_input.txt", 'w')
f.close()
try:


+ 90  - 0  npu_mnist_example/inference.py

@@ -0,0 +1,90 @@


"""
The dataset used in this example is MNISTData.zip
The dataset structure is:
MNISTData.zip
├── test
│ ├── t10k-images-idx3-ubyte
│ └── t10k-labels-idx1-ubyte
└── train
├── train-images-idx3-ubyte
└── train-labels-idx1-ubyte
Usage notes:
1. Add args, unknown = parser.parse_known_args() to the code to ignore argument errors such as --ckpt_url
2. The user needs to call the c2net python sdk package
"""
import time
import os
import argparse
from config import mnist_cfg as cfg
from dataset import create_dataset
from lenet import LeNet5
import mindspore.nn as nn
import numpy as np
from mindspore import context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore import load_checkpoint, load_param_into_net
from mindspore.train import Model
from mindspore import Tensor
#Import the c2net package
from c2net.context import prepare, upload_output


parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
parser.add_argument(
'--device_target',
type=str,
default="Ascend",
choices=['Ascend', 'CPU'],
help='device where the code will be implemented (default: Ascend),if to use the CPU on the Qizhi platform:device_target=CPU')

parser.add_argument('--epoch_size',
type=int,
default=5,
help='Training epochs.')

if __name__ == "__main__":
###Add args, unknown = parser.parse_known_args() to the code to ignore argument errors such as --ckpt_url
args, unknown = parser.parse_known_args()
#Initialize: import the dataset and pretrained model into the container
c2net_context = prepare()
#Get the dataset path
mnistdata_path = c2net_context.dataset_path+"/"+"MNISTData"
#Get the pretrained model path
mnist_example_test2_model_djts_path = c2net_context.pretrain_model_path+"/"+"MNIST_Example_test2_model_djts"
#Get the output path
save_path = c2net_context.output_path
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
network = LeNet5(cfg.num_classes)
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
repeat_size = cfg.epoch_size
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
#model = Model(network, net_loss, net_opt, metrics={"Accuracy"})
model = Model(network, net_loss, net_opt)


print("============== Starting Testing ==============")
load_param_into_net(network, load_checkpoint(os.path.join(mnist_example_test2_model_djts_path, "checkpoint_lenet-1_1875.ckpt")))
ds_test = create_dataset(os.path.join(mnistdata_path, "test"), batch_size=1).create_dict_iterator()
data = next(ds_test)
images = data["image"].asnumpy()
labels = data["label"].asnumpy()
print('Tensor:', Tensor(data['image']))
output = model.predict(Tensor(data['image']))
predicted = np.argmax(output.asnumpy(), axis=1)
pred = np.argmax(output.asnumpy(), axis=1)
print('predicted:', predicted)
print('pred:', pred)

print(f'Predicted: "{predicted[0]}", Actual: "{labels[0]}"')
filename = 'result.txt'
file_path = os.path.join(save_path, filename)
with open(file_path, 'a+') as file:
file.write(" {}: {:.2f} \n".format("Predicted", predicted[0]))

###Upload training results to the OpenI platform; note that output models must be stored in c2net_context.output_path
upload_output()

+ 3  - 2  npu_mnist_example/read_imagenet.py

@@ -36,7 +36,7 @@ import moxing as mox
import mindspore as ms
from mindspore.dataset import ImageFolderDataset
import mindspore.dataset.vision.c_transforms as transforms
from openi.context import upload_openi
from c2net.context import upload_output

parser = argparse.ArgumentParser(description='Read big dataset ImageNet Example')
parser.add_argument('--train_url',
@@ -45,6 +45,7 @@ parser.add_argument('--train_url',

if __name__ == "__main__":
args, unknown = parser.parse_known_args()
#Note: only available in training tasks
data_path = '/cache/sfs/data/imagenet/'
modelart_output = '/cache/output'
if not os.path.exists(modelart_output):
@@ -69,4 +70,4 @@ if __name__ == "__main__":

data_info = dataset_train.to_json(filename= modelart_output + '/data_info.json')
print(data_info)
upload_openi()
upload_output()

npu_mnist_example/train_npu.py → npu_mnist_example/train.py

@@ -15,7 +15,9 @@
1. Add args, unknown = parser.parse_known_args() to the code to ignore argument errors such as --ckpt_url
2. The user needs to call the c2net python sdk package
"""

import os
os.system("pip install c2net-beta -i https://pypi.tuna.tsinghua.edu.cn/simple")
import argparse
from config import mnist_cfg as cfg
from dataset import create_dataset
@@ -29,6 +31,7 @@ import time
#Import the c2net package
from c2net.context import prepare, upload_output


parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
parser.add_argument(
@@ -52,6 +55,8 @@ if __name__ == "__main__":
mnistdata_path = c2net_context.dataset_path+"/"+"MNISTData"
#Get the pretrained model path
mnist_example_test2_model_djts_path = c2net_context.pretrain_model_path+"/"+"MNIST_Example_test2_model_djts"
#Get the output path
output_path = c2net_context.output_path
context.set_context(mode=context.GRAPH_MODE,device_target=args.device_target)
#How the dataset is used
@@ -77,7 +82,7 @@ if __name__ == "__main__":
save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
#Save the model to c2net_context.output_path
outputDirectory = c2net_context.output_path + "/"
outputDirectory = output_path + "/"
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
directory=outputDirectory,
config=config_ck)

+ 0  - 122  npu_mnist_example/train_continue.py

@@ -1,122 +0,0 @@
#####################################################################################################
# Resume-training feature: when modifying a training task, if "reuse previous results" is checked,
# the previous results can be read from the new training task's output path
#
# Example usage
# - Add two training parameters:
#   'ckpt_save_name'  output file name of this task, used to save this run's model file (without suffix)
#   'ckpt_load_name'  output file name of the previous task, used to load the previously saved model file (without suffix);
#                     empty by default for a first run, in which case no file is read
# - In the training code, check whether 'ckpt_load_name' is empty; if it is not empty, this is a resume-training task
#####################################################################################################


import os
import argparse
from config import mnist_cfg as cfg
from dataset import create_dataset
from dataset_distributed import create_dataset_parallel
from lenet import LeNet5
import mindspore.nn as nn
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
from mindspore import load_checkpoint, load_param_into_net
from mindspore.train import Model
from mindspore.nn.metrics import Accuracy
from mindspore.communication.management import get_rank

#Import the openi package
from openi.context import prepare, upload_openi
from openi.context.helper import obs_copy_file, obs_copy_folder

parser = argparse.ArgumentParser(description='MindSpore Lenet Example')

parser.add_argument(
'--device_target',
type=str,
default="Ascend",
choices=['Ascend', 'CPU'],
help='device where the code will be implemented (default: Ascend),if to use the CPU on the Qizhi platform:device_target=CPU')

parser.add_argument('--epoch_size',
type=int,
default=5,
help='Training epochs.')

### continue task parameters
parser.add_argument('--ckpt_load_name',
help='model name to save/load',
default= '')

parser.add_argument('--ckpt_save_name',
help='model name to save/load',
default= 'checkpoint')


if __name__ == "__main__":
args, unknown = parser.parse_known_args()

###Initialize and copy data to training image
openi_context = prepare()
data_dir = openi_context.dataset_path
pretrain_model_dir = openi_context.pretrain_model_path
train_dir = openi_context.output_path

device_num = int(os.getenv('RANK_SIZE'))
ds_train = create_dataset(os.path.join(data_dir + "/MNISTData", "train"), cfg.batch_size)
if ds_train.get_dataset_size() == 0:
raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")

network = LeNet5(cfg.num_classes)
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
### Load the model for resume training
if args.ckpt_load_name:
obs_copy_folder(args.train_url, base_path)
load_path = "{}/{}.ckpt".format(base_path,args.ckpt_load_name)
param_dict = load_checkpoint(load_path)
load_param_into_net(network, param_dict)
print("Successfully load ckpt file:{}, saved_net_work:{}".format(load_path,param_dict))
### Record existing model file names to avoid re-uploading results
outputFiles = os.listdir(base_path)

if args.device_target != "Ascend":
model = Model(network,
net_loss,
net_opt,
metrics={"accuracy": Accuracy()})
else:
model = Model(network,
net_loss,
net_opt,
metrics={"accuracy": Accuracy()},
amp_level="O2")

config_ck = CheckpointConfig(
save_checkpoint_steps=cfg.save_checkpoint_steps,
keep_checkpoint_max=cfg.keep_checkpoint_max)
#Note that this method saves the model file on each card. You need to specify the save path on each card.
# In this example, get_rank() is added to distinguish different paths.
if device_num == 1:
save_path = base_path + "/"
if device_num > 1:
save_path = base_path + "/" + str(get_rank()) + "/"
ckpoint_cb = ModelCheckpoint(prefix=args.ckpt_save_name,
directory=save_path,
config=config_ck)
print("============== Starting Training ==============")
epoch_size = cfg['epoch_size']
if (args.epoch_size):
epoch_size = args.epoch_size
print('epoch_size is: ', epoch_size)
model.train(epoch_size,
ds_train,
callbacks=[time_cb, ckpoint_cb,
LossMonitor()])
### Upload the newly produced models in the training container back to the OpenI community
outputFilesNew = os.listdir(base_path)
new_models = [i for i in outputFilesNew if i not in outputFiles]
for n in new_models:
ckpt_url = base_path + "/" + n
obs_ckpt_url = args.train_url + "/" + n
obs_copy_file(ckpt_url, obs_ckpt_url)

npu_mnist_example/train_npu_multi_card.py → npu_mnist_example/train_multi_card.py

@@ -30,7 +30,7 @@ from mindspore.context import ParallelMode
from mindspore.communication.management import init, get_rank
import time
#Import the openi package
from c2net.context import prepare, upload_output
from c2net.context import prepare


parser = argparse.ArgumentParser(description='MindSpore Lenet Example')
@@ -67,6 +67,7 @@ if __name__ == "__main__":
mnistdata_path = c2net_context.dataset_path+"/"+"MNISTData"
#获取预训练模型路径
mnist_example_test2_model_djts_path = c2net_context.pretrain_model_path+"/"+"MNIST_Example_test2_model_djts"
output_path = c2net_context.output_path
#Set a cache file to determine whether the data has been copied to obs.
#If this file exists during multi-card training, there is no need to copy the dataset multiple times.
f = open("/cache/download_input.txt", 'w')
@@ -102,7 +103,7 @@ if __name__ == "__main__":
keep_checkpoint_max=cfg.keep_checkpoint_max)
#Note that this method saves the model file on each card. You need to specify the save path on each card.
# In this example, get_rank() is added to distinguish different paths.
outputDirectory = c2net_context.output_path + "/" + str(get_rank()) + "/"
outputDirectory = output_path + "/" + str(get_rank()) + "/"
ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
directory=outputDirectory,
config=config_ck)

+ 0  - 15  train.py

@@ -1,15 +0,0 @@

#Import the package
import os
from c2net.context import prepare, upload_output

#Initialize: import the dataset and pretrained model into the container
c2net_context = prepare()

#Get the dataset path, pretrained model path, and output path
dataset_path = c2net_context.dataset_path
pretrain_model_path = c2net_context.pretrain_model_path
output_path = c2net_context.output_path

#Upload results back to openi
upload_output()
