#!/usr/bin/python
#coding=utf-8
'''
If the code contains Chinese comments, please add the following two lines at the top of the file:
#!/usr/bin/python
#coding=utf-8

Because the A100 GPUs require CUDA 11, please start from the platform's recommended CUDA 11 image,
adapt the code to it, and submit that image before using the training environment.
The image used by this example is: dockerhub.pcl.ac.cn:5000/user-images/openi:cuda111_python37_pytorch191

In the training environment, the uploaded dataset is automatically placed under the /dataset directory.
Single dataset:
    if MnistDataset_torch.zip is selected, the dataset directories are /dataset/train and /dataset/test.
Multiple datasets:
    if MnistDataset_torch.zip and checkpoint_epoch1_0.73.zip are selected,
    the dataset directories are /dataset/MnistDataset_torch/train and /dataset/MnistDataset_torch/test,
    and the checkpoint is at /dataset/checkpoint_epoch1_0.73/mnist_epoch1_0.73.pkl.

Model files are downloaded from /model by default. Please write the model output to /model;
the Qizhi platform provides file downloads for everything under the /model directory.
'''
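
# Usage sketch (added for illustration; the script name "train.py" is a placeholder, not part of
# the original example): with the multi-dataset layout described in the docstring above, the
# default paths defined below would be overridden on the command line, e.g.
#   python train.py --traindata /dataset/MnistDataset_torch/train \
#                   --testdata /dataset/MnistDataset_torch/test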

from model import Model
import numpy as np
import torch
from torchvision.datasets import mnist
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import argparse
import datetime

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
# The dataset location is placed under /dataset
parser.add_argument('--traindata', default="/dataset/train", help='path to the training dataset')
parser.add_argument('--testdata', default="/dataset/test", help='path to the test dataset')
parser.add_argument('--epoch_size', type=int, default=1, help='number of epochs to train')
parser.add_argument('--batch_size', type=int, default=256, help='batch size used for training and evaluation')


def gettime():
    timestr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return timestr


if __name__ == '__main__':
    args, unknown = parser.parse_known_args()

    # log output
    print(gettime(), 'cuda is available:{}'.format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    train_dataset = mnist.MNIST(root=args.traindata, train=True, transform=ToTensor(), download=False)
    test_dataset = mnist.MNIST(root=args.testdata, train=False, transform=ToTensor(), download=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    model = Model().to(device)
    sgd = SGD(model.parameters(), lr=1e-1)
    cost = CrossEntropyLoss()
    epoch = args.epoch_size
    print(gettime(), 'epoch_size is:{}'.format(epoch))

    for _epoch in range(epoch):
        print(gettime(), 'epoch {} begins'.format(_epoch + 1))
        model.train()
        for idx, (train_x, train_label) in enumerate(train_loader):
            train_x = train_x.to(device)
            train_label = train_label.to(device)
            sgd.zero_grad()
            predict_y = model(train_x.float())
            loss = cost(predict_y, train_label.long())
            if idx % 10 == 0:
                print(gettime(), 'idx: {}, loss: {}'.format(idx, loss.sum().item()))
            loss.backward()
            sgd.step()

        # Evaluate on the test set after each epoch
        correct = 0
        _sum = 0
        model.eval()
        for idx, (test_x, test_label) in enumerate(test_loader):
            predict_y = model(test_x.to(device).float()).detach()
            predict_ys = np.argmax(predict_y.cpu(), axis=-1)
            _ = predict_ys == test_label
            correct += np.sum(_.numpy(), axis=-1)
            _sum += _.shape[0]
        print(gettime(), 'accuracy: {:.2f}'.format(correct / _sum))
        # The model output location is placed under /model
        torch.save(model, '/model/mnist_epoch{}_{:.2f}.pkl'.format(_epoch + 1, correct / _sum))

    print(gettime(), "----------this is the end--------")
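
# Reloading sketch (an assumption, not part of the original example): a checkpoint saved to
# /model, or one shipped as a dataset such as the bundled checkpoint_epoch1_0.73.zip, can be
# restored in a later run roughly like this (the Model class must be importable, as it is here):
#   model = torch.load('/dataset/checkpoint_epoch1_0.73/mnist_epoch1_0.73.pkl', map_location=device)
#   model.eval()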