
del files

test_v20221116
wjtest001 2 years ago
parent commit a28130586e
3 changed files with 0 additions and 399 deletions
1. +0 -128 gpu/pretrain.py
2. +0 -144 gpu/pretrain_for_c2net.py
3. +0 -127 gpu/pretrain_wjtest.py

+ 0 - 128 gpu/pretrain.py

@@ -1,128 +0,0 @@
#!/usr/bin/python
#coding=utf-8
'''
If there are Chinese comments in the code, please add the following at the beginning of the file:
#!/usr/bin/python
#coding=utf-8

1. The dataset structure of the single dataset in this example:
MnistDataset_torch.zip
├── test
└── train

2. For compatibility with the A100, please use the platform's recommended image with CUDA 11
before using the training environment, then adjust the code and submit the image.
The image used in this example is: dockerhub.pcl.ac.cn:5000/user-images/openi:cuda111_python37_pytorch191
In the training environment, the uploaded dataset is automatically placed in the /dataset directory.
Note: the paths differ between selecting a single dataset and selecting multiple datasets.
(1) Single dataset: if MnistDataset_torch.zip is selected,
the dataset directories are /dataset/train and /dataset/test.
Multiple datasets: if MnistDataset_torch.zip is selected,
the dataset directories are /dataset/MnistDataset_torch/train and /dataset/MnistDataset_torch/test.

(2) If a pre-training model file is selected, the selected pre-training model is
automatically placed in the /pretrainmodel directory.
For example, if a model file is selected, it can be accessed as: '/pretrainmodel/' + args.pretrainmodelname

The model output path defaults to /model. Please write the model output to /model;
the Qizhi platform provides file downloads from the /model directory.
'''


from model import Model
import numpy as np
import torch
from torchvision.datasets import mnist
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import argparse
import os

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
# The dataset is placed under /dataset
parser.add_argument('--traindata', default="/dataset/train", help='path to the training dataset')
parser.add_argument('--testdata', default="/dataset/test", help='path to the test dataset')
parser.add_argument('--epoch_size', type=int, default=10, help='number of epochs to train')
parser.add_argument('--batch_size', type=int, default=256, help='batch size used in each epoch')
# Path of the pretrained model checkpoint
parser.add_argument('--ckpt_url', default="", help='pretrained model path')

# Parameter declarations
WORKERS = 0  # number of DataLoader worker threads
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
optimizer = SGD(model.parameters(), lr=1e-1)
cost = CrossEntropyLoss()

# Model training
def train(model, train_loader, epoch):
    model.train()
    train_loss = 0
    for i, data in enumerate(train_loader, 0):
        x, y = data
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        loss = cost(y_hat, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # accumulate a float so the autograd graph is not retained
    loss_mean = train_loss / (i + 1)
    print('Train Epoch: {}\t Loss: {:.6f}'.format(epoch, loss_mean))
# Model evaluation
def test(model, test_loader, test_data):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            x, y = data
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x)
            test_loss += cost(y_hat, y).item()
            pred = y_hat.max(1, keepdim=True)[1]
            correct += pred.eq(y.view_as(pred)).sum().item()
    test_loss /= (i + 1)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_data), 100. * correct / len(test_data)))
def main():
    # If a saved checkpoint exists, load it and continue training from it
    if os.path.exists(args.ckpt_url):
        checkpoint = torch.load(args.ckpt_url)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        print('Successfully loaded the weights of epoch {}!'.format(start_epoch))
    else:
        start_epoch = 0
        print('No saved model found; training will start from scratch!')
    for epoch in range(start_epoch + 1, epochs):
        train(model, train_loader, epoch)
        test(model, test_loader, test_dataset)
        # Save the model
        state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
        torch.save(state, '/model/mnist_epoch{}.pkl'.format(epoch))

if __name__ == '__main__':
    args, unknown = parser.parse_known_args()
    # Log output
    print('cuda is available:{}'.format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    epochs = args.epoch_size
    train_dataset = mnist.MNIST(root=args.traindata, train=True, transform=ToTensor(), download=False)
    test_dataset = mnist.MNIST(root=args.testdata, train=False, transform=ToTensor(), download=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    main()
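
The docstring above points out that the dataset path depends on whether one or several datasets were attached to the training task. Below is a minimal sketch of how a script could resolve the right directories at startup, assuming the /dataset mount and the dataset name MnistDataset_torch described above (the helper name resolve_data_dirs is illustrative, not part of the platform API):

import os

def resolve_data_dirs(base="/dataset", name="MnistDataset_torch"):
    """Return (train_dir, test_dir) for both single- and multi-dataset layouts."""
    multi_root = os.path.join(base, name)
    if os.path.isdir(multi_root):
        # Multiple datasets selected: /dataset/MnistDataset_torch/{train,test}
        return os.path.join(multi_root, "train"), os.path.join(multi_root, "test")
    # Single dataset selected: /dataset/{train,test}
    return os.path.join(base, "train"), os.path.join(base, "test")

train_dir, test_dir = resolve_data_dirs()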



+ 0 - 144 gpu/pretrain_for_c2net.py

@@ -1,144 +0,0 @@
#!/usr/bin/python
#coding=utf-8
'''
If there are Chinese comments in the code, please add the following at the beginning of the file:
#!/usr/bin/python
#coding=utf-8

In the training environment,
(1) the code is automatically placed in the /tmp/code directory,
(2) the uploaded dataset is automatically placed in the /tmp/dataset directory.
Note: the paths differ between selecting a single dataset and selecting multiple datasets.
(1) Single dataset: if MnistDataset_torch.zip is selected,
the dataset directories are /tmp/dataset/train and /tmp/dataset/test.

The dataset structure of the single dataset in the training image in this example:
tmp
└── dataset
    ├── test
    └── train

(2) If multiple datasets are selected, such as MnistDataset_torch.zip and checkpoint_epoch1_0.73.zip,
the dataset directories are /tmp/dataset/MnistDataset_torch/train, /tmp/dataset/MnistDataset_torch/test
and /tmp/dataset/checkpoint_epoch1_0.73/mnist_epoch1_0.73.pkl.
The dataset structure in the training image for multiple datasets in this example:
tmp
└── dataset
    ├── MnistDataset_torch
    |   ├── test
    |   └── train
    └── checkpoint_epoch1_0.73
        └── mnist_epoch1_0.73.pkl
(3) The model output path defaults to /tmp/output. Please write the model output to /tmp/output;
the Qizhi platform provides file downloads from the /tmp/output directory.
(4) If a pre-training model file is selected, the selected pre-training model is
automatically placed in the /tmp/pretrainmodel directory.
For example, if a model file is selected, it can be accessed as: '/tmp/pretrainmodel/' + args.pretrainmodelname

In addition, if you want to retrieve the model files after each epoch of training, you can call the
uploader_for_gpu tool:
import os
os.system("cd /tmp/script_for_grampus/ && ./uploader_for_gpu " + "/tmp/output/")
'''


from model import Model
import numpy as np
import torch
from torchvision.datasets import mnist
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import argparse
import os

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
# The dataset is placed under /tmp/dataset
parser.add_argument('--traindata', default="/tmp/dataset/train", help='path to the training dataset')
parser.add_argument('--testdata', default="/tmp/dataset/test", help='path to the test dataset')
parser.add_argument('--epoch_size', type=int, default=10, help='number of epochs to train')
parser.add_argument('--batch_size', type=int, default=256, help='batch size used in each epoch')
# Path of the pretrained model checkpoint
parser.add_argument('--ckpt_url', default="", help='pretrained model path')

# Parameter declarations
WORKERS = 0  # number of DataLoader worker threads
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
optimizer = SGD(model.parameters(), lr=1e-1)
cost = CrossEntropyLoss()

# Model training
def train(model, train_loader, epoch):
    model.train()
    train_loss = 0
    for i, data in enumerate(train_loader, 0):
        x, y = data
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        loss = cost(y_hat, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # accumulate a float so the autograd graph is not retained
    loss_mean = train_loss / (i + 1)
    print('Train Epoch: {}\t Loss: {:.6f}'.format(epoch, loss_mean))
# Model evaluation
def test(model, test_loader, test_data):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            x, y = data
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x)
            test_loss += cost(y_hat, y).item()
            pred = y_hat.max(1, keepdim=True)[1]
            correct += pred.eq(y.view_as(pred)).sum().item()
    test_loss /= (i + 1)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_data), 100. * correct / len(test_data)))
def main():
    # If a saved checkpoint exists, load it and continue training from it
    if os.path.exists(args.ckpt_url):
        checkpoint = torch.load(args.ckpt_url)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        print('Successfully loaded the weights of epoch {}!'.format(start_epoch))
    else:
        start_epoch = 0
        print('No saved model found; training will start from scratch!')
    for epoch in range(start_epoch + 1, epochs):
        train(model, train_loader, epoch)
        test(model, test_loader, test_dataset)
        # Save the model
        state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
        torch.save(state, '/tmp/output/mnist_epoch{}.pkl'.format(epoch))
        # After each epoch, call uploader_for_gpu so the result files under /tmp/output are sent back to Qizhi
        os.system("cd /tmp/script_for_grampus/ && ./uploader_for_gpu " + "/tmp/output/")

if __name__ == '__main__':
    args, unknown = parser.parse_known_args()
    # Log output
    print('cuda is available:{}'.format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    epochs = args.epoch_size
    train_dataset = mnist.MNIST(root=args.traindata, train=True, transform=ToTensor(), download=False)
    test_dataset = mnist.MNIST(root=args.testdata, train=False, transform=ToTensor(), download=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    main()
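
This script ships results back to Qizhi by shelling out to uploader_for_gpu with os.system after every epoch. A sketch of the same call wrapped in subprocess so failures are surfaced instead of silently ignored; the tool path /tmp/script_for_grampus/ comes from the platform docstring above, while the wrapper itself (upload_outputs) is our suggestion, not a platform API:

import subprocess

def upload_outputs(output_dir="/tmp/output/"):
    """Send result files back to Qizhi via the platform's uploader_for_gpu tool."""
    result = subprocess.run(
        ["./uploader_for_gpu", output_dir],
        cwd="/tmp/script_for_grampus/",
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        # os.system would swallow this; here the error is at least logged
        print("uploader_for_gpu failed:", result.stderr)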



+ 0 - 127 gpu/pretrain_wjtest.py

@@ -1,127 +0,0 @@
#!/usr/bin/python
#coding=utf-8
'''
If there are Chinese comments in the code, please add the following at the beginning of the file:
#!/usr/bin/python
#coding=utf-8

1. The dataset structure of the single dataset in this example:
MnistDataset_torch.zip
├── test
└── train

2. For compatibility with the A100, please use the platform's recommended image with CUDA 11
before using the training environment, then adjust the code and submit the image.
The image used in this example is: dockerhub.pcl.ac.cn:5000/user-images/openi:cuda111_python37_pytorch191
In the training environment, the uploaded dataset is automatically placed in the /dataset directory.
Note: the paths differ between selecting a single dataset and selecting multiple datasets.
(1) Single dataset: if MnistDataset_torch.zip is selected,
the dataset directories are /dataset/train and /dataset/test.
Multiple datasets: if MnistDataset_torch.zip is selected,
the dataset directories are /dataset/MnistDataset_torch/train and /dataset/MnistDataset_torch/test.

(2) If a pre-training model file is selected, the selected pre-training model is
automatically placed in the /pretrainmodel directory.
For example, if a model file is selected, it can be accessed as: '/pretrainmodel/' + args.pretrainmodelname

The model output path defaults to /model. Please write the model output to /model;
the Qizhi platform provides file downloads from the /model directory.
'''


from model import Model
import numpy as np
import torch
from torchvision.datasets import mnist
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
import argparse
import os

# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
# The dataset is placed under /dataset
parser.add_argument('--traindata', default="/dataset/train", help='path to the training dataset')
parser.add_argument('--testdata', default="/dataset/test", help='path to the test dataset')
parser.add_argument('--epoch_size', type=int, default=10, help='number of epochs to train')
parser.add_argument('--batch_size', type=int, default=256, help='batch size used in each epoch')
# Path of the pretrained model checkpoint
parser.add_argument('--ckpt_url', default="", help='pretrained model path')

# Parameter declarations
WORKERS = 0  # number of DataLoader worker threads
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
optimizer = SGD(model.parameters(), lr=1e-1)
cost = CrossEntropyLoss()

# Model training
def train(model, train_loader, epoch):
    model.train()
    train_loss = 0
    for i, data in enumerate(train_loader, 0):
        x, y = data
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        y_hat = model(x)
        loss = cost(y_hat, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # accumulate a float so the autograd graph is not retained
    loss_mean = train_loss / (i + 1)
    print('Train Epoch: {}\t Loss: {:.6f}'.format(epoch, loss_mean))
# Model evaluation
def test(model, test_loader, test_data):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader, 0):
            x, y = data
            x = x.to(device)
            y = y.to(device)
            y_hat = model(x)
            test_loss += cost(y_hat, y).item()
            pred = y_hat.max(1, keepdim=True)[1]
            correct += pred.eq(y.view_as(pred)).sum().item()
    test_loss /= (i + 1)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_data), 100. * correct / len(test_data)))
def main():
    # If a saved checkpoint exists, load it and continue training from it
    print('------ckpt_url is: ', args.ckpt_url)
    if os.path.exists(args.ckpt_url):
        checkpoint = torch.load(args.ckpt_url)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']
        print('Successfully loaded the weights of epoch {}!'.format(start_epoch))
    else:
        start_epoch = 0
        print('No saved model found; training will start from scratch!')
    for epoch in range(start_epoch + 1, epochs):
        train(model, train_loader, epoch)
        test(model, test_loader, test_dataset)
        # Save the model
        state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch}
        torch.save(state, '/model/mnist_epoch{}.pkl'.format(epoch))

if __name__ == '__main__':
    args, unknown = parser.parse_known_args()
    # Log output
    print('cuda is available:{}'.format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    batch_size = args.batch_size
    epochs = args.epoch_size
    train_dataset = mnist.MNIST(root=args.traindata, train=True, transform=ToTensor(), download=False)
    test_dataset = mnist.MNIST(root=args.testdata, train=False, transform=ToTensor(), download=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    main()
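
All three deleted scripts resume training from a checkpoint dict with 'model', 'optimizer', and 'epoch' keys. A minimal sketch of that save/load round trip, using a stand-in nn.Linear in place of the repo's Model() (the file path and epoch value are illustrative):

import torch
from torch import nn
from torch.optim import SGD

model = nn.Linear(784, 10)  # stand-in for the repo's Model()
optimizer = SGD(model.parameters(), lr=1e-1)

# Save: the same dict layout the scripts write after each epoch
state = {'model': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': 3}
torch.save(state, '/tmp/mnist_epoch3.pkl')

# Resume: restore the weights, optimizer state, and epoch counter
checkpoint = torch.load('/tmp/mnist_epoch3.pkl')
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch']  # training then continues at start_epoch + 1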
