Browse Source

提交代码。

Signed-off-by: zouap <zouap@pcl.ac.cn>
tags/v1.22.7.1
zouap 3 years ago
parent
commit
3c76ea3753
2 changed files with 69 additions and 31 deletions
  1. +39
    -0
      modules/setting/setting.go
  2. +30
    -31
      routers/repo/ai_model_convert.go

+ 39
- 0
modules/setting/setting.go View File

@@ -613,6 +613,24 @@ var (
OrgName string
TeamName string
}{}

// ModelConvert holds the model-conversion settings that getModelConvertConfig
// reads from the [model_convert] section of the configuration file.
ModelConvert = struct {
// GPU_PYTORCH_IMAGE is the container image the GPU conversion job starts
// with; it is kept when the source engine is PyTorch.
GPU_PYTORCH_IMAGE string
// GpuQueue is passed as GpuType when the GPU conversion job is created.
GpuQueue string
// GPU_TENSORFLOW_IMAGE replaces the GPU image when the source engine is
// TensorFlow.
GPU_TENSORFLOW_IMAGE string
// NPU_MINDSPORE_16_IMAGE is sent as UserImageUrl in the NPU train-job request.
NPU_MINDSPORE_16_IMAGE string
// Boot files: names of the conversion scripts handed to the job, chosen by
// source engine and destination format (PyTorch->ONNX, PyTorch->TensorRT,
// MindSpore on NPU, TensorFlow on NPU, TensorFlow on GPU).
PytorchOnnxBootFile string
PytorchTrTBootFile string
MindsporeBootFile string
TensorFlowNpuBootFile string
TensorFlowGpuBootFile string
// ConvertRepoPath is the repository passed to downloadConvertCode to fetch
// the conversion scripts.
ConvertRepoPath string
// GPU_Resource_Specs_ID selects the entry of TrainResourceSpecs.ResourceSpec
// used for the GPU job.
GPU_Resource_Specs_ID int
// NPU_FlavorCode and NPU_PoolID are sent as FlavorCode and PoolID in the NPU
// train-job request.
NPU_FlavorCode string
NPU_PoolID string
// Engine image IDs for NPU jobs.
// NOTE(review): the router code visible alongside this declaration still
// takes EngineID from its own NPU_MINDSPORE_IMAGE_ID / NPU_TENSORFLOW_IMAGE_ID
// constants rather than from these fields — confirm before relying on them.
NPU_MINDSPORE_IMAGE_ID int
NPU_TENSORFLOW_IMAGE_ID int
}{}
)

// DateLang transforms standard language locale name to corresponding value in datetime plugin.
@@ -1408,6 +1426,27 @@ func NewContext() {
Course.TeamName = sec.Key("team_name").MustString("")

GetGrampusConfig()

getModelConvertConfig()
}

// getModelConvertConfig loads the [model_convert] section of the application
// configuration into the package-level ModelConvert settings.
//
// Every key falls back to a built-in default when it is absent. The defaults
// mirror the constants that routers/repo/ai_model_convert.go hard-coded before
// these settings were introduced, so a deployment without a [model_convert]
// section behaves exactly as it did with the constants.
func getModelConvertConfig() {
	sec := Cfg.Section("model_convert")

	// GPU conversion jobs: container images, queue and resource spec.
	ModelConvert.GPU_PYTORCH_IMAGE = sec.Key("GPU_PYTORCH_IMAGE").MustString("dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap")
	ModelConvert.GpuQueue = sec.Key("GpuQueue").MustString("openidgx")
	ModelConvert.GPU_TENSORFLOW_IMAGE = sec.Key("GPU_TENSORFLOW_IMAGE").MustString("dockerhub.pcl.ac.cn:5000/user-images/openi:tf2onnx")
	ModelConvert.GPU_Resource_Specs_ID = sec.Key("GPU_Resource_Specs_ID").MustInt(1)

	// Conversion scripts and the repository they are downloaded from.
	ModelConvert.PytorchOnnxBootFile = sec.Key("PytorchOnnxBootFile").MustString("convert_pytorch.py")
	ModelConvert.PytorchTrTBootFile = sec.Key("PytorchTrTBootFile").MustString("convert_pytorch_tensorrt.py")
	ModelConvert.MindsporeBootFile = sec.Key("MindsporeBootFile").MustString("convert_mindspore.py")
	ModelConvert.TensorFlowNpuBootFile = sec.Key("TensorFlowNpuBootFile").MustString("convert_tensorflow.py")
	ModelConvert.TensorFlowGpuBootFile = sec.Key("TensorFlowGpuBootFile").MustString("convert_tensorflow_gpu.py")
	ModelConvert.ConvertRepoPath = sec.Key("ConvertRepoPath").MustString("https://git.openi.org.cn/zouap/npu_test")

	// NPU conversion jobs: image, flavor, pool and engine image IDs.
	ModelConvert.NPU_MINDSPORE_16_IMAGE = sec.Key("NPU_MINDSPORE_16_IMAGE").MustString("swr.cn-south-222.ai.pcl.cn/openi/mindspore1.6.1_train_v1_openi:v3_ascend")
	ModelConvert.NPU_FlavorCode = sec.Key("NPU_FlavorCode").MustString("modelarts.bm.910.arm.public.1")
	ModelConvert.NPU_PoolID = sec.Key("NPU_PoolID").MustString("pool7908321a")
	// The router constants are NPU_MINDSPORE_IMAGE_ID = 35 and
	// NPU_TENSORFLOW_IMAGE_ID = 121. The defaults here must match them (they
	// were previously swapped); otherwise switching the router over to these
	// settings would submit each engine's job with the other engine's image ID.
	ModelConvert.NPU_MINDSPORE_IMAGE_ID = sec.Key("NPU_MINDSPORE_IMAGE_ID").MustInt(35)
	ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(121)
}

func GetGrampusConfig() {


+ 30
- 31
routers/repo/ai_model_convert.go View File

@@ -36,19 +36,18 @@ const (
LogFile = "log.txt"
DefaultBranchName = "master"
SubTaskName = "task1"
GpuQueue = "openidgx"
Success = "S000"
GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap"
GPU_TENSORFLOW_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tf2onnx"
NPU_MINDSPORE_16_IMAGE = "swr.cn-south-222.ai.pcl.cn/openi/mindspore1.6.1_train_v1_openi:v3_ascend"
PytorchOnnxBootFile = "convert_pytorch.py"
PytorchTrTBootFile = "convert_pytorch_tensorrt.py"
MindsporeBootFile = "convert_mindspore.py"
TensorFlowNpuBootFile = "convert_tensorflow.py"
TensorFlowGpuBootFile = "convert_tensorflow_gpu.py"

ConvertRepoPath = "https://git.openi.org.cn/zouap/npu_test"
REPO_ID = 33267
//GpuQueue = "openidgx"
Success = "S000"
//GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap"
//GPU_TENSORFLOW_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tf2onnx"
//NPU_MINDSPORE_16_IMAGE = "swr.cn-south-222.ai.pcl.cn/openi/mindspore1.6.1_train_v1_openi:v3_ascend"
//PytorchOnnxBootFile = "convert_pytorch.py"
//PytorchTrTBootFile = "convert_pytorch_tensorrt.py"
//MindsporeBootFile = "convert_mindspore.py"
//TensorFlowNpuBootFile = "convert_tensorflow.py"
//TensorFlowGpuBootFile = "convert_tensorflow_gpu.py"

//ConvertRepoPath = "https://git.openi.org.cn/zouap/npu_test"

CONVERT_FORMAT_ONNX = 0
CONVERT_FORMAT_TRT = 1
@@ -59,10 +58,10 @@ const (
NPU_MINDSPORE_IMAGE_ID = 35
NPU_TENSORFLOW_IMAGE_ID = 121

GPU_Resource_Specs_ID = 1 //cpu 1, gpu 1
//GPU_Resource_Specs_ID = 1 //cpu 1, gpu 1

NPU_FlavorCode = "modelarts.bm.910.arm.public.1"
NPU_PoolID = "pool7908321a"
//NPU_FlavorCode = "modelarts.bm.910.arm.public.1"
//NPU_PoolID = "pool7908321a"
)

var (
@@ -142,8 +141,8 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
if err == nil {
os.RemoveAll(codeLocalPath)
}
if err := downloadConvertCode(ConvertRepoPath, codeLocalPath, DefaultBranchName); err != nil {
log.Error("downloadCode failed, server timed out: %s (%v)", ConvertRepoPath, err)
if err := downloadConvertCode(setting.ModelConvert.ConvertRepoPath, codeLocalPath, DefaultBranchName); err != nil {
log.Error("downloadCode failed, server timed out: %s (%v)", setting.ModelConvert.ConvertRepoPath, err)
return err
}
if err := obsMkdir(setting.CodePathPrefix + modelConvert.ID + modelarts.OutputPath + VersionOutputPath + "/"); err != nil {
@@ -172,10 +171,10 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context

var engineId int64
engineId = int64(NPU_MINDSPORE_IMAGE_ID)
bootfile := MindsporeBootFile
bootfile := setting.ModelConvert.MindsporeBootFile
if modelConvert.SrcEngine == TENSORFLOW_ENGINE {
engineId = int64(NPU_TENSORFLOW_IMAGE_ID)
bootfile = TensorFlowNpuBootFile
bootfile = setting.ModelConvert.TensorFlowNpuBootFile
}
userCommand := "/bin/bash /home/work/run_train.sh 's3://" + codeObsPath + "' 'code/" + bootfile + "' '/tmp/log/train.log' --'data_url'='s3://" + dataPath + "' --'train_url'='s3://" + outputObsPath + "'"
userCommand += " --'model'='" + modelConvert.ModelPath + "'"
@@ -193,15 +192,15 @@ func createNpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
BootFileUrl: codeObsPath + bootfile,
BootFile: bootfile,
TrainUrl: outputObsPath,
FlavorCode: NPU_FlavorCode,
FlavorCode: setting.ModelConvert.NPU_FlavorCode,
WorkServerNumber: 1,
IsLatestVersion: modelarts.IsLatestVersion,
EngineID: engineId,
LogUrl: logObsPath,
PoolID: NPU_PoolID,
PoolID: setting.ModelConvert.NPU_PoolID,
//Parameters: param,
BranchName: DefaultBranchName,
UserImageUrl: NPU_MINDSPORE_16_IMAGE,
UserImageUrl: setting.ModelConvert.NPU_MINDSPORE_16_IMAGE,
UserCommand: userCommand,
}
result, err := modelarts.GenerateModelConvertTrainJob(req)
@@ -323,21 +322,21 @@ func downloadFromObsToLocal(task *models.AiModelManage, localPath string) error
func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context, model *models.AiModelManage) error {
modelRelativePath := model.Path
command := ""
IMAGE_URL := GPU_PYTORCH_IMAGE
IMAGE_URL := setting.ModelConvert.GPU_PYTORCH_IMAGE
dataActualPath := setting.Attachment.Minio.RealPath + modelRelativePath

if modelConvert.SrcEngine == PYTORCH_ENGINE {
if modelConvert.DestFormat == CONVERT_FORMAT_ONNX {
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, PytorchOnnxBootFile)
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, setting.ModelConvert.PytorchOnnxBootFile)
} else if modelConvert.DestFormat == CONVERT_FORMAT_TRT {
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, PytorchTrTBootFile)
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, setting.ModelConvert.PytorchTrTBootFile)
} else {
return errors.New("Not support the format.")
}
} else if modelConvert.SrcEngine == TENSORFLOW_ENGINE {
IMAGE_URL = GPU_TENSORFLOW_IMAGE
IMAGE_URL = setting.ModelConvert.GPU_TENSORFLOW_IMAGE
if modelConvert.DestFormat == CONVERT_FORMAT_ONNX {
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, TensorFlowGpuBootFile)
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, setting.ModelConvert.TensorFlowGpuBootFile)
} else {
return errors.New("Not support the format.")
}
@@ -355,7 +354,7 @@ func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
log.Info("command=" + command)

codePath := setting.JobPath + modelConvert.ID + CodeMountPath
downloadConvertCode(ConvertRepoPath, codePath, DefaultBranchName)
downloadConvertCode(setting.ModelConvert.ConvertRepoPath, codePath, DefaultBranchName)

uploadCodeToMinio(codePath+"/", modelConvert.ID, CodeMountPath+"/")

@@ -373,11 +372,11 @@ func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context
if TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs)
}
resourceSpec := TrainResourceSpecs.ResourceSpec[GPU_Resource_Specs_ID]
resourceSpec := TrainResourceSpecs.ResourceSpec[setting.ModelConvert.GPU_Resource_Specs_ID]
jobResult, err := cloudbrain.CreateJob(modelConvert.ID, models.CreateJobParams{
JobName: modelConvert.ID,
RetryCount: 1,
GpuType: GpuQueue,
GpuType: setting.ModelConvert.GpuQueue,
Image: IMAGE_URL,
TaskRoles: []models.TaskRole{
{


Loading…
Cancel
Save