|
|
@@ -40,18 +40,23 @@ const ( |
|
|
|
GPU_PYTORCH_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tensorRT_7_zouap" |
|
|
|
GPU_TENSORFLOW_IMAGE = "dockerhub.pcl.ac.cn:5000/user-images/openi:tf2onnx" |
|
|
|
|
|
|
|
PytorchBootFile = "convert_pytorch.py" |
|
|
|
PytorchOnnxBootFile = "convert_pytorch.py" |
|
|
|
PytorchTrTBootFile = "convert_pytorch_tensorrt.py" |
|
|
|
MindsporeBootFile = "convert_mindspore.py" |
|
|
|
TensorFlowNpuBootFile = "convert_tensorflow.py" |
|
|
|
TensorFlowGpuBootFile = "convert_tensorflow_gpu.py" |
|
|
|
|
|
|
|
ConvertRepoPath = "https://git.openi.org.cn/zouap/npu_test" |
|
|
|
REPO_ID = 33267 |
|
|
|
|
|
|
|
REPO_ID = 33267 |
|
|
|
CONVERT_FORMAT_ONNX = 0 |
|
|
|
CONVERT_FORMAT_TRT = 1 |
|
|
|
|
|
|
|
NPU_MINDSPORE_IMAGE_ID = 35 |
|
|
|
NPU_TENSORFLOW_IMAGE_ID = 121 |
|
|
|
|
|
|
|
GPU_Resource_Specs_ID = 1 //cpu 1, gpu 1 |
|
|
|
|
|
|
|
NPU_FlavorCode = "modelarts.bm.910.arm.public.1" |
|
|
|
NPU_PoolID = "pool7908321a" |
|
|
|
) |
|
|
@@ -333,10 +338,20 @@ func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context |
|
|
|
dataActualPath := setting.Attachment.Minio.RealPath + modelRelativePath |
|
|
|
|
|
|
|
if modelConvert.SrcEngine == PYTORCH_ENGINE { |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, PytorchBootFile) |
|
|
|
if modelConvert.DestFormat == CONVERT_FORMAT_ONNX { |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, PytorchOnnxBootFile) |
|
|
|
} else if modelConvert.DestFormat == CONVERT_FORMAT_TRT { |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, PytorchTrTBootFile) |
|
|
|
} else { |
|
|
|
return errors.New("Not support the format.") |
|
|
|
} |
|
|
|
} else if modelConvert.SrcEngine == TENSORFLOW_ENGINE { |
|
|
|
IMAGE_URL = GPU_TENSORFLOW_IMAGE |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, TensorFlowGpuBootFile) |
|
|
|
if modelConvert.DestFormat == CONVERT_FORMAT_ONNX { |
|
|
|
command = getGpuModelConvertCommand(modelConvert.ID, modelConvert.ModelPath, modelConvert, TensorFlowGpuBootFile) |
|
|
|
} else { |
|
|
|
return errors.New("Not support the format.") |
|
|
|
} |
|
|
|
//If the model is stored on OBS, it must be downloaded locally and then uploaded to minio |
|
|
|
if model.Type == models.TypeCloudBrainTwo { |
|
|
|
relatetiveModelPath := setting.JobPath + modelConvert.ID + "/dataset" |
|
|
@@ -345,7 +360,6 @@ func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context |
|
|
|
uploadCodeToMinio(relatetiveModelPath+"/", modelConvert.ID, "/dataset/") |
|
|
|
dataActualPath = setting.Attachment.Minio.RealPath + setting.Attachment.Minio.Bucket + "/" + setting.CBCodePathPrefix + modelConvert.ID + "/dataset" |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
log.Info("dataActualPath=" + dataActualPath) |
|
|
|
|
|
|
@@ -370,7 +384,7 @@ func createGpuTrainJob(modelConvert *models.AiModelConvert, ctx *context.Context |
|
|
|
if TrainResourceSpecs == nil { |
|
|
|
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) |
|
|
|
} |
|
|
|
resourceSpec := TrainResourceSpecs.ResourceSpec[1] |
|
|
|
resourceSpec := TrainResourceSpecs.ResourceSpec[GPU_Resource_Specs_ID] |
|
|
|
jobResult, err := cloudbrain.CreateJob(modelConvert.ID, models.CreateJobParams{ |
|
|
|
JobName: modelConvert.ID, |
|
|
|
RetryCount: 1, |
|
|
|