|
|
@@ -114,14 +114,16 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
bootFile := form.BootFile |
|
|
|
params := form.Params |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath |
|
|
|
//codeObsPath := grampus.JobPath + jobName + modelarts.CodePath |
|
|
|
//dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" |
|
|
|
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" |
|
|
|
branchName := form.BranchName |
|
|
|
flavorName := form.FlavorName |
|
|
|
engineName := form.EngineName |
|
|
|
image := strings.TrimSpace(form.Image) |
|
|
|
|
|
|
|
jobName = displayJobName |
|
|
|
|
|
|
|
if !jobNamePattern.MatchString(displayJobName) { |
|
|
|
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form) |
|
|
|
return |
|
|
@@ -192,6 +194,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
|
|
|
|
//todo: upload code (send to file_server todo this work?) |
|
|
|
//upload code |
|
|
|
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
@@ -207,6 +210,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//init model readme |
|
|
|
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
@@ -215,8 +219,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
} |
|
|
|
|
|
|
|
//prepare command |
|
|
|
var codeObsPath, dataObsPath string |
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+attachment.Name, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) |
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU) |
|
|
@@ -242,6 +245,8 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
FlavorName: flavorName, |
|
|
|
EngineName: engineName, |
|
|
|
DatasetName: attachment.Name, |
|
|
|
IsLatestVersion: modelarts.IsLatestVersion, |
|
|
|
VersionCount: modelarts.VersionCount, |
|
|
|
} |
|
|
|
|
|
|
|
err = grampus.GenerateTrainJob(ctx, req) |
|
|
@@ -357,6 +362,12 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
|
|
|
|
|
//prepare command |
|
|
|
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+attachment.Name, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) |
|
|
|
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU) |
|
|
|
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) |
|
|
|
|
|
|
@@ -597,7 +608,8 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo |
|
|
|
commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";" |
|
|
|
command += commandDownload |
|
|
|
} else if processorType == grampus.ProcessorTypeGPU { |
|
|
|
|
|
|
|
commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";" |
|
|
|
command += commandDownload |
|
|
|
} |
|
|
|
|
|
|
|
//unzip code & dataset |
|
|
@@ -636,8 +648,13 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo |
|
|
|
command += commandGetRes |
|
|
|
|
|
|
|
//upload models |
|
|
|
commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/tmp/output/;" |
|
|
|
command += commandUpload |
|
|
|
if processorType == grampus.ProcessorTypeNPU { |
|
|
|
commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/tmp/output/;" |
|
|
|
command += commandUpload |
|
|
|
} else if processorType == grampus.ProcessorTypeGPU { |
|
|
|
commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + "/tmp/output/;" |
|
|
|
command += commandUpload |
|
|
|
} |
|
|
|
|
|
|
|
//check exec result |
|
|
|
commandCheckRes := " [[ result -eq 0 ]] && echo success || ls failed;" |
|
|
|