Browse Source

gpu train

tags/v1.22.6.2
lewis 3 years ago
parent
commit
6a9154af31
3 changed files with 28 additions and 9 deletions
  1. +1
    -1
      modules/grampus/grampus.go
  2. +2
    -0
      modules/setting/setting.go
  3. +25
    -8
      routers/repo/grampus.go

+ 1
- 1
modules/grampus/grampus.go View File

@@ -15,7 +15,7 @@ const (
ProcessorTypeNPU = "npu.huawei.com/NPU"
ProcessorTypeGPU = "nvidia.com/gpu"

CommandPrepareScript = "pwd;cd /tmp;mkdir -p output;mkdir -p code;mkdir -p dataset;wget -q https://git.openi.org.cn/lewis/script_for_grampus/archive/master.zip;unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs;"
CommandPrepareScript = "pwd;cd /tmp;mkdir -p output;mkdir -p code;mkdir -p dataset;wget -q https://git.openi.org.cn/OpenIOSSG/script_for_grampus/archive/master.zip;unzip -q master.zip;cd script_for_grampus;chmod 777 downloader_for_obs uploader_for_obs downloader_for_minio uploader_for_minio;"
CodeArchiveName = "master.zip"
)



+ 2
- 0
modules/setting/setting.go View File

@@ -530,6 +530,7 @@ var (

//grampus config
Grampus = struct {
Env string
Host string
UserName string
Password string
@@ -1395,6 +1396,7 @@ func NewContext() {
func GetGrampusConfig() {
sec := Cfg.Section("grampus")

Grampus.Env = sec.Key("ENV").MustString("TEST")
Grampus.Host = sec.Key("SERVER_HOST").MustString("")
Grampus.UserName = sec.Key("USERNAME").MustString("")
Grampus.Password = sec.Key("PASSWORD").MustString("")


+ 25
- 8
routers/repo/grampus.go View File

@@ -114,14 +114,16 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
bootFile := form.BootFile
params := form.Params
repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
//codeObsPath := grampus.JobPath + jobName + modelarts.CodePath
//dataObsPath := setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/"
codeMinioPath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/"
dataMinioPath := setting.Attachment.Minio.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"
branchName := form.BranchName
flavorName := form.FlavorName
engineName := form.EngineName
image := strings.TrimSpace(form.Image)

jobName = displayJobName

if !jobNamePattern.MatchString(displayJobName) {
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplGrampusTrainJobGPUNew, &form)
return
@@ -192,6 +194,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
}

//todo: upload code (send to file_server to do this work?)
//upload code
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -207,6 +210,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
return
}

//init model readme
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -215,8 +219,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
}

//prepare command
var codeObsPath, dataObsPath string
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+attachment.Name, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name)
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", dataMinioPath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", attachment.Name)
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeGPU)
@@ -242,6 +245,8 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
FlavorName: flavorName,
EngineName: engineName,
DatasetName: attachment.Name,
IsLatestVersion: modelarts.IsLatestVersion,
VersionCount: modelarts.VersionCount,
}

err = grampus.GenerateTrainJob(ctx, req)
@@ -357,6 +362,12 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain

//prepare command
command, err := generateCommand(repo.Name, grampus.ProcessorTypeNPU, codeObsPath+cloudbrain.DefaultBranchName+".zip", dataObsPath+attachment.Name, bootFile, params, setting.CodePathPrefix+jobName+modelarts.OutputPath, attachment.Name)
if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
grampusTrainJobNewDataPrepare(ctx, grampus.ProcessorTypeNPU)
ctx.RenderWithErr("Create task failed, internal error", tplGrampusTrainJobNPUNew, &form)
return
}

commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)

@@ -597,7 +608,8 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
commandDownload := "./downloader_for_obs " + setting.Bucket + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";"
command += commandDownload
} else if processorType == grampus.ProcessorTypeGPU {

commandDownload := "./downloader_for_minio " + setting.Grampus.Env + " " + codeRemotePath + " " + grampus.CodeArchiveName + " " + dataRemotePath + " " + datasetName + ";"
command += commandDownload
}

//unzip code & dataset
@@ -636,8 +648,13 @@ func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bo
command += commandGetRes

//upload models
commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/tmp/output/;"
command += commandUpload
if processorType == grampus.ProcessorTypeNPU {
commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_obs " + setting.Bucket + " " + outputRemotePath + " " + "/tmp/output/;"
command += commandUpload
} else if processorType == grampus.ProcessorTypeGPU {
commandUpload := "cd /tmp/script_for_grampus/;./uploader_for_minio " + setting.Grampus.Env + " " + outputRemotePath + " " + "/tmp/output/;"
command += commandUpload
}

//check exec result
commandCheckRes := " [[ result -eq 0 ]] && echo success || ls failed;"


Loading…
Cancel
Save