Browse Source

fix bug

tags/v1.21.12.1
liuzx 4 years ago
parent
commit
7f7fe8a3ca
4 changed files with 122 additions and 41 deletions
  1. +1
    -1
      models/cloudbrain.go
  2. +2
    -2
      modules/storage/obs.go
  3. +1
    -2
      routers/api/v1/repo/modelarts.go
  4. +118
    -36
      routers/repo/modelarts.go

+ 1
- 1
models/cloudbrain.go View File

@@ -87,7 +87,7 @@ type Cloudbrain struct {
LogUrl string //日志输出的obs路径 LogUrl string //日志输出的obs路径
PreVersionId int64 //父版本的版本id PreVersionId int64 //父版本的版本id
FlavorCode string //modelarts上的规格id FlavorCode string //modelarts上的规格id
Description string //描述
Description string `xorm:"varchar(256)"` //描述
WorkServerNumber int //节点数 WorkServerNumber int //节点数
FlavorName string //规格名称 FlavorName string //规格名称
EngineName string //引擎名称 EngineName string //引擎名称


+ 2
- 2
modules/storage/obs.go View File

@@ -176,10 +176,10 @@ func ObsModelDownload(JobName string, fileName string) (io.ReadCloser, error) {
} }
} }


func GetObsListObject(jobName, parentDir string) ([]FileInfo, error) {
func GetObsListObject(jobName, parentDir string, versionOutputPath string) ([]FileInfo, error) {
input := &obs.ListObjectsInput{} input := &obs.ListObjectsInput{}
input.Bucket = setting.Bucket input.Bucket = setting.Bucket
input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, parentDir), "/")
input.Prefix = strings.TrimPrefix(path.Join(setting.TrainJobModelPath, jobName, setting.OutPutPath, versionOutputPath, parentDir), "/")
strPrefix := strings.Split(input.Prefix, "/") strPrefix := strings.Split(input.Prefix, "/")
output, err := ObsCli.ListObjects(input) output, err := ObsCli.ListObjects(input)
fileInfos := make([]FileInfo, 0) fileInfos := make([]FileInfo, 0)


+ 1
- 2
routers/api/v1/repo/modelarts.go View File

@@ -303,8 +303,7 @@ func ModelList(ctx *context.APIContext) {
return return
} }
VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(task.TotalVersionCount) VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(task.TotalVersionCount)
parentDir = VersionOutputPath + "/" + parentDir
models, err := storage.GetObsListObject(task.JobName, parentDir)
models, err := storage.GetObsListObject(task.JobName, parentDir, VersionOutputPath)
if err != nil { if err != nil {
log.Info("get TrainJobListModel failed:", err) log.Info("get TrainJobListModel failed:", err)
ctx.ServerError("GetObsListObject:", err) ctx.ServerError("GetObsListObject:", err)


+ 118
- 36
routers/repo/modelarts.go View File

@@ -477,6 +477,94 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
return nil return nil
} }


func ErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error {
ctx.Data["PageIsCloudBrain"] = true
var jobID = ctx.Params(":jobid")
// var versionName = ctx.Params(":version-name")
var versionName = ctx.Query("version_name")

task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
if err != nil {
log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error())
return err
}

t := time.Now()
var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:]
ctx.Data["job_name"] = task.JobName

attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID)
if err != nil {
ctx.ServerError("GetAllUserAttachments failed:", err)
return err
}
ctx.Data["attachments"] = attachs

var resourcePools modelarts.ResourcePool
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["resource_pools"] = resourcePools.Info

var engines modelarts.Engine
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["engines"] = engines.Info

var versionInfos modelarts.VersionInfo
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["engine_versions"] = versionInfos.Version

var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["flavor_infos"] = flavorInfos.Info

var Parameters modelarts.Parameters
if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["params"] = Parameters.Parameter

outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
ctx.Data["train_url"] = outputObsPath

Branches, err := ctx.Repo.GitRepo.GetBranches()
if err != nil {
ctx.ServerError("GetBranches error:", err)
return err
}
ctx.Data["branches"] = Branches
ctx.Data["description"] = form.Description
ctx.Data["dataset_name"] = task.DatasetName
ctx.Data["work_server_number"] = form.WorkServerNumber
ctx.Data["flavor_name"] = form.FlavorName
ctx.Data["engine_name"] = form.EngineName
ctx.Data["flavor_code"] = task.FlavorCode
ctx.Data["engine_id"] = task.EngineID

ctx.Data["bootFile"] = form.BootFile
ctx.Data["uuid"] = form.Attachment
ctx.Data["branch_name"] = form.BranchName
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom)
if err != nil {
ctx.ServerError("getConfigList failed:", err)
return err
}
ctx.Data["config_list"] = configList.ParaConfigs

return nil
}

func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
ctx.Data["PageIsTrainJob"] = true ctx.Data["PageIsTrainJob"] = true
VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(modelarts.TotalVersionCount) VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(modelarts.TotalVersionCount)
@@ -492,6 +580,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
isSaveParam := form.IsSaveParam isSaveParam := form.IsSaveParam
repo := ctx.Repo.Repository repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
// codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/"
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/"
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
@@ -529,13 +618,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
}); err != nil { }); err != nil {
log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err) log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err)
trainJobNewDataPrepare(ctx) trainJobNewDataPrepare(ctx)

ctx.Data["bootFile"] = form.BootFile
ctx.Data["uuid"] = form.Attachment
ctx.Data["datasetName"] = attach.Name
ctx.Data["params"] = form.Params
ctx.Data["branch_name"] = branch_name
trainJobNewDataPrepare(ctx)
ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsTrainJobNew, &form) ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsTrainJobNew, &form)
return return
} }
@@ -555,7 +637,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
return return
} }


// parentDir := VersionOutputPath + "/"
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
// if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
trainJobNewDataPrepare(ctx) trainJobNewDataPrepare(ctx)
ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form) ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobNew, &form)
@@ -651,6 +735,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
TotalVersionCount: modelarts.TotalVersionCount, TotalVersionCount: modelarts.TotalVersionCount,
} }


//将params转换Parameters.Parameter,出错时返回给前端
var Parameters modelarts.Parameters
if err := json.Unmarshal([]byte(params), &Parameters); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return
}

err = modelarts.GenerateTrainJob(ctx, req) err = modelarts.GenerateTrainJob(ctx, req)
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error()) log.Error("GenerateTrainJob failed:%v", err.Error())
@@ -658,7 +749,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Data["bootFile"] = form.BootFile ctx.Data["bootFile"] = form.BootFile
ctx.Data["uuid"] = form.Attachment ctx.Data["uuid"] = form.Attachment
ctx.Data["datasetName"] = attach.Name ctx.Data["datasetName"] = attach.Name
ctx.Data["params"] = form.Params
ctx.Data["params"] = Parameters.Parameter
ctx.Data["branch_name"] = branch_name ctx.Data["branch_name"] = branch_name
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
return return
@@ -689,7 +780,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
isSaveParam := form.IsSaveParam isSaveParam := form.IsSaveParam
repo := ctx.Repo.Repository repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/"
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/"
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
@@ -701,16 +792,16 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ


if err := paramCheckCreateTrainJob(form); err != nil { if err := paramCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err) log.Error("paramCheckCreateTrainJob failed:(%v)", err)
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
return return
} }


attach, err := models.GetAttachmentByUUID(uuid)
if err != nil {
log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error())
return
}
// attach, err := models.GetAttachmentByUUID(uuid)
// if err != nil {
// log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error())
// return
// }


//todo: del the codeLocalPath //todo: del the codeLocalPath
_, err = ioutil.ReadDir(codeLocalPath) _, err = ioutil.ReadDir(codeLocalPath)
@@ -724,13 +815,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
Branch: branch_name, Branch: branch_name,
}); err != nil { }); err != nil {
log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err)
trainJobNewVersionDataPrepare(ctx)

ctx.Data["bootFile"] = form.BootFile
ctx.Data["uuid"] = form.Attachment
ctx.Data["datasetName"] = attach.Name
ctx.Data["params"] = form.Params
ctx.Data["branch_name"] = branch_name
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form)
return return
} }
@@ -738,21 +823,23 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
//todo: upload code (send to file_server todo this work?) //todo: upload code (send to file_server todo this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form)
return return
} }


if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil {
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form)
return return
} }


if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
parentDir := VersionOutputPath + "/"
// if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form)
return return
} }
@@ -772,7 +859,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
err := json.Unmarshal([]byte(params), &parameters) err := json.Unmarshal([]byte(params), &parameters)
if err != nil { if err != nil {
log.Error("Failed to Unmarshal params: %s (%v)", params, err) log.Error("Failed to Unmarshal params: %s (%v)", params, err)
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form)
return return
} }
@@ -791,7 +878,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
if isSaveParam == "on" { if isSaveParam == "on" {
if form.ParameterTemplateName == "" { if form.ParameterTemplateName == "" {
log.Error("ParameterTemplateName is empty") log.Error("ParameterTemplateName is empty")
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form)
return return
} }
@@ -815,7 +902,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ


if err != nil { if err != nil {
log.Error("Failed to CreateTrainJobConfig: %v", err) log.Error("Failed to CreateTrainJobConfig: %v", err)
trainJobNewVersionDataPrepare(ctx)
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form)
return return
} }
@@ -862,15 +949,11 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) err = modelarts.GenerateTrainJobVersion(ctx, req, jobID)
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error()) log.Error("GenerateTrainJob failed:%v", err.Error())
trainJobNewVersionDataPrepare(ctx)
ctx.Data["bootFile"] = form.BootFile
ctx.Data["uuid"] = form.Attachment
ctx.Data["datasetName"] = attach.Name
ctx.Data["params"] = form.Params
ErrorDataPrepare(ctx, form)
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
return return
} }
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job/" + jobID)
// ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) // ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
} }


@@ -963,7 +1046,6 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {


func TrainJobShow(ctx *context.Context) { func TrainJobShow(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true ctx.Data["PageIsCloudBrain"] = true

var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")


repo := ctx.Repo.Repository repo := ctx.Repo.Repository


Loading…
Cancel
Save