|
|
|
@@ -40,6 +40,7 @@ const ( |
|
|
|
tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" |
|
|
|
tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" |
|
|
|
tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index" |
|
|
|
tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new" |
|
|
|
) |
|
|
|
|
|
|
|
// MustEnableDataset check if repository enable internal cb |
|
|
|
@@ -286,8 +287,8 @@ func NotebookIndex(ctx *context.Context) { |
|
|
|
Page: page, |
|
|
|
PageSize: setting.UI.IssuePagingNum, |
|
|
|
}, |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeDebug), |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
@@ -493,14 +494,6 @@ func NotebookDel(ctx *context.Context) { |
|
|
|
func TrainJobIndex(ctx *context.Context) { |
|
|
|
MustEnableModelArts(ctx) |
|
|
|
|
|
|
|
//can, err := canUserCreateTrainJob(ctx.User.ID) |
|
|
|
//if err != nil { |
|
|
|
// ctx.ServerError("canUserCreateTrainJob", err) |
|
|
|
// return |
|
|
|
//} |
|
|
|
// |
|
|
|
//ctx.Data["CanCreate"] = can |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|
page := ctx.QueryInt("page") |
|
|
|
if page <= 0 { |
|
|
|
@@ -512,9 +505,10 @@ func TrainJobIndex(ctx *context.Context) { |
|
|
|
Page: page, |
|
|
|
PageSize: setting.UI.IssuePagingNum, |
|
|
|
}, |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
VersionName: string(models.JobVersionName), |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("Cloudbrain", err) |
|
|
|
@@ -614,6 +608,82 @@ func trainJobNewDataPrepare(ctx *context.Context) error { |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobNewVersion(ctx *context.Context) { |
|
|
|
err := trainJobNewVersionDataPrepare(ctx) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get new train-job info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.HTML(200, tplModelArtsTrainJobVersionNew) |
|
|
|
} |
|
|
|
|
|
|
|
func trainJobNewVersionDataPrepare(ctx *context.Context) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
var versionName = ctx.Query("versionName") |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["job_name"] = jobName |
|
|
|
|
|
|
|
attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
var flavorInfos modelarts.Flavor |
|
|
|
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["flavor_infos"] = flavorInfos.Info |
|
|
|
|
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath |
|
|
|
ctx.Data["train_url"] = outputObsPath |
|
|
|
|
|
|
|
Branches, err := ctx.Repo.GitRepo.GetBranches() |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetBranches error:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["Branches"] = Branches |
|
|
|
ctx.Data["BranchesCount"] = len(Branches) |
|
|
|
ctx.Data["jobID"] = jobID |
|
|
|
ctx.Data["versionName"] = versionName |
|
|
|
|
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { |
|
|
|
ctx.Data["PageIsTrainJob"] = true |
|
|
|
jobName := form.JobName |
|
|
|
@@ -634,19 +704,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" |
|
|
|
branch_name := form.BranchName |
|
|
|
|
|
|
|
//can, err := canUserCreateTrainJob(ctx.User.ID) |
|
|
|
//if err != nil { |
|
|
|
// ctx.ServerError("canUserCreateTrainJob", err) |
|
|
|
// return |
|
|
|
//} |
|
|
|
// |
|
|
|
//if !can { |
|
|
|
// log.Error("the user can not create train-job") |
|
|
|
// ctx.RenderWithErr("the user can not create train-job", tplModelArtsTrainJobNew, &form) |
|
|
|
// return |
|
|
|
//} |
|
|
|
|
|
|
|
//param check |
|
|
|
if err := paramCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
@@ -665,9 +722,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
if err == nil { |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} |
|
|
|
// branch_name := "testbranch" |
|
|
|
// gitRepo, _ := git.OpenRepository(repo.RepoPath()) |
|
|
|
// commitID, _ := gitRepo.GetBranchCommitID(branch_name) |
|
|
|
|
|
|
|
if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ |
|
|
|
Branch: branch_name, |
|
|
|
@@ -786,7 +840,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
LogUrl: logObsPath, |
|
|
|
PoolID: poolID, |
|
|
|
Uuid: uuid, |
|
|
|
Parameters: param, |
|
|
|
Parameters: parameters.Parameter, |
|
|
|
} |
|
|
|
|
|
|
|
err = modelarts.GenerateTrainJob(ctx, req) |
|
|
|
@@ -797,12 +851,220 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ctx.Data["branch_name"] = branch_name |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { |
|
|
|
ctx.Data["PageIsTrainJob"] = true |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
var versionName = ctx.Query("versionName") |
|
|
|
jobName := form.JobName |
|
|
|
uuid := form.Attachment |
|
|
|
description := form.Description |
|
|
|
workServerNumber := form.WorkServerNumber |
|
|
|
engineID := form.EngineID |
|
|
|
bootFile := form.BootFile |
|
|
|
flavorCode := form.Flavor |
|
|
|
params := form.Params |
|
|
|
poolID := form.PoolID |
|
|
|
isSaveParam := form.IsSaveParam |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath |
|
|
|
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath |
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath |
|
|
|
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath |
|
|
|
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" |
|
|
|
branch_name := form.BranchName |
|
|
|
|
|
|
|
if err := paramCheckCreateTrainJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateTrainJob failed:(%v)", err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
attach, err := models.GetAttachmentByUUID(uuid) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetAttachmentByUUID(%s) failed:%v", uuid, err.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//todo: del the codeLocalPath |
|
|
|
_, err = ioutil.ReadDir(codeLocalPath) |
|
|
|
if err == nil { |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} |
|
|
|
|
|
|
|
if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ |
|
|
|
Branch: branch_name, |
|
|
|
}); err != nil { |
|
|
|
log.Error("创建任务失败,任务名称已存在!: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
|
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ctx.Data["branch_name"] = branch_name |
|
|
|
// ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form) |
|
|
|
ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
// ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//todo: upload code (send to file_server todo this work?) |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//todo: del local code? |
|
|
|
|
|
|
|
var parameters models.Parameters |
|
|
|
param := make([]models.Parameter, 0) |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: modelarts.TrainUrl, |
|
|
|
Value: outputObsPath, |
|
|
|
}, models.Parameter{ |
|
|
|
Label: modelarts.DataUrl, |
|
|
|
Value: dataPath, |
|
|
|
}) |
|
|
|
if len(params) != 0 { |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("运行参数错误", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
for _, parameter := range parameters.Parameter { |
|
|
|
if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: parameter.Label, |
|
|
|
Value: parameter.Value, |
|
|
|
}) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//save param config |
|
|
|
if isSaveParam == "on" { |
|
|
|
if form.ParameterTemplateName == "" { |
|
|
|
log.Error("ParameterTemplateName is empty") |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("保存作业参数时,作业参数名称不能为空", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
_, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ |
|
|
|
ConfigName: form.ParameterTemplateName, |
|
|
|
Description: form.PrameterDescription, |
|
|
|
DataUrl: dataPath, |
|
|
|
AppUrl: codeObsPath, |
|
|
|
BootFileUrl: codeObsPath + bootFile, |
|
|
|
TrainUrl: outputObsPath, |
|
|
|
Flavor: models.Flavor{ |
|
|
|
Code: flavorCode, |
|
|
|
}, |
|
|
|
WorkServerNum: workServerNumber, |
|
|
|
EngineID: int64(engineID), |
|
|
|
LogUrl: logObsPath, |
|
|
|
PoolID: poolID, |
|
|
|
Parameter: parameters.Parameter, |
|
|
|
}) |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to CreateTrainJobConfig: %v", err) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
// JobVersionName := "V0001" |
|
|
|
// PreVersionId := int64(67646) |
|
|
|
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
req := &modelarts.GenerateTrainJobVersionReq{ |
|
|
|
JobName: task.JobName, |
|
|
|
DataUrl: dataPath, |
|
|
|
Description: description, |
|
|
|
CodeObsPath: codeObsPath, |
|
|
|
BootFile: codeObsPath + bootFile, |
|
|
|
TrainUrl: outputObsPath, |
|
|
|
FlavorCode: flavorCode, |
|
|
|
WorkServerNumber: workServerNumber, |
|
|
|
EngineID: int64(engineID), |
|
|
|
LogUrl: logObsPath, |
|
|
|
PoolID: poolID, |
|
|
|
Uuid: uuid, |
|
|
|
Parameters: parameters.Parameter, |
|
|
|
PreVersionId: task.VersionID, |
|
|
|
} |
|
|
|
err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
trainJobNewVersionDataPrepare(ctx) |
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["datasetName"] = attach.Name |
|
|
|
ctx.Data["params"] = form.Params |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
//保存openi创建训练任务界面的参数 |
|
|
|
// err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ |
|
|
|
|
|
|
|
// JobName: req.JobName, |
|
|
|
// ResourcePools: form.PoolID, |
|
|
|
// EngineVersions: form.EngineID, |
|
|
|
// FlavorInfos: form.Flavor, |
|
|
|
// TrainUrl: outputObsPath, |
|
|
|
// BootFile: form.BootFile, |
|
|
|
// Uuid: form.Attachment, |
|
|
|
// DatasetName: attach.Name, |
|
|
|
// Params: form.Params, |
|
|
|
// BranchName: branch_name, |
|
|
|
// }) |
|
|
|
|
|
|
|
// if err != nil { |
|
|
|
// log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) |
|
|
|
// trainJobNewVersionDataPrepare(ctx) |
|
|
|
// ctx.Data["bootFile"] = form.BootFile |
|
|
|
// ctx.Data["uuid"] = form.Attachment |
|
|
|
// ctx.Data["datasetName"] = attach.Name |
|
|
|
// ctx.Data["params"] = form.Params |
|
|
|
// ctx.Data["branch_name"] = branch_name |
|
|
|
// ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) |
|
|
|
// return |
|
|
|
// } |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|
} |
|
|
|
|
|
|
|
// readDir reads the directory named by dirname and returns |
|
|
|
// a list of directory entries sorted by filename. |
|
|
|
func readDir(dirname string) ([]os.FileInfo, error) { |
|
|
|
@@ -895,6 +1157,27 @@ func TrainJobShow(ctx *context.Context) { |
|
|
|
|
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
task, err := models.GetCloudbrainByJobID(jobID) |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|
page := ctx.QueryInt("page") |
|
|
|
if page <= 0 { |
|
|
|
page = 1 |
|
|
|
} |
|
|
|
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ |
|
|
|
ListOptions: models.ListOptions{ |
|
|
|
Page: page, |
|
|
|
PageSize: setting.UI.IssuePagingNum, |
|
|
|
}, |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
JobID: jobID, |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("Cloudbrain", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) |
|
|
|
@@ -945,6 +1228,8 @@ func TrainJobShow(ctx *context.Context) { |
|
|
|
ctx.Data["task"] = task |
|
|
|
ctx.Data["jobID"] = jobID |
|
|
|
ctx.Data["result"] = result |
|
|
|
ctx.Data["VersionListTasks"] = VersionListTasks |
|
|
|
ctx.Data["VersionLisCount"] = VersionListCount |
|
|
|
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) |
|
|
|
} |
|
|
|
|
|
|
|
|