|
|
@@ -4,6 +4,7 @@ import ( |
|
|
|
"encoding/json" |
|
|
|
"errors" |
|
|
|
"io" |
|
|
|
"io/ioutil" |
|
|
|
"net/http" |
|
|
|
"os" |
|
|
|
"path" |
|
|
@@ -38,6 +39,10 @@ const ( |
|
|
|
tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new" |
|
|
|
tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show" |
|
|
|
tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new" |
|
|
|
|
|
|
|
tplModelArtsInferenceJobIndex base.TplName = "repo/modelarts/inferencejob/index" |
|
|
|
tplModelArtsInferenceJobNew base.TplName = "repo/modelarts/inferencejob/new" |
|
|
|
tplModelArtsInferenceJobShow base.TplName = "repo/modelarts/inferencejob/show" |
|
|
|
) |
|
|
|
|
|
|
|
func DebugJobIndex(ctx *context.Context) { |
|
|
@@ -737,11 +742,16 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) |
|
|
|
// } |
|
|
|
|
|
|
|
//todo: del the codeLocalPath |
|
|
|
// _, err := ioutil.ReadDir(codeLocalPath) |
|
|
|
// if err == nil { |
|
|
|
// os.RemoveAll(codeLocalPath) |
|
|
|
// } |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
_, err = ioutil.ReadDir(codeLocalPath) |
|
|
|
if err == nil { |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} else { |
|
|
|
log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
// os.RemoveAll(codeLocalPath) |
|
|
|
|
|
|
|
gitRepo, _ := git.OpenRepository(repo.RepoPath()) |
|
|
|
commitID, _ := gitRepo.GetBranchCommitID(branch_name) |
|
|
@@ -954,11 +964,16 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ |
|
|
|
// } |
|
|
|
|
|
|
|
//todo: del the codeLocalPath |
|
|
|
// _, err = ioutil.ReadDir(codeLocalPath) |
|
|
|
// if err == nil { |
|
|
|
// os.RemoveAll(codeLocalPath) |
|
|
|
// } |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
_, err = ioutil.ReadDir(codeLocalPath) |
|
|
|
if err == nil { |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} else { |
|
|
|
log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) |
|
|
|
versionErrorDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsTrainJobVersionNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
// os.RemoveAll(codeLocalPath) |
|
|
|
|
|
|
|
gitRepo, _ := git.OpenRepository(repo.RepoPath()) |
|
|
|
commitID, _ := gitRepo.GetBranchCommitID(branch_name) |
|
|
@@ -1196,6 +1211,20 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { |
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) error { |
|
|
|
if !strings.HasSuffix(form.BootFile, ".py") { |
|
|
|
log.Error("the boot file(%s) must be a python file", form.BootFile) |
|
|
|
return errors.New("启动文件必须是python文件") |
|
|
|
} |
|
|
|
|
|
|
|
if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 { |
|
|
|
log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber) |
|
|
|
return errors.New("计算节点数必须在1-25之间") |
|
|
|
} |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func TrainJobShow(ctx *context.Context) { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
@@ -1474,3 +1503,485 @@ func ModelDownload(ctx *context.Context) { |
|
|
|
} |
|
|
|
http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) |
|
|
|
} |
|
|
|
|
|
|
|
func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) { |
|
|
|
ctx.Data["PageIsTrainJob"] = true |
|
|
|
jobName := form.JobName |
|
|
|
uuid := form.Attachment |
|
|
|
description := form.Description |
|
|
|
workServerNumber := form.WorkServerNumber |
|
|
|
engineID := form.EngineID |
|
|
|
bootFile := form.BootFile |
|
|
|
flavorCode := form.Flavor |
|
|
|
params := form.Params |
|
|
|
poolID := form.PoolID |
|
|
|
isSaveParam := form.IsSaveParam |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath |
|
|
|
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath |
|
|
|
resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath |
|
|
|
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath |
|
|
|
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" |
|
|
|
branch_name := form.BranchName |
|
|
|
isLatestVersion := modelarts.IsLatestVersion |
|
|
|
FlavorName := form.FlavorName |
|
|
|
VersionCount := modelarts.VersionCount |
|
|
|
EngineName := form.EngineName |
|
|
|
trainUrl := form.TrainUrl |
|
|
|
ckptName := form.CkptName |
|
|
|
|
|
|
|
count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} else { |
|
|
|
if count >= 1 { |
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if err := paramCheckCreateInferenceJob(form); err != nil { |
|
|
|
log.Error("paramCheckCreateInferenceJob failed:(%v)", err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//todo: del the codeLocalPath |
|
|
|
_, err = ioutil.ReadDir(codeLocalPath) |
|
|
|
if err == nil { |
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
} else { |
|
|
|
log.Error("创建任务失败,原代码还未删除,请重试!: %s (%v)", repo.FullName(), err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("创建任务失败,原代码还未删除,请重试!", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
// os.RemoveAll(codeLocalPath) |
|
|
|
|
|
|
|
gitRepo, _ := git.OpenRepository(repo.RepoPath()) |
|
|
|
commitID, _ := gitRepo.GetBranchCommitID(branch_name) |
|
|
|
|
|
|
|
if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{ |
|
|
|
Branch: branch_name, |
|
|
|
}); err != nil { |
|
|
|
log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//todo: upload code (send to file_server todo this work?) |
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_result", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { |
|
|
|
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
// parentDir := VersionOutputPath + "/" |
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
// if err := uploadCodeToObs(codeLocalPath, jobName, parentDir); err != nil { |
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("Failed to uploadCodeToObs", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//todo: del local code? |
|
|
|
|
|
|
|
var parameters models.Parameters |
|
|
|
param := make([]models.Parameter, 0) |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: modelarts.TrainUrl, |
|
|
|
Value: trainUrl, |
|
|
|
}, models.Parameter{ |
|
|
|
Label: modelarts.DataUrl, |
|
|
|
Value: dataPath, |
|
|
|
}) |
|
|
|
if len(params) != 0 { |
|
|
|
err := json.Unmarshal([]byte(params), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal params: %s (%v)", params, err) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr("运行参数错误", tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
for _, parameter := range parameters.Parameter { |
|
|
|
if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { |
|
|
|
param = append(param, models.Parameter{ |
|
|
|
Label: parameter.Label, |
|
|
|
Value: parameter.Value, |
|
|
|
}) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
req := &modelarts.GenerateInferenceJobReq{ |
|
|
|
JobName: jobName, |
|
|
|
DataUrl: dataPath, |
|
|
|
Description: description, |
|
|
|
CodeObsPath: codeObsPath, |
|
|
|
BootFileUrl: codeObsPath + bootFile, |
|
|
|
BootFile: bootFile, |
|
|
|
TrainUrl: trainUrl, |
|
|
|
FlavorCode: flavorCode, |
|
|
|
WorkServerNumber: workServerNumber, |
|
|
|
EngineID: int64(engineID), |
|
|
|
LogUrl: logObsPath, |
|
|
|
PoolID: poolID, |
|
|
|
Uuid: uuid, |
|
|
|
Parameters: parameters.Parameter, |
|
|
|
CommitID: commitID, |
|
|
|
IsLatestVersion: isLatestVersion, |
|
|
|
BranchName: branch_name, |
|
|
|
Params: form.Params, |
|
|
|
FlavorName: FlavorName, |
|
|
|
EngineName: EngineName, |
|
|
|
VersionCount: VersionCount, |
|
|
|
TotalVersionCount: modelarts.TotalVersionCount, |
|
|
|
} |
|
|
|
|
|
|
|
//将params转换Parameters.Parameter,出错时返回给前端 |
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err := json.Unmarshal([]byte(params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
err = modelarts.GenerateTrainJob(ctx, req) |
|
|
|
if err != nil { |
|
|
|
log.Error("GenerateTrainJob failed:%v", err.Error()) |
|
|
|
inferenceJobErrorNewDataPrepare(ctx, form) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsInferenceJobNew, &form) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") |
|
|
|
} |
|
|
|
func InferenceJobIndex(ctx *context.Context) { |
|
|
|
MustEnableModelArts(ctx) |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|
page := ctx.QueryInt("page") |
|
|
|
if page <= 0 { |
|
|
|
page = 1 |
|
|
|
} |
|
|
|
|
|
|
|
tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ |
|
|
|
ListOptions: models.ListOptions{ |
|
|
|
Page: page, |
|
|
|
PageSize: setting.UI.IssuePagingNum, |
|
|
|
}, |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
IsLatestVersion: modelarts.IsLatestVersion, |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("Cloudbrain", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
for i, task := range tasks { |
|
|
|
tasks[i].CanDel = cloudbrain.CanDeleteJob(ctx, &task.Cloudbrain) |
|
|
|
tasks[i].CanModify = cloudbrain.CanModifyJob(ctx, &task.Cloudbrain) |
|
|
|
} |
|
|
|
|
|
|
|
pager := context.NewPagination(int(count), setting.UI.IssuePagingNum, page, 5) |
|
|
|
pager.SetDefaultParams(ctx) |
|
|
|
ctx.Data["Page"] = pager |
|
|
|
|
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
ctx.Data["Tasks"] = tasks |
|
|
|
ctx.Data["CanCreate"] = cloudbrain.CanCreateOrDebugJob(ctx) |
|
|
|
ctx.Data["RepoIsEmpty"] = repo.IsEmpty |
|
|
|
ctx.HTML(200, tplModelArtsInferenceJobIndex) |
|
|
|
} |
|
|
|
func InferenceJobNew(ctx *context.Context) { |
|
|
|
err := inferenceJobNewDataPrepare(ctx) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get new inference-job info failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.HTML(200, tplModelArtsInferenceJobNew) |
|
|
|
} |
|
|
|
func inferenceJobNewDataPrepare(ctx *context.Context) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var jobName = "inference" + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["job_name"] = jobName |
|
|
|
|
|
|
|
attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
var flavorInfos modelarts.Flavor |
|
|
|
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["flavor_infos"] = flavorInfos.Info |
|
|
|
|
|
|
|
resultObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.ResultPath |
|
|
|
ctx.Data["result_url"] = resultObsPath |
|
|
|
ctx.Data["params"] = "" |
|
|
|
ctx.Data["branchName"] = ctx.Repo.BranchName |
|
|
|
|
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
|
|
|
|
func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModelArtsInferenceJobForm) error { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
t := time.Now() |
|
|
|
var jobName = cutString(ctx.User.Name, 5) + t.Format("2006010215") + strconv.Itoa(int(t.Unix()))[5:] |
|
|
|
ctx.Data["job_name"] = jobName |
|
|
|
|
|
|
|
attachs, err := models.GetModelArtsUserAttachments(ctx.User.ID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("GetAllUserAttachments failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["attachments"] = attachs |
|
|
|
|
|
|
|
var resourcePools modelarts.ResourcePool |
|
|
|
if err = json.Unmarshal([]byte(setting.ResourcePools), &resourcePools); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["resource_pools"] = resourcePools.Info |
|
|
|
|
|
|
|
var engines modelarts.Engine |
|
|
|
if err = json.Unmarshal([]byte(setting.Engines), &engines); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engines"] = engines.Info |
|
|
|
|
|
|
|
var versionInfos modelarts.VersionInfo |
|
|
|
if err = json.Unmarshal([]byte(setting.EngineVersions), &versionInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["engine_versions"] = versionInfos.Version |
|
|
|
|
|
|
|
var flavorInfos modelarts.Flavor |
|
|
|
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["flavor_infos"] = flavorInfos.Info |
|
|
|
|
|
|
|
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath |
|
|
|
ctx.Data["train_url"] = outputObsPath |
|
|
|
|
|
|
|
configList, err := getConfigList(modelarts.PerPage, 1, modelarts.SortByCreateTime, "desc", "", modelarts.ConfigTypeCustom) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("getConfigList failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
var Parameters modelarts.Parameters |
|
|
|
if err = json.Unmarshal([]byte(form.Params), &Parameters); err != nil { |
|
|
|
ctx.ServerError("json.Unmarshal failed:", err) |
|
|
|
return err |
|
|
|
} |
|
|
|
ctx.Data["params"] = Parameters.Parameter |
|
|
|
ctx.Data["config_list"] = configList.ParaConfigs |
|
|
|
ctx.Data["bootFile"] = form.BootFile |
|
|
|
ctx.Data["uuid"] = form.Attachment |
|
|
|
ctx.Data["branch_name"] = form.BranchName |
|
|
|
|
|
|
|
return nil |
|
|
|
} |
|
|
|
func InferenceJobShow(ctx *context.Context) { |
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
|
|
|
|
repo := ctx.Repo.Repository |
|
|
|
page := ctx.QueryInt("page") |
|
|
|
if page <= 0 { |
|
|
|
page = 1 |
|
|
|
} |
|
|
|
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ |
|
|
|
ListOptions: models.ListOptions{ |
|
|
|
Page: page, |
|
|
|
PageSize: setting.UI.IssuePagingNum, |
|
|
|
}, |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
JobID: jobID, |
|
|
|
}) |
|
|
|
|
|
|
|
if err != nil { |
|
|
|
log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
//设置权限 |
|
|
|
canNewJob, err := canUserCreateTrainJobVersion(ctx, VersionListTasks[0].UserID) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("canNewJob failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
ctx.Data["canNewJob"] = canNewJob |
|
|
|
|
|
|
|
//将运行参数转化为epoch_size = 3, device_target = Ascend的格式 |
|
|
|
for i, _ := range VersionListTasks { |
|
|
|
|
|
|
|
var parameters models.Parameters |
|
|
|
|
|
|
|
err := json.Unmarshal([]byte(VersionListTasks[i].Parameters), ¶meters) |
|
|
|
if err != nil { |
|
|
|
log.Error("Failed to Unmarshal Parameters: %s (%v)", VersionListTasks[i].Parameters, err) |
|
|
|
trainJobNewDataPrepare(ctx) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if len(parameters.Parameter) > 0 { |
|
|
|
paramTemp := "" |
|
|
|
for _, Parameter := range parameters.Parameter { |
|
|
|
param := Parameter.Label + " = " + Parameter.Value + "; " |
|
|
|
paramTemp = paramTemp + param |
|
|
|
} |
|
|
|
VersionListTasks[i].Parameters = paramTemp[:len(paramTemp)-2] |
|
|
|
} else { |
|
|
|
VersionListTasks[i].Parameters = "" |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) |
|
|
|
pager.SetDefaultParams(ctx) |
|
|
|
ctx.Data["Page"] = pager |
|
|
|
ctx.Data["jobID"] = jobID |
|
|
|
ctx.Data["jobName"] = VersionListTasks[0].JobName |
|
|
|
ctx.Data["version_list_task"] = VersionListTasks |
|
|
|
ctx.Data["version_list_count"] = VersionListCount |
|
|
|
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) |
|
|
|
} |
|
|
|
func InferenceJobStop(ctx *context.Context) { |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
task, err := models.GetCloudbrainByJobID(jobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) |
|
|
|
if err != nil { |
|
|
|
log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|
} |
|
|
|
|
|
|
|
func InferenceJobDel(ctx *context.Context) { |
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
repo := ctx.Repo.Repository |
|
|
|
|
|
|
|
VersionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ |
|
|
|
RepoID: repo.ID, |
|
|
|
Type: models.TypeCloudBrainTwo, |
|
|
|
JobType: string(models.JobTypeTrain), |
|
|
|
JobID: jobID, |
|
|
|
}) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("get VersionListTasks failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//删除modelarts上的任务记录 |
|
|
|
_, err = modelarts.DelTrainJob(jobID) |
|
|
|
if err != nil { |
|
|
|
log.Error("DelTrainJob(%s) failed:%v", jobID, err.Error()) |
|
|
|
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
//删除数据库Cloudbrain表的记录 |
|
|
|
for _, task := range VersionListTasks { |
|
|
|
err = models.DeleteJob(&task.Cloudbrain) |
|
|
|
if err != nil { |
|
|
|
ctx.ServerError("DeleteJob failed", err) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
|
} |
|
|
|
|
|
|
|
func ResultDownload(ctx *context.Context) { |
|
|
|
var ( |
|
|
|
err error |
|
|
|
) |
|
|
|
|
|
|
|
var jobID = ctx.Params(":jobid") |
|
|
|
versionName := ctx.Query("version_name") |
|
|
|
parentDir := ctx.Query("parent_dir") |
|
|
|
fileName := ctx.Query("file_name") |
|
|
|
log.Info("DownloadSingleModelFile start.") |
|
|
|
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, setting.OutPutPath, versionName, parentDir, fileName), "/") |
|
|
|
log.Info("Download path is:%s", path) |
|
|
|
|
|
|
|
url, err := storage.GetObsCreateSignedUrlByBucketAndKey(setting.Bucket, path) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"]) |
|
|
|
ctx.ServerError("GetObsCreateSignedUrl", err) |
|
|
|
return |
|
|
|
} |
|
|
|
http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusMovedPermanently) |
|
|
|
} |