|
|
@@ -45,6 +45,7 @@ import ( |
|
|
|
|
|
|
|
|
const ( |
|
|
const ( |
|
|
tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show" |
|
|
tplGrampusTrainJobShow base.TplName = "repo/grampus/trainjob/show" |
|
|
|
|
|
tplGrampusNotebookShow base.TplName = "repo/grampus/notebook/show" |
|
|
|
|
|
|
|
|
//GPU |
|
|
//GPU |
|
|
tplGrampusNotebookGPUNew base.TplName = "repo/grampus/notebook/gpu/new" |
|
|
tplGrampusNotebookGPUNew base.TplName = "repo/grampus/notebook/gpu/new" |
|
|
@@ -104,6 +105,10 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook |
|
|
repo := ctx.Repo.Repository |
|
|
repo := ctx.Repo.Repository |
|
|
branchName := form.BranchName |
|
|
branchName := form.BranchName |
|
|
image := strings.TrimSpace(form.Image) |
|
|
image := strings.TrimSpace(form.Image) |
|
|
|
|
|
codeLocalPath := setting.JobPath + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
|
|
|
|
|
|
|
|
codeStoragePath := setting.CBCodePathPrefix + jobName + cloudbrain.CodeMountPath + "/" |
|
|
|
|
|
|
|
|
tpl := tplGrampusNotebookGPUNew |
|
|
tpl := tplGrampusNotebookGPUNew |
|
|
processType := grampus.ProcessorTypeGPU |
|
|
processType := grampus.ProcessorTypeGPU |
|
|
computeSource := models.GPUResource |
|
|
computeSource := models.GPUResource |
|
|
@@ -112,7 +117,8 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook |
|
|
tpl = tplGrampusNotebookNPUNew |
|
|
tpl = tplGrampusNotebookNPUNew |
|
|
processType = grampus.ProcessorTypeNPU |
|
|
processType = grampus.ProcessorTypeNPU |
|
|
computeSource = models.NPUResource |
|
|
computeSource = models.NPUResource |
|
|
computeSourceSimple := models.NPU |
|
|
|
|
|
|
|
|
computeSourceSimple = models.NPU |
|
|
|
|
|
codeStoragePath = grampus.JobPath + jobName + modelarts.CodePath |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) |
|
|
lock := redis_lock.NewDistributeLock(redis_key.CloudbrainBindingJobNameKey(fmt.Sprint(repo.ID), string(models.JobTypeDebug), displayJobName)) |
|
|
@@ -135,13 +141,13 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook |
|
|
count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeSource) |
|
|
count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), computeSource) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
log.Error("GetGrampusCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
|
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
return |
|
|
return |
|
|
} else { |
|
|
} else { |
|
|
if count >= 1 { |
|
|
if count >= 1 { |
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
|
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) |
|
|
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tpl, &form) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
@@ -152,14 +158,14 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook |
|
|
if err == nil { |
|
|
if err == nil { |
|
|
if len(tasks) != 0 { |
|
|
if len(tasks) != 0 { |
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
log.Error("the job name did already exist", ctx.Data["MsgID"]) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
|
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
ctx.RenderWithErr("the job name did already exist", tpl, &form) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
} else { |
|
|
} else { |
|
|
if !models.IsErrJobNotExist(err) { |
|
|
if !models.IsErrJobNotExist(err) { |
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
log.Error("system error, %v", err, ctx.Data["MsgID"]) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
|
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
ctx.RenderWithErr("system error", tpl, &form) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
@@ -172,41 +178,80 @@ func GrampusNotebookCreate(ctx *context.Context, form auth.CreateGrampusNotebook |
|
|
Cluster: models.C2NetCluster, |
|
|
Cluster: models.C2NetCluster, |
|
|
}) |
|
|
}) |
|
|
if err != nil || spec == nil { |
|
|
if err != nil || spec == nil { |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
|
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr("Resource specification not available", tpl, &form) |
|
|
ctx.RenderWithErr("Resource specification not available", tpl, &form) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { |
|
|
if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { |
|
|
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) |
|
|
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
|
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) |
|
|
ctx.RenderWithErr(ctx.Tr("points.insufficient_points_balance"), tpl, &form) |
|
|
return |
|
|
return |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
datasetInfos, datasetNames, err := models.GetDatasetInfo(uuid, computeSourceSimple) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) |
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
//prepare code and out path |
|
|
|
|
|
_, err = ioutil.ReadDir(codeLocalPath) |
|
|
|
|
|
if err == nil { |
|
|
|
|
|
os.RemoveAll(codeLocalPath) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { |
|
|
|
|
|
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) |
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if processType == grampus.ProcessorTypeGPU { |
|
|
|
|
|
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { |
|
|
|
|
|
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) |
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
|
|
|
|
if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { |
|
|
|
|
|
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) |
|
|
|
|
|
grampusNotebookNewDataPrepare(ctx, processType) |
|
|
|
|
|
ctx.RenderWithErr(ctx.Tr("cloudbrain.load_code_failed"), tpl, &form) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) |
|
|
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) |
|
|
command := "" |
|
|
command := "" |
|
|
|
|
|
|
|
|
req := &grampus.GenerateNotebookReq{ |
|
|
|
|
|
JobName: jobName, |
|
|
|
|
|
DisplayJobName: displayJobName, |
|
|
|
|
|
ComputeResource: computeSource, |
|
|
|
|
|
ProcessType: processType, |
|
|
|
|
|
Command: command, |
|
|
|
|
|
ImageUrl: image, |
|
|
|
|
|
ImageId: form.ImageID, |
|
|
|
|
|
Description: description, |
|
|
|
|
|
Uuid: uuid, |
|
|
|
|
|
CommitID: commitID, |
|
|
|
|
|
BranchName: branchName, |
|
|
|
|
|
DatasetNames: form.DatasetName, |
|
|
|
|
|
WorkServerNumber: 1, |
|
|
|
|
|
Spec: spec, |
|
|
|
|
|
|
|
|
req := &grampus.GenerateNotebookJobReq{ |
|
|
|
|
|
JobName: jobName, |
|
|
|
|
|
DisplayJobName: displayJobName, |
|
|
|
|
|
ComputeResource: computeSource, |
|
|
|
|
|
ProcessType: processType, |
|
|
|
|
|
Command: command, |
|
|
|
|
|
ImageUrl: image, |
|
|
|
|
|
ImageId: form.ImageID, |
|
|
|
|
|
Description: description, |
|
|
|
|
|
Uuid: uuid, |
|
|
|
|
|
CommitID: commitID, |
|
|
|
|
|
BranchName: branchName, |
|
|
|
|
|
DatasetNames: datasetNames, |
|
|
|
|
|
DatasetInfos: datasetInfos, |
|
|
|
|
|
Spec: spec, |
|
|
|
|
|
CodeStoragePath: codeStoragePath, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
_, err = grampus.GenerateNotebook(ctx, req) |
|
|
|
|
|
|
|
|
_, err = grampus.GenerateNotebookJob(ctx, req) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) |
|
|
|
|
|
|
|
|
log.Error("GenerateNotebookJob failed:%v", err.Error(), ctx.Data["MsgID"]) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
grampusTrainJobNewDataPrepare(ctx, processType) |
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
ctx.RenderWithErr(err.Error(), tpl, &form) |
|
|
return |
|
|
return |
|
|
@@ -844,22 +889,54 @@ func grampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain |
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func GetGrampusNotebook(ctx *context.APIContext) { |
|
|
|
|
|
var ( |
|
|
|
|
|
err error |
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
ID := ctx.Params(":id") |
|
|
|
|
|
job, err := models.GetCloudbrainByID(ID) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
ctx.NotFound("", err) |
|
|
|
|
|
log.Error("GetCloudbrainByID failed:", err) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
jobAfter, err := cloudbrainTask.SyncGrampusNotebookStatus(job) |
|
|
|
|
|
|
|
|
|
|
|
if err != nil { |
|
|
|
|
|
ctx.NotFound(err) |
|
|
|
|
|
log.Error("Sync cloud brain one status failed:", err) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
ctx.JSON(http.StatusOK, map[string]interface{}{ |
|
|
|
|
|
"ID": ID, |
|
|
|
|
|
"JobName": jobAfter.JobName, |
|
|
|
|
|
"JobStatus": jobAfter.Status, |
|
|
|
|
|
"SubState": "", |
|
|
|
|
|
"CreatedTime": jobAfter.CreatedUnix.Format("2006-01-02 15:04:05"), |
|
|
|
|
|
"CompletedTime": jobAfter.UpdatedUnix.Format("2006-01-02 15:04:05"), |
|
|
|
|
|
"JobDuration": jobAfter.TrainJobDuration, |
|
|
|
|
|
}) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
func GrampusStopJob(ctx *context.Context) { |
|
|
func GrampusStopJob(ctx *context.Context) { |
|
|
var ID = ctx.Params(":jobid") |
|
|
|
|
|
|
|
|
var ID = ctx.Params(":id") |
|
|
var resultCode = "0" |
|
|
var resultCode = "0" |
|
|
var errorMsg = "" |
|
|
var errorMsg = "" |
|
|
var status = "" |
|
|
var status = "" |
|
|
|
|
|
|
|
|
task := ctx.Cloudbrain |
|
|
task := ctx.Cloudbrain |
|
|
for { |
|
|
for { |
|
|
if task.Status == string(models.GrampusStatusStopped) || task.Status == string(models.GrampusStatusFailed) || task.Status == string(models.GrampusStatusSucceeded) { |
|
|
|
|
|
|
|
|
if task.Status == models.GrampusStatusStopped || task.Status == models.GrampusStatusFailed || task.Status == models.GrampusStatusSucceeded { |
|
|
log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) |
|
|
log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) |
|
|
resultCode = "-1" |
|
|
resultCode = "-1" |
|
|
errorMsg = "system error" |
|
|
|
|
|
|
|
|
errorMsg = "System error" |
|
|
break |
|
|
break |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
res, err := grampus.StopJob(task.JobID) |
|
|
|
|
|
|
|
|
res, err := grampus.StopJob(task.JobID, task.JobType) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) |
|
|
log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) |
|
|
resultCode = strconv.Itoa(res.ErrorCode) |
|
|
resultCode = strconv.Itoa(res.ErrorCode) |
|
|
@@ -896,6 +973,25 @@ func GrampusStopJob(ctx *context.Context) { |
|
|
}) |
|
|
}) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func GrampusNotebookDel(ctx *context.Context) { |
|
|
|
|
|
var listType = ctx.Query("listType") |
|
|
|
|
|
if err := deleteGrampusJob(ctx); err != nil { |
|
|
|
|
|
log.Error("deleteGrampusJob failed: %v", err, ctx.Data["msgID"]) |
|
|
|
|
|
ctx.ServerError(err.Error(), err) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
var isAdminPage = ctx.Query("isadminpage") |
|
|
|
|
|
var isHomePage = ctx.Query("ishomepage") |
|
|
|
|
|
if ctx.IsUserSiteAdmin() && isAdminPage == "true" { |
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + "/admin" + "/cloudbrains") |
|
|
|
|
|
} else if isHomePage == "true" { |
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + "/cloudbrains") |
|
|
|
|
|
} else { |
|
|
|
|
|
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=" + listType) |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
func GrampusTrainJobDel(ctx *context.Context) { |
|
|
func GrampusTrainJobDel(ctx *context.Context) { |
|
|
var listType = ctx.Query("listType") |
|
|
var listType = ctx.Query("listType") |
|
|
if err := deleteGrampusJob(ctx); err != nil { |
|
|
if err := deleteGrampusJob(ctx); err != nil { |
|
|
@@ -918,9 +1014,9 @@ func GrampusTrainJobDel(ctx *context.Context) { |
|
|
func deleteGrampusJob(ctx *context.Context) error { |
|
|
func deleteGrampusJob(ctx *context.Context) error { |
|
|
task := ctx.Cloudbrain |
|
|
task := ctx.Cloudbrain |
|
|
|
|
|
|
|
|
if task.Status != string(models.GrampusStatusStopped) && task.Status != string(models.GrampusStatusSucceeded) && task.Status != string(models.GrampusStatusFailed) { |
|
|
|
|
|
|
|
|
if task.Status != models.GrampusStatusStopped && task.Status != models.GrampusStatusSucceeded && task.Status != models.GrampusStatusFailed { |
|
|
log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) |
|
|
log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) |
|
|
return errors.New("the job has not been stopped") |
|
|
|
|
|
|
|
|
return errors.New(ctx.Tr("cloudbrain.Not_Stopped")) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
err := models.DeleteJob(task) |
|
|
err := models.DeleteJob(task) |
|
|
@@ -938,6 +1034,89 @@ func deleteGrampusJob(ctx *context.Context) error { |
|
|
return nil |
|
|
return nil |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func GrampusNotebookShow(ctx *context.Context) { |
|
|
|
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
|
|
|
|
var task *models.Cloudbrain |
|
|
|
|
|
task, err := models.GetCloudbrainByJobIDWithDeleted(ctx.Params(":jobid")) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetCloudbrainByJobID failed:" + err.Error()) |
|
|
|
|
|
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
task.ContainerIp = "" |
|
|
|
|
|
|
|
|
|
|
|
if task.DeletedAt.IsZero() { //normal record |
|
|
|
|
|
result, err := grampus.GetNotebookJob(task.JobID) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetJob failed:" + err.Error()) |
|
|
|
|
|
ctx.NotFound(ctx.Req.URL.RequestURI(), nil) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if result != nil { |
|
|
|
|
|
if len(result.JobInfo.Tasks[0].CenterID) == 1 && len(result.JobInfo.Tasks[0].CenterName) == 1 { |
|
|
|
|
|
task.AiCenter = result.JobInfo.Tasks[0].CenterID[0] + "+" + result.JobInfo.Tasks[0].CenterName[0] |
|
|
|
|
|
} |
|
|
|
|
|
oldStatus := task.Status |
|
|
|
|
|
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) |
|
|
|
|
|
if task.Status != oldStatus || task.Status == models.GrampusStatusRunning { |
|
|
|
|
|
task.Duration = result.JobInfo.RunSec |
|
|
|
|
|
if task.Duration < 0 { |
|
|
|
|
|
task.Duration = 0 |
|
|
|
|
|
} |
|
|
|
|
|
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) |
|
|
|
|
|
|
|
|
|
|
|
if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { |
|
|
|
|
|
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt) |
|
|
|
|
|
} |
|
|
|
|
|
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { |
|
|
|
|
|
task.EndTime = task.StartTime.Add(task.Duration) |
|
|
|
|
|
} |
|
|
|
|
|
task.CorrectCreateUnix() |
|
|
|
|
|
if oldStatus != task.Status { |
|
|
|
|
|
notification.NotifyChangeCloudbrainStatus(task, oldStatus) |
|
|
|
|
|
if models.IsTrainJobTerminal(task.Status) && task.ComputeResource == models.NPUResource { |
|
|
|
|
|
if len(result.JobInfo.Tasks[0].CenterID) == 1 { |
|
|
|
|
|
urchin.GetBackNpuModel(task.ID, grampus.GetRemoteEndPoint(result.JobInfo.Tasks[0].CenterID[0]), grampus.BucketRemote, grampus.GetNpuModelObjectKey(task.JobName), grampus.GetCenterProxy(setting.Grampus.LocalCenterID)) |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
err = models.UpdateJob(task) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("UpdateJob failed:" + err.Error()) |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if len(task.Parameters) > 0 { |
|
|
|
|
|
var parameters models.Parameters |
|
|
|
|
|
err := json.Unmarshal([]byte(task.Parameters), ¶meters) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("Failed to Unmarshal Parameters: %s (%v)", task.Parameters, err) |
|
|
|
|
|
ctx.ServerError("system error", err) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if len(parameters.Parameter) > 0 { |
|
|
|
|
|
paramTemp := "" |
|
|
|
|
|
for _, Parameter := range parameters.Parameter { |
|
|
|
|
|
param := Parameter.Label + " = " + Parameter.Value + "; " |
|
|
|
|
|
paramTemp = paramTemp + param |
|
|
|
|
|
} |
|
|
|
|
|
task.Parameters = paramTemp[:len(paramTemp)-2] |
|
|
|
|
|
} else { |
|
|
|
|
|
task.Parameters = "" |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
prepareSpec4Show(ctx, task) |
|
|
|
|
|
ctx.Data["datasetDownload"] = GetCloudBrainDataSetInfo(task.Uuid, task.DatasetName, false) |
|
|
|
|
|
ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) |
|
|
|
|
|
ctx.Data["ai_center"] = cloudbrainService.GetAiCenterShow(task.AiCenter, ctx) |
|
|
|
|
|
ctx.HTML(http.StatusOK, tplGrampusNotebookShow) |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
func GrampusTrainJobShow(ctx *context.Context) { |
|
|
func GrampusTrainJobShow(ctx *context.Context) { |
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
ctx.Data["PageIsCloudBrain"] = true |
|
|
|
|
|
|
|
|
@@ -1281,3 +1460,134 @@ func HandleTaskWithAiCenter(ctx *context.Context) { |
|
|
r["updateCounts"] = updateCounts |
|
|
r["updateCounts"] = updateCounts |
|
|
ctx.JSON(http.StatusOK, response.SuccessWithData(r)) |
|
|
ctx.JSON(http.StatusOK, response.SuccessWithData(r)) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func GrampusNotebookDebug(ctx *context.Context) { |
|
|
|
|
|
|
|
|
|
|
|
result, err := grampus.GetNotebookJob(ctx.Cloudbrain.JobID) |
|
|
|
|
|
|
|
|
|
|
|
if err != nil { |
|
|
|
|
|
ctx.RenderWithErr(err.Error(), tplDebugJobIndex, nil) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
if len(result.JobInfo.Tasks) > 0 { |
|
|
|
|
|
ctx.Redirect(result.JobInfo.Tasks[0].Url + "?token=" + result.JobInfo.Tasks[0].Token) |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
ctx.NotFound("Can not find the job.", nil) |
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func GrampusNotebookRestart(ctx *context.Context) { |
|
|
|
|
|
var id = ctx.Params(":id") |
|
|
|
|
|
var resultCode = "-1" |
|
|
|
|
|
var errorMsg = "" |
|
|
|
|
|
var status = "" |
|
|
|
|
|
var spec *models.Specification |
|
|
|
|
|
|
|
|
|
|
|
task := ctx.Cloudbrain |
|
|
|
|
|
if ctx.Written() { |
|
|
|
|
|
return |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
for { |
|
|
|
|
|
|
|
|
|
|
|
if task.Status != models.GrampusStatusStopped && task.Status != models.GrampusStatusSucceeded && task.Status != models.GrampusStatusFailed { |
|
|
|
|
|
log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) |
|
|
|
|
|
errorMsg = "the job is not stopped" |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
count, err := cloudbrainTask.GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeC2Net, string(models.JobTypeDebug), task.ComputeResource) |
|
|
|
|
|
|
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) |
|
|
|
|
|
errorMsg = "system error" |
|
|
|
|
|
break |
|
|
|
|
|
} else { |
|
|
|
|
|
if count >= 1 { |
|
|
|
|
|
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) |
|
|
|
|
|
resultCode = "2" |
|
|
|
|
|
errorMsg = ctx.Tr("repo.cloudbrain.morethanonejob") |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
oldSpec, err := resource.GetCloudbrainSpec(task.ID) |
|
|
|
|
|
if err != nil || oldSpec == nil { |
|
|
|
|
|
log.Error("NotebookManage GetCloudbrainSpec error.%v", err) |
|
|
|
|
|
errorMsg = "Resource specification not available" |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
computeSourceSimple := models.GPU |
|
|
|
|
|
action := models.ActionCreateGrampusGPUDebugTask |
|
|
|
|
|
if task.ComputeResource == models.NPUResource { |
|
|
|
|
|
computeSourceSimple = models.NPU |
|
|
|
|
|
action = models.ActionCreateGrampusNPUDebugTask |
|
|
|
|
|
} |
|
|
|
|
|
spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ |
|
|
|
|
|
JobType: models.JobType(task.JobType), |
|
|
|
|
|
ComputeResource: computeSourceSimple, |
|
|
|
|
|
Cluster: models.C2NetCluster, |
|
|
|
|
|
}) |
|
|
|
|
|
if err != nil || spec == nil { |
|
|
|
|
|
log.Error("NotebookManage GetAndCheckSpec error.task.id = %d", task.ID) |
|
|
|
|
|
errorMsg = "Resource specification not support any more" |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { |
|
|
|
|
|
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) |
|
|
|
|
|
errorMsg = ctx.Tr("points.insufficient_points_balance") |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
createTime := timeutil.TimeStampNow() |
|
|
|
|
|
|
|
|
|
|
|
res, err := grampus.RestartNotebookJob(task.JobID) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"]) |
|
|
|
|
|
errorMsg = err.Error() |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
newTask := &models.Cloudbrain{ |
|
|
|
|
|
Status: res.Status, |
|
|
|
|
|
UserID: task.UserID, |
|
|
|
|
|
RepoID: task.RepoID, |
|
|
|
|
|
JobID: res.NewId, |
|
|
|
|
|
JobName: task.JobName, |
|
|
|
|
|
DisplayJobName: task.DisplayJobName, |
|
|
|
|
|
JobType: task.JobType, |
|
|
|
|
|
Type: task.Type, |
|
|
|
|
|
Uuid: task.Uuid, |
|
|
|
|
|
Image: task.Image, |
|
|
|
|
|
ComputeResource: task.ComputeResource, |
|
|
|
|
|
Description: task.Description, |
|
|
|
|
|
CreatedUnix: createTime, |
|
|
|
|
|
UpdatedUnix: createTime, |
|
|
|
|
|
Spec: spec, |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
err = models.RestartCloudbrain(task, newTask) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) |
|
|
|
|
|
errorMsg = "system error" |
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
id = strconv.FormatInt(newTask.ID, 10) |
|
|
|
|
|
|
|
|
|
|
|
status = res.Status |
|
|
|
|
|
resultCode = "0" |
|
|
|
|
|
|
|
|
|
|
|
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, id, newTask.DisplayJobName, action) |
|
|
|
|
|
|
|
|
|
|
|
break |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
ctx.JSON(200, map[string]string{ |
|
|
|
|
|
"result_code": resultCode, |
|
|
|
|
|
"error_msg": errorMsg, |
|
|
|
|
|
"status": status, |
|
|
|
|
|
"id": id, |
|
|
|
|
|
}) |
|
|
|
|
|
} |