Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/1278 Reviewed-by: ychao_1983 <ychao_1983@sina.com> tags/v1.22.1.1^2
| @@ -1210,3 +1210,28 @@ func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) { | |||
| And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) | |||
| return int(count), err | |||
| } | |||
// RestartCloudbrain replaces one Cloudbrain record with another inside a
// single database transaction: it deletes old, then inserts new, then commits.
//
// Parameters:
//   - old: the record passed to sess.Delete.
//   - new: the record passed to sess.Insert.
//
// Returns the first error reported by Begin, Delete, Insert or Commit, or nil
// when all four steps succeed. When Delete or Insert fails, Rollback is called
// before returning; the value returned by Rollback itself is discarded.
//
// NOTE(review): the parameter name `new` shadows Go's predeclared `new`
// function for the duration of this function body.
// NOTE(review): `x` is presumably the package-level xorm engine, and which rows
// Delete matches depends on how that engine treats the fields set on `old` —
// TODO confirm against the models package.
| func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) { | |||
| sess := x.NewSession() | |||
// The session is closed on every return path, including the error returns below.
| defer sess.Close() | |||
| if err = sess.Begin(); err != nil { | |||
| return err | |||
| } | |||
// Step 1: remove the previous record; undo the transaction if that fails.
| if _, err = sess.Delete(old); err != nil { | |||
| sess.Rollback() | |||
| return err | |||
| } | |||
// Step 2: store the replacement record; undo the delete if the insert fails.
| if _, err = sess.Insert(new); err != nil { | |||
| sess.Rollback() | |||
| return err | |||
| } | |||
// Step 3: make the delete and the insert visible together.
| if err = sess.Commit(); err != nil { | |||
| return err | |||
| } | |||
| return nil | |||
| } | |||
| @@ -82,7 +82,7 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| job, err := models.GetCloudbrainByJobID(jobID) | |||
| ctx.Cloudbrain = job | |||
| if !isAdminOrOwnerOrJobCreater(ctx, job, err) { | |||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
| @@ -94,6 +94,7 @@ func AdminOrJobCreaterRight(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| job, err := models.GetCloudbrainByJobID(jobID) | |||
| ctx.Cloudbrain = job | |||
| if !isAdminOrJobCreater(ctx, job, err) { | |||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||
| @@ -222,7 +223,7 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath, | |||
| return nil | |||
| } | |||
| func RestartTask(ctx *context.Context, task *models.Cloudbrain) error { | |||
| func RestartTask(ctx *context.Context, task *models.Cloudbrain, newJobID *string) error { | |||
| dataActualPath := setting.Attachment.Minio.RealPath + | |||
| setting.Attachment.Minio.Bucket + "/" + | |||
| setting.Attachment.Minio.BasePath + | |||
| @@ -312,7 +313,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain) error { | |||
| }, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"]) | |||
| log.Error("CreateJob failed:%v", err.Error(), ctx.Data["MsgID"]) | |||
| return err | |||
| } | |||
| if jobResult.Code != Success { | |||
| @@ -321,14 +322,29 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain) error { | |||
| } | |||
| var jobID = jobResult.Payload["jobId"].(string) | |||
| task.JobID = jobID | |||
| task.Status = string(models.JobWaiting) | |||
| err = models.UpdateJob(task) | |||
| newTask := &models.Cloudbrain{ | |||
| Status: string(models.JobWaiting), | |||
| UserID: task.UserID, | |||
| RepoID: task.RepoID, | |||
| JobID: jobID, | |||
| JobName: task.JobName, | |||
| SubTaskName: task.SubTaskName, | |||
| JobType: task.JobType, | |||
| Type: task.Type, | |||
| Uuid: task.Uuid, | |||
| Image: task.Image, | |||
| GpuQueue: task.GpuQueue, | |||
| ResourceSpecId: task.ResourceSpecId, | |||
| ComputeResource: task.ComputeResource, | |||
| } | |||
| err = models.RestartCloudbrain(task, newTask) | |||
| if err != nil { | |||
| log.Error("UpdateJob(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"]) | |||
| log.Error("RestartCloudbrain(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"]) | |||
| return err | |||
| } | |||
| *newJobID = jobID | |||
| return nil | |||
| } | |||
| @@ -47,6 +47,7 @@ type Context struct { | |||
| Repo *Repository | |||
| Org *Organization | |||
| Cloudbrain *models.Cloudbrain | |||
| } | |||
| // IsUserSiteAdmin returns true if current user is a site admin | |||
| @@ -251,17 +251,10 @@ func CloudBrainRestart(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| var resultCode = "0" | |||
| var errorMsg = "" | |||
| var status = "" | |||
| var status = string(models.JobWaiting) | |||
| task := ctx.Cloudbrain | |||
| for { | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"]) | |||
| resultCode = "-1" | |||
| errorMsg = "system error" | |||
| break | |||
| } | |||
| if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { | |||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||
| resultCode = "-1" | |||
| @@ -298,7 +291,7 @@ func CloudBrainRestart(ctx *context.Context) { | |||
| } | |||
| } | |||
| err = cloudbrain.RestartTask(ctx, task) | |||
| err = cloudbrain.RestartTask(ctx, task, &jobID) | |||
| if err != nil { | |||
| log.Error("RestartTask failed:%v", err.Error(), ctx.Data["MsgID"]) | |||
| resultCode = "-1" | |||
| @@ -306,9 +299,6 @@ func CloudBrainRestart(ctx *context.Context) { | |||
| break | |||
| } | |||
| status = task.Status | |||
| jobID = task.JobID | |||
| break | |||
| } | |||
| @@ -369,46 +359,19 @@ func CloudBrainShow(ctx *context.Context) { | |||
| } | |||
| func CloudBrainDebug(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| if !ctx.IsSigned { | |||
| log.Error("the user has not signed in") | |||
| ctx.Error(http.StatusForbidden, "", "the user has not signed in") | |||
| return | |||
| } | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobID failed", err) | |||
| return | |||
| } | |||
| debugUrl := setting.DebugServerHost + "jpylab_" + task.JobID + "_" + task.SubTaskName | |||
| debugUrl := setting.DebugServerHost + "jpylab_" + ctx.Cloudbrain.JobID + "_" + ctx.Cloudbrain.SubTaskName | |||
| ctx.Redirect(debugUrl) | |||
| } | |||
| func CloudBrainCommitImage(ctx *context.Context, form auth.CommitImageCloudBrainForm) { | |||
| var jobID = ctx.Params(":jobid") | |||
| if !ctx.IsSigned { | |||
| log.Error("the user has not signed in") | |||
| ctx.Error(http.StatusForbidden, "", "the user has not signed in") | |||
| return | |||
| } | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| ctx.JSON(200, map[string]string{ | |||
| "result_code": "-1", | |||
| "error_msg": "GetCloudbrainByJobID failed", | |||
| }) | |||
| return | |||
| } | |||
| err = cloudbrain.CommitImage(jobID, models.CommitImageParams{ | |||
| Ip: task.ContainerIp, | |||
| TaskContainerId: task.ContainerID, | |||
| err := cloudbrain.CommitImage(ctx.Cloudbrain.JobID, models.CommitImageParams{ | |||
| Ip: ctx.Cloudbrain.ContainerIp, | |||
| TaskContainerId: ctx.Cloudbrain.ContainerID, | |||
| ImageDescription: form.Description, | |||
| ImageTag: form.Tag, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CommitImage(%s) failed:%v", task.JobName, err.Error(), ctx.Data["msgID"]) | |||
| log.Error("CommitImage(%s) failed:%v", ctx.Cloudbrain.JobName, err.Error(), ctx.Data["msgID"]) | |||
| ctx.JSON(200, map[string]string{ | |||
| "result_code": "-1", | |||
| "error_msg": "CommitImage failed", | |||
| @@ -428,15 +391,8 @@ func CloudBrainStop(ctx *context.Context) { | |||
| var errorMsg = "" | |||
| var status = "" | |||
| task := ctx.Cloudbrain | |||
| for { | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) | |||
| resultCode = "-1" | |||
| errorMsg = "system error" | |||
| break | |||
| } | |||
| if task.Status == string(models.JobStopped) || task.Status == string(models.JobFailed) { | |||
| log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) | |||
| resultCode = "-1" | |||
| @@ -444,7 +400,7 @@ func CloudBrainStop(ctx *context.Context) { | |||
| break | |||
| } | |||
| err = cloudbrain.StopJob(jobID) | |||
| err := cloudbrain.StopJob(jobID) | |||
| if err != nil { | |||
| log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) | |||
| resultCode = "-1" | |||
| @@ -554,12 +510,7 @@ func logErrorAndUpdateJobStatus(err error, taskInfo *models.Cloudbrain) { | |||
| } | |||
| func CloudBrainDel(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobID failed", err) | |||
| return | |||
| } | |||
| task := ctx.Cloudbrain | |||
| if task.Status != string(models.JobStopped) && task.Status != string(models.JobFailed) { | |||
| log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) | |||
| @@ -567,7 +518,7 @@ func CloudBrainDel(ctx *context.Context) { | |||
| return | |||
| } | |||
| err = models.DeleteJob(task) | |||
| err := models.DeleteJob(task) | |||
| if err != nil { | |||
| ctx.ServerError("DeleteJob failed", err) | |||
| return | |||
| @@ -192,11 +192,6 @@ func NotebookShow(ctx *context.Context) { | |||
| func NotebookDebug(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| _, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobID failed", err) | |||
| return | |||
| } | |||
| result, err := modelarts.GetJob(jobID) | |||
| if err != nil { | |||
| @@ -325,11 +320,7 @@ func NotebookManage(ctx *context.Context) { | |||
| func NotebookDel(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobID failed", err) | |||
| return | |||
| } | |||
| task := ctx.Cloudbrain | |||
| if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) { | |||
| log.Error("the job(%s) has not been stopped", task.JobName) | |||
| @@ -337,7 +328,7 @@ func NotebookDel(ctx *context.Context) { | |||
| return | |||
| } | |||
| _, err = modelarts.DelNotebook(jobID) | |||
| _, err := modelarts.DelNotebook(jobID) | |||
| if err != nil { | |||
| log.Error("DelJob(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.ServerError("DelJob failed", err) | |||
| @@ -1421,14 +1412,9 @@ func TrainJobDel(ctx *context.Context) { | |||
| func TrainJobStop(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| return | |||
| } | |||
| task := ctx.Cloudbrain | |||
| _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) | |||
| _, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) | |||
| if err != nil { | |||
| log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||