Browse Source

Merge pull request '云脑相关内部代码优化' (#1278) from cb-opt into V20220110

Reviewed-on: https://git.openi.org.cn/OpenI/aiforge/pulls/1278
Reviewed-by: ychao_1983 <ychao_1983@sina.com>
tags/v1.22.1.1^2
ychao_1983 3 years ago
parent
commit
a5666dd7de
5 changed files with 65 additions and 86 deletions
  1. +25
    -0
      models/cloudbrain.go
  2. +23
    -7
      modules/cloudbrain/cloudbrain.go
  3. +1
    -0
      modules/context/context.go
  4. +12
    -61
      routers/repo/cloudbrain.go
  5. +4
    -18
      routers/repo/modelarts.go

+ 25
- 0
models/cloudbrain.go View File

@@ -1210,3 +1210,28 @@ func GetCloudbrainTrainJobCountByUserID(userID int64) (int, error) {
And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain)) And("job_type = ? and user_id = ? and type = ?", JobTypeTrain, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
return int(count), err return int(count), err
} }

// RestartCloudbrain swaps one Cloudbrain record for another inside a single
// database transaction: it deletes old, inserts new, and then commits.
//
// If the transaction cannot be started, that error is returned as-is. If the
// delete or the insert fails, the transaction is rolled back and the failing
// step's error is returned. Otherwise the result of the commit is returned
// (nil on success).
func RestartCloudbrain(old *Cloudbrain, new *Cloudbrain) (err error) {
	tx := x.NewSession()
	defer tx.Close()

	err = tx.Begin()
	if err != nil {
		return err
	}

	// abort rolls the transaction back and hands the original failure to the
	// caller; the rollback's own result is intentionally not inspected.
	abort := func(cause error) error {
		tx.Rollback()
		return cause
	}

	// Remove the previous record first, then store its replacement.
	if _, err = tx.Delete(old); err != nil {
		return abort(err)
	}
	if _, err = tx.Insert(new); err != nil {
		return abort(err)
	}

	return tx.Commit()
}

+ 23
- 7
modules/cloudbrain/cloudbrain.go View File

@@ -82,7 +82,7 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) {
var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")


job, err := models.GetCloudbrainByJobID(jobID) job, err := models.GetCloudbrainByJobID(jobID)
ctx.Cloudbrain = job
if !isAdminOrOwnerOrJobCreater(ctx, job, err) { if !isAdminOrOwnerOrJobCreater(ctx, job, err) {


ctx.NotFound(ctx.Req.URL.RequestURI(), nil) ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
@@ -94,6 +94,7 @@ func AdminOrJobCreaterRight(ctx *context.Context) {


var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")
job, err := models.GetCloudbrainByJobID(jobID) job, err := models.GetCloudbrainByJobID(jobID)
ctx.Cloudbrain = job
if !isAdminOrJobCreater(ctx, job, err) { if !isAdminOrJobCreater(ctx, job, err) {


ctx.NotFound(ctx.Req.URL.RequestURI(), nil) ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
@@ -222,7 +223,7 @@ func GenerateTask(ctx *context.Context, jobName, image, command, uuid, codePath,
return nil return nil
} }


func RestartTask(ctx *context.Context, task *models.Cloudbrain) error {
func RestartTask(ctx *context.Context, task *models.Cloudbrain, newJobID *string) error {
dataActualPath := setting.Attachment.Minio.RealPath + dataActualPath := setting.Attachment.Minio.RealPath +
setting.Attachment.Minio.Bucket + "/" + setting.Attachment.Minio.Bucket + "/" +
setting.Attachment.Minio.BasePath + setting.Attachment.Minio.BasePath +
@@ -312,7 +313,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain) error {
}, },
}) })
if err != nil { if err != nil {
log.Error("CreateJob failed:", err.Error(), ctx.Data["MsgID"])
log.Error("CreateJob failed:%v", err.Error(), ctx.Data["MsgID"])
return err return err
} }
if jobResult.Code != Success { if jobResult.Code != Success {
@@ -321,14 +322,29 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain) error {
} }


var jobID = jobResult.Payload["jobId"].(string) var jobID = jobResult.Payload["jobId"].(string)
task.JobID = jobID
task.Status = string(models.JobWaiting)
err = models.UpdateJob(task)
newTask := &models.Cloudbrain{
Status: string(models.JobWaiting),
UserID: task.UserID,
RepoID: task.RepoID,
JobID: jobID,
JobName: task.JobName,
SubTaskName: task.SubTaskName,
JobType: task.JobType,
Type: task.Type,
Uuid: task.Uuid,
Image: task.Image,
GpuQueue: task.GpuQueue,
ResourceSpecId: task.ResourceSpecId,
ComputeResource: task.ComputeResource,
}


err = models.RestartCloudbrain(task, newTask)
if err != nil { if err != nil {
log.Error("UpdateJob(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"])
log.Error("RestartCloudbrain(%s) failed:%v", jobName, err.Error(), ctx.Data["MsgID"])
return err return err
} }


*newJobID = jobID

return nil return nil
} }

+ 1
- 0
modules/context/context.go View File

@@ -47,6 +47,7 @@ type Context struct {


Repo *Repository Repo *Repository
Org *Organization Org *Organization
Cloudbrain *models.Cloudbrain
} }


// IsUserSiteAdmin returns true if current user is a site admin // IsUserSiteAdmin returns true if current user is a site admin


+ 12
- 61
routers/repo/cloudbrain.go View File

@@ -251,17 +251,10 @@ func CloudBrainRestart(ctx *context.Context) {
var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")
var resultCode = "0" var resultCode = "0"
var errorMsg = "" var errorMsg = ""
var status = ""
var status = string(models.JobWaiting)


task := ctx.Cloudbrain
for { for {
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}

if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) { if task.Status != string(models.JobStopped) && task.Status != string(models.JobSucceeded) && task.Status != string(models.JobFailed) {
log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
resultCode = "-1" resultCode = "-1"
@@ -298,7 +291,7 @@ func CloudBrainRestart(ctx *context.Context) {
} }
} }


err = cloudbrain.RestartTask(ctx, task)
err = cloudbrain.RestartTask(ctx, task, &jobID)
if err != nil { if err != nil {
log.Error("RestartTask failed:%v", err.Error(), ctx.Data["MsgID"]) log.Error("RestartTask failed:%v", err.Error(), ctx.Data["MsgID"])
resultCode = "-1" resultCode = "-1"
@@ -306,9 +299,6 @@ func CloudBrainRestart(ctx *context.Context) {
break break
} }


status = task.Status
jobID = task.JobID

break break
} }


@@ -369,46 +359,19 @@ func CloudBrainShow(ctx *context.Context) {
} }


func CloudBrainDebug(ctx *context.Context) { func CloudBrainDebug(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
if !ctx.IsSigned {
log.Error("the user has not signed in")
ctx.Error(http.StatusForbidden, "", "the user has not signed in")
return
}
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
ctx.ServerError("GetCloudbrainByJobID failed", err)
return
}

debugUrl := setting.DebugServerHost + "jpylab_" + task.JobID + "_" + task.SubTaskName
debugUrl := setting.DebugServerHost + "jpylab_" + ctx.Cloudbrain.JobID + "_" + ctx.Cloudbrain.SubTaskName
ctx.Redirect(debugUrl) ctx.Redirect(debugUrl)
} }


func CloudBrainCommitImage(ctx *context.Context, form auth.CommitImageCloudBrainForm) { func CloudBrainCommitImage(ctx *context.Context, form auth.CommitImageCloudBrainForm) {
var jobID = ctx.Params(":jobid")
if !ctx.IsSigned {
log.Error("the user has not signed in")
ctx.Error(http.StatusForbidden, "", "the user has not signed in")
return
}
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
ctx.JSON(200, map[string]string{
"result_code": "-1",
"error_msg": "GetCloudbrainByJobID failed",
})
return
}

err = cloudbrain.CommitImage(jobID, models.CommitImageParams{
Ip: task.ContainerIp,
TaskContainerId: task.ContainerID,
err := cloudbrain.CommitImage(ctx.Cloudbrain.JobID, models.CommitImageParams{
Ip: ctx.Cloudbrain.ContainerIp,
TaskContainerId: ctx.Cloudbrain.ContainerID,
ImageDescription: form.Description, ImageDescription: form.Description,
ImageTag: form.Tag, ImageTag: form.Tag,
}) })
if err != nil { if err != nil {
log.Error("CommitImage(%s) failed:%v", task.JobName, err.Error(), ctx.Data["msgID"])
log.Error("CommitImage(%s) failed:%v", ctx.Cloudbrain.JobName, err.Error(), ctx.Data["msgID"])
ctx.JSON(200, map[string]string{ ctx.JSON(200, map[string]string{
"result_code": "-1", "result_code": "-1",
"error_msg": "CommitImage failed", "error_msg": "CommitImage failed",
@@ -428,15 +391,8 @@ func CloudBrainStop(ctx *context.Context) {
var errorMsg = "" var errorMsg = ""
var status = "" var status = ""


task := ctx.Cloudbrain
for { for {
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err, ctx.Data["msgID"])
resultCode = "-1"
errorMsg = "system error"
break
}

if task.Status == string(models.JobStopped) || task.Status == string(models.JobFailed) { if task.Status == string(models.JobStopped) || task.Status == string(models.JobFailed) {
log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"]) log.Error("the job(%s) has been stopped", task.JobName, ctx.Data["msgID"])
resultCode = "-1" resultCode = "-1"
@@ -444,7 +400,7 @@ func CloudBrainStop(ctx *context.Context) {
break break
} }


err = cloudbrain.StopJob(jobID)
err := cloudbrain.StopJob(jobID)
if err != nil { if err != nil {
log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"]) log.Error("StopJob(%s) failed:%v", task.JobName, err, ctx.Data["msgID"])
resultCode = "-1" resultCode = "-1"
@@ -554,12 +510,7 @@ func logErrorAndUpdateJobStatus(err error, taskInfo *models.Cloudbrain) {
} }


func CloudBrainDel(ctx *context.Context) { func CloudBrainDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
ctx.ServerError("GetCloudbrainByJobID failed", err)
return
}
task := ctx.Cloudbrain


if task.Status != string(models.JobStopped) && task.Status != string(models.JobFailed) { if task.Status != string(models.JobStopped) && task.Status != string(models.JobFailed) {
log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"]) log.Error("the job(%s) has not been stopped", task.JobName, ctx.Data["msgID"])
@@ -567,7 +518,7 @@ func CloudBrainDel(ctx *context.Context) {
return return
} }


err = models.DeleteJob(task)
err := models.DeleteJob(task)
if err != nil { if err != nil {
ctx.ServerError("DeleteJob failed", err) ctx.ServerError("DeleteJob failed", err)
return return


+ 4
- 18
routers/repo/modelarts.go View File

@@ -192,11 +192,6 @@ func NotebookShow(ctx *context.Context) {


func NotebookDebug(ctx *context.Context) { func NotebookDebug(ctx *context.Context) {
var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")
_, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
ctx.ServerError("GetCloudbrainByJobID failed", err)
return
}


result, err := modelarts.GetJob(jobID) result, err := modelarts.GetJob(jobID)
if err != nil { if err != nil {
@@ -325,11 +320,7 @@ func NotebookManage(ctx *context.Context) {


func NotebookDel(ctx *context.Context) { func NotebookDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
ctx.ServerError("GetCloudbrainByJobID failed", err)
return
}
task := ctx.Cloudbrain


if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) { if task.Status != string(models.ModelArtsCreateFailed) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsStopped) {
log.Error("the job(%s) has not been stopped", task.JobName) log.Error("the job(%s) has not been stopped", task.JobName)
@@ -337,7 +328,7 @@ func NotebookDel(ctx *context.Context) {
return return
} }


_, err = modelarts.DelNotebook(jobID)
_, err := modelarts.DelNotebook(jobID)
if err != nil { if err != nil {
log.Error("DelJob(%s) failed:%v", task.JobName, err.Error()) log.Error("DelJob(%s) failed:%v", task.JobName, err.Error())
ctx.ServerError("DelJob failed", err) ctx.ServerError("DelJob failed", err)
@@ -1421,14 +1412,9 @@ func TrainJobDel(ctx *context.Context) {


func TrainJobStop(ctx *context.Context) { func TrainJobStop(ctx *context.Context) {
var jobID = ctx.Params(":jobid") var jobID = ctx.Params(":jobid")
task, err := models.GetCloudbrainByJobID(jobID)
if err != nil {
log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error())
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)
return
}
task := ctx.Cloudbrain


_, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
_, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10))
if err != nil { if err != nil {
log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error())
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil)


Loading…
Cancel
Save