Browse Source

update

tags/V1.22.3.1^2
liuzx 3 years ago
parent
commit
deb66c4af4
7 changed files with 84 additions and 135 deletions
  1. +2
    -2
      models/cloudbrain.go
  2. +1
    -0
      modules/auth/cloudbrain.go
  3. +3
    -0
      modules/modelarts/modelarts.go
  4. +22
    -2
      routers/repo/cloudbrain.go
  5. +53
    -128
      routers/repo/modelarts.go
  6. +1
    -1
      templates/repo/debugjob/index.tmpl
  7. +2
    -2
      templates/repo/modelarts/notebook/show.tmpl

+ 2
- 2
models/cloudbrain.go View File

@@ -1327,9 +1327,9 @@ func GetCloudbrainsNeededStopByRepoID(repoID int64) ([]*Cloudbrain, error) {
return cloudBrains, err
}

func GetCloudbrainsByRepoIDAndDisplayName(repoID int64, jobType string, displayJobName string) ([]*Cloudbrain, error) {
func GetCloudbrainsByRepoIDAndJobType(repoID int64, jobType string) ([]*Cloudbrain, error) {
cloudBrains := make([]*Cloudbrain, 0)
err := x.Cols("job_id", "job_name", "repo_id", "user_id", "job_type", "display_job_name").Where("repo_id=? AND job_type =? AND display_job_name LIKE ?", repoID, jobType, displayJobName+"%").Find(&cloudBrains)
err := x.Cols("job_id", "job_name", "repo_id", "user_id", "job_type", "display_job_name").Where("repo_id=? AND job_type =?", repoID, jobType).Find(&cloudBrains)
return cloudBrains, err
}



+ 1
- 0
modules/auth/cloudbrain.go View File

@@ -7,6 +7,7 @@ import (

type CreateCloudBrainForm struct {
JobName string `form:"job_name" binding:"Required"`
DisplayJobName string `form:"display_job_name" binding:"Required"`
Image string `form:"image" binding:"Required"`
Command string `form:"command" binding:"Required"`
Attachment string `form:"attachment" binding:"Required"`


+ 3
- 0
modules/modelarts/modelarts.go View File

@@ -332,6 +332,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
RepoID: ctx.Repo.Repository.ID,
JobID: jobId,
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeTrain),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
@@ -416,6 +417,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
RepoID: ctx.Repo.Repository.ID,
JobID: strconv.FormatInt(jobResult.JobID, 10),
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeTrain),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
@@ -552,6 +554,7 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
RepoID: ctx.Repo.Repository.ID,
JobID: jobID,
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeInference),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,


+ 22
- 2
routers/repo/cloudbrain.go View File

@@ -176,7 +176,8 @@ func CloudBrainNew(ctx *context.Context) {

func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
ctx.Data["PageIsCloudBrain"] = true
jobName := form.JobName
displayJobName := form.DisplayJobName
jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
image := form.Image
uuid := form.Attachment
jobType := form.JobType
@@ -184,6 +185,26 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
repo := ctx.Repo.Repository

tasks, err := models.GetCloudbrainsByRepoIDAndJobType(repo.ID, string(models.JobTypeDebug))
if err == nil {
for _, task := range tasks {
if strings.EqualFold(task.DisplayJobName, displayJobName) {
log.Error("the job name did already exist", ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("the job name did already exist", tplCloudBrainNew, &form)
return
}
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("system error", tplCloudBrainNew, &form)
return
}
}

if !jobNamePattern.MatchString(jobName) {
ctx.RenderWithErr(ctx.Tr("repo.cloudbrain_jobname_err"), tplCloudBrainNew, &form)
@@ -226,7 +247,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
return
}
}
repo := ctx.Repo.Repository
downloadCode(repo, codePath)
uploadCodeToMinio(codePath+"/", jobName, cloudbrain.CodeMountPath+"/")



+ 53
- 128
routers/repo/modelarts.go View File

@@ -187,37 +187,7 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
description := form.Description
flavor := form.Flavor
imageId := form.ImageId

//判断项目内任务名称是否重复
// var jobTypes []string
// jobTypes = append(jobTypes, string(models.JobTypeTrain))
// tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{
// ListOptions: models.ListOptions{
// PageSize: setting.UI.IssuePagingNum,
// },
// RepoID: repo.ID,
// Type: models.TypeCloudBrainTwo,
// JobTypeNot: false,
// JobTypes: jobTypes,
// IsLatestVersion: modelarts.IsLatestVersion,
// })
// if err == nil {
// for _, task := range tasks {
// if strings.EqualFold(task.JobName, displayJobName) {
// log.Error("the job name did already exist", ctx.Data["MsgID"])
// trainJobErrorNewDataPrepare(ctx, form)
// ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsTrainJobNew, &form)
// return
// }
// }
// } else {
// if !models.IsErrJobNotExist(err) {
// log.Error("system error, %v", err, ctx.Data["MsgID"])
// trainJobErrorNewDataPrepare(ctx, form)
// ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form)
// return
// }
// }
repo := ctx.Repo.Repository

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
@@ -233,12 +203,17 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
return
}
}
_, err = models.GetCloudbrainByName(jobName)

tasks, err := models.GetCloudbrainsByRepoIDAndJobType(repo.ID, string(models.JobTypeDebug))
if err == nil {
log.Error("the job name did already exist", ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form)
return
for _, task := range tasks {
if strings.EqualFold(task.DisplayJobName, displayJobName) {
log.Error("the job name did already exist", ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form)
return
}
}
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
@@ -902,20 +877,35 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount := modelarts.VersionCount
EngineName := form.EngineName

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsTrainJobNew, &form)
return
}
}

if err := paramCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err)
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form)
return
}
//判断项目内任务名称是否重复
tasks, err := models.GetCloudbrainsByRepoIDAndDisplayName(repo.ID, string(models.JobTypeTrain), displayJobName)
//Determine whether the task name of the task in the project is duplicated
tasks, err := models.GetCloudbrainsByRepoIDAndJobType(repo.ID, string(models.JobTypeTrain))
if err == nil {
for _, task := range tasks {
if strings.EqualFold(task.DisplayJobName, displayJobName) {
log.Error("the job name did already exist", ctx.Data["MsgID"])
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsTrainJobNew, &form)
ctx.RenderWithErr("the job name did already exist", tplModelArtsTrainJobNew, &form)
return
}
}
@@ -928,52 +918,6 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
}
}

// //判断项目内任务名称是否重复
// var jobTypes []string
// jobTypes = append(jobTypes, string(models.JobTypeTrain))
// tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{
// ListOptions: models.ListOptions{
// PageSize: setting.UI.IssuePagingNum,
// },
// RepoID: repo.ID,
// Type: models.TypeCloudBrainTwo,
// JobTypeNot: false,
// JobTypes: jobTypes,
// IsLatestVersion: modelarts.IsLatestVersion,
// })
// if err == nil {
// for _, task := range tasks {
// if strings.EqualFold(task.JobName, displayJobName) {
// log.Error("the job name did already exist", ctx.Data["MsgID"])
// trainJobErrorNewDataPrepare(ctx, form)
// ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsTrainJobNew, &form)
// return
// }
// }
// } else {
// if !models.IsErrJobNotExist(err) {
// log.Error("system error, %v", err, ctx.Data["MsgID"])
// trainJobErrorNewDataPrepare(ctx, form)
// ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form)
// return
// }
// }

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("system error", tplModelArtsTrainJobNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsTrainJobNew, &form)
return
}
}

//todo: del the codeLocalPath
_, err = ioutil.ReadDir(codeLocalPath)
if err == nil {
@@ -986,9 +930,9 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{
Branch: branch_name,
}); err != nil {
log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err)
log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err)
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsTrainJobNew, &form)
ctx.RenderWithErr("Create task failed, server timed out", tplModelArtsTrainJobNew, &form)
return
}

@@ -1172,7 +1116,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
EngineName := form.EngineName
isLatestVersion := modelarts.IsLatestVersion

//判断权限
canNewJob, _ := canUserCreateTrainJobVersion(ctx, latestTask.UserID)
if !canNewJob {
ctx.RenderWithErr("user cann't new trainjob", tplModelArtsTrainJobVersionNew, &form)
@@ -1772,6 +1715,21 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference

ckptUrl := form.TrainUrl + form.CkptName

count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting inference task", ctx.Data["MsgID"])
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("you have already a running or waiting inference task, can not create more", tplModelArtsInferenceJobNew, &form)
return
}
}

if err := paramCheckCreateInferenceJob(form); err != nil {
log.Error("paramCheckCreateInferenceJob failed:(%v)", err)
inferenceJobErrorNewDataPrepare(ctx, form)
@@ -1779,25 +1737,14 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
return
}

//判断项目内任务名称是否重复
var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobTypeNot: false,
JobTypes: jobTypes,
IsLatestVersion: modelarts.IsLatestVersion,
})
//Determine whether the task name of the task in the project is duplicated
tasks, err := models.GetCloudbrainsByRepoIDAndJobType(repo.ID, string(models.JobTypeInference))
if err == nil {
for _, task := range tasks {
if strings.EqualFold(task.JobName, displayJobName) {
if strings.EqualFold(task.DisplayJobName, displayJobName) {
log.Error("the job name did already exist", ctx.Data["MsgID"])
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("任务名称已经被使用!", tplModelArtsInferenceJobNew, &form)
ctx.RenderWithErr("the job name did already exist", tplModelArtsInferenceJobNew, &form)
return
}
}
@@ -1810,21 +1757,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
}
}

count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("system error", tplModelArtsInferenceJobNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting inference task", ctx.Data["MsgID"])
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("you have already a running or waiting inference task, can not create more", tplModelArtsInferenceJobNew, &form)
return
}
}

//todo: del the codeLocalPath
_, err = ioutil.ReadDir(codeLocalPath)
if err == nil {
@@ -1837,9 +1769,9 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{
Branch: branch_name,
}); err != nil {
log.Error("创建任务失败,服务器超时!: %s (%v)", repo.FullName(), err)
log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err)
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr("创建任务失败,服务器超时!", tplModelArtsInferenceJobNew, &form)
ctx.RenderWithErr("Create task failed, server timed out", tplModelArtsInferenceJobNew, &form)
return
}

@@ -1909,7 +1841,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
LogUrl: logObsPath,
PoolID: poolID,
Uuid: uuid,
Parameters: param, //modelarts训练时用到
Parameters: param, //modelarts train parameters
CommitID: commitID,
BranchName: branch_name,
Params: form.Params,
@@ -1925,13 +1857,6 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ResultUrl: resultObsPath,
}

//将params转换Parameters.Parameter,出错时返回给前端
// var Parameters modelarts.Parameters
// if err := json.Unmarshal([]byte(params), &Parameters); err != nil {
// ctx.ServerError("json.Unmarshal failed:", err)
// return
// }

err = modelarts.GenerateInferenceJob(ctx, req)
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error())


+ 1
- 1
templates/repo/debugjob/index.tmpl View File

@@ -288,7 +288,7 @@
<!-- 任务名 -->
<div class="four wide column">
<a class="title" href='{{if eq .ComputeResource "CPU/GPU"}}{{$.RepoLink}}/cloudbrain/{{.JobName}}{{else}}{{$.RepoLink}}/modelarts/notebook/{{.JobID}}{{end}}' title="{{.JobName}}" style="font-size: 14px;">
<span class="fitted text_over" style="width: 90%;vertical-align: middle;">{{.JobName}}</span>
<span class="fitted text_over" style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span>
</a>
</div>
<div class="two wide column text center">


+ 2
- 2
templates/repo/modelarts/notebook/show.tmpl View File

@@ -16,14 +16,14 @@
</a>
<div class="divider"> / </div>
{{with .task}}
<div class="active section">{{.JobName}}</div>
<div class="active section">{{.DisplayJobName}}</div>
{{end}}
</div>
</h4>
<div>
<div class="ui yellow segment">
{{with .task}}
<p>任务名称: {{.JobName}}</p>
<p>任务名称: {{.DisplayJobName}}</p>
{{end}}
</div>
<div class="ui green segment">


Loading…
Cancel
Save