Browse Source

fix bug

tags/v1.22.9.1^2
lewis 3 years ago
parent
commit
586e27d2fb
2 changed files with 24 additions and 29 deletions
  1. +3
    -3
      models/cloudbrain.go
  2. +21
    -26
      routers/repo/modelarts.go

+ 3
- 3
models/cloudbrain.go View File

@@ -1493,7 +1493,7 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
if (opts.Cluster) != "" {
if opts.Cluster == "resource_cluster_openi" {
cond = cond.And(
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}),
builder.Or(builder.Eq{"cloudbrain.type": TypeCloudBrainOne}, builder.Eq{"cloudbrain.type": TypeCloudBrainTwo}, builder.Eq{"cloudbrain.type": TypeCDCenter}),
)
}
if opts.Cluster == "resource_cluster_c2net" {
@@ -1944,9 +1944,9 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy
return sess.Count(new(Cloudbrain))
}

// GetCloudbrainNotebookCountByUserID counts the Cloudbrain rows that belong to
// userID, have job_type JobTypeDebug, and whose status is one of the eight
// ModelArts* values passed to In below. It returns that count converted to int
// together with the error from Count, unchanged.
//
// NOTE(review): this span is a rendered diff hunk with the +/- markers
// stripped, so the signature and the And(...) filter each appear twice.
// Judging by the call sites in routers/repo/modelarts.go, which drop their
// second argument in this commit, the first line of each pair is presumably
// the removed version (caller-supplied typeCloudbrain, "type = ?") and the
// second the added version (no type parameter, "type in (?,?)" bound to
// TypeCloudBrainTwo and TypeCDCenter) — confirm against the repository.
func GetCloudbrainNotebookCountByUserID(userID int64, typeCloudbrain int) (int, error) {
func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) {
count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting).
And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, typeCloudbrain).Count(new(Cloudbrain))
And("job_type = ? and user_id = ? and type in (?,?)", JobTypeDebug, userID, TypeCloudBrainTwo, TypeCDCenter).Count(new(Cloudbrain))
return int(count), err
}



+ 21
- 26
routers/repo/modelarts.go View File

@@ -151,12 +151,7 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
imageId := form.ImageId
repo := ctx.Repo.Repository

typeCloudbrain := models.TypeCloudBrainTwo
if setting.ModelartsCD.Enabled {
typeCloudbrain = models.TypeCDCenter
}

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID, typeCloudbrain)
count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
@@ -381,7 +376,7 @@ func NotebookRestart(ctx *context.Context) {
break
}

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID, task.Type)
count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
errorMsg = "system error"
@@ -713,7 +708,7 @@ func trainJobNewDataPrepare(ctx *context.Context) error {
ctx.Data["WaitCount"] = waitCount

setMultiNodeIfConfigureMatch(ctx)
return nil
}

@@ -1079,8 +1074,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName

errStr:=checkMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
return
@@ -1320,31 +1315,31 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

// checkMultiNode reports whether the user identified by userId may request
// serverNum servers. It returns "" when the request is accepted and the
// string "repo.modelarts.no_node_right" otherwise; the callers visible in this
// diff pass a non-empty result to ctx.Tr, so it is used as a translation key.
//
// NOTE(review): this span is a rendered diff hunk with the +/- markers
// stripped. Several lines appear twice, differing only in spacing (for example
// "serverNum==1{" versus "serverNum == 1 {"), so the pairs look like a
// formatting-only change rather than two separate statements.
func checkMultiNode(userId int64, serverNum int) string{
if serverNum==1{
func checkMultiNode(userId int64, serverNum int) string {
if serverNum == 1 {
// serverNum == 1 short-circuits with the empty (accepted) result before any
// configuration is loaded.
return ""
}
modelarts.InitMultiNode()
var isServerNumValid=false
var isServerNumValid = false
if modelarts.MultiNodeConfig != nil {
// Accept as soon as one config entry matches both conditions: the user is a
// member of info.Org and serverNum is listed in info.Node.
for _, info := range modelarts.MultiNodeConfig.Info {
// The error from the membership lookup is discarded; only the boolean
// result is used here.
if isInOrg, _ := models.IsOrganizationMemberByOrgName(info.Org, userId); isInOrg {
if isInNodes(info.Node,serverNum){
isServerNumValid=true
if isInNodes(info.Node, serverNum) {
isServerNumValid = true
break
}

}
}
}
if isServerNumValid{
if isServerNumValid {
return ""
}else{
} else {
return "repo.modelarts.no_node_right"
}
}
func checkInferenceJobMultiNode(userId int64, serverNum int) string{
if serverNum==1{
func checkInferenceJobMultiNode(userId int64, serverNum int) string {
if serverNum == 1 {
return ""
}

@@ -1353,8 +1348,8 @@ func checkInferenceJobMultiNode(userId int64, serverNum int) string{
}

func isInNodes(nodes []int, num int) bool {
for _, node:=range nodes{
if node==num{
for _, node := range nodes {
if node == num {
return true
}
}
@@ -1396,8 +1391,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.Data["PageIsTrainJob"] = true
var jobID = ctx.Params(":jobid")

errStr:=checkMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
errStr := checkMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
return
@@ -1738,7 +1733,7 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error {
log.Error("the boot file(%s) must be a python file", strings.TrimSpace(form.BootFile))
return errors.New("启动文件必须是python文件")
}
if form.BranchName == "" {
log.Error("the branch must not be null!", form.BranchName)
return errors.New("代码分支不能为空!")
@@ -2037,8 +2032,8 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ckptUrl := "/" + form.TrainUrl + form.CkptName
log.Info("ckpt url:" + ckptUrl)

errStr:=checkInferenceJobMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
errStr := checkInferenceJobMultiNode(ctx.User.ID, form.WorkServerNumber)
if errStr != "" {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
return


Loading…
Cancel
Save