Browse Source

提交代码

tags/v1.22.9.1^2
ychao_1983 3 years ago
parent
commit
af0bada5d3
3 changed files with 68 additions and 3 deletions
  1. +1
    -0
      options/locale/locale_en-US.ini
  2. +1
    -0
      options/locale/locale_zh-CN.ini
  3. +66
    -3
      routers/repo/modelarts.go

+ 1
- 0
options/locale/locale_en-US.ini View File

@@ -1213,6 +1213,7 @@ modelarts.infer_job.select_model = Select Model
modelarts.infer_job.boot_file_helper=The startup file is the entry file for your program execution and must end in .py, such as inference.py, main.py, example/inference.py, case/main.py.
modelarts.infer_job.tooltip = The model has been deleted and cannot be viewed.
modelarts.download_log=Download log file
modelarts.no_node_right = The value of 'Amount of Compute Node' is wrong, you have no right to use the current value of 'Amount of Compute Node'.


debug_task_not_created = Debug task has not been created


+ 1
- 0
options/locale/locale_zh-CN.ini View File

@@ -1226,6 +1226,7 @@ modelarts.infer_job.select_model = 选择模型
modelarts.infer_job.boot_file_helper=启动文件是您程序执行的入口文件,必须是以.py结尾的文件。比如inference.py、main.py、example/inference.py、case/main.py。
modelarts.infer_job.tooltip = 该模型已删除,无法查看。
modelarts.download_log=下载日志文件
modelarts.no_node_right = 计算节点数的值配置错误,您没有权限使用当前配置的计算节点数。


debug_task_not_created = 未创建过调试任务


+ 66
- 3
routers/repo/modelarts.go View File

@@ -1130,6 +1130,13 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName

errStr:=checkMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
return
}

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -1160,7 +1167,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
return
}

errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
errStr = checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
if errStr != "" {
trainJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobNew, &form)
@@ -1364,6 +1371,48 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job")
}

// checkMultiNode decides whether the given user may request serverNum compute
// nodes for a training job.
//
// It returns the empty string when the request is acceptable, and the locale
// key "repo.modelarts.no_node_right" when it is not. A request for exactly one
// node is accepted without consulting any configuration.
func checkMultiNode(userId int64, serverNum int) string {
	const denied = "repo.modelarts.no_node_right"

	if serverNum == 1 {
		return ""
	}

	// NOTE(review): presumably (re)loads modelarts.MultiNodeConfig — confirm
	// against the modelarts package.
	modelarts.InitMultiNode()
	if modelarts.MultiNodeConfig == nil {
		return denied
	}

	for _, entry := range modelarts.MultiNodeConfig.Info {
		// The error result of the membership lookup is discarded; only the
		// boolean is consulted.
		member, _ := models.IsOrganizationMemberByOrgName(entry.Org, userId)
		if !member {
			continue
		}
		// The first organization entry that lists serverNum grants access.
		if isInNodes(entry.Node, serverNum) {
			return ""
		}
	}

	return denied
}
// checkInferenceJobMultiNode validates the node count requested for an
// inference job.
//
// Only a node count of exactly one is accepted, in which case the empty string
// is returned. Any other value yields the locale key
// "repo.modelarts.no_node_right". The userId argument is not consulted.
func checkInferenceJobMultiNode(userId int64, serverNum int) string {
	if serverNum != 1 {
		return "repo.modelarts.no_node_right"
	}
	return ""
}

// isInNodes reports whether num occurs anywhere in nodes.
// A nil or empty slice never contains num.
func isInNodes(nodes []int, num int) bool {
	for i := 0; i < len(nodes); i++ {
		if nodes[i] == num {
			return true
		}
	}
	return false
}

func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, string) {
userImageUrl := ""
userCommand := ""
@@ -1398,6 +1447,13 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.Data["PageIsTrainJob"] = true
var jobID = ctx.Params(":jobid")

errStr:=checkMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
return
}

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainTrainJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -1465,7 +1521,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
return
}

errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
errStr = checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeTrain))
if errStr != "" {
versionErrorDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsTrainJobVersionNew, &form)
@@ -2036,6 +2092,13 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
ckptUrl := "/" + form.TrainUrl + form.CkptName
log.Info("ckpt url:" + ckptUrl)

errStr:=checkInferenceJobMultiNode(ctx.User.ID,form.WorkServerNumber)
if errStr!=""{
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)
return
}

count, err := models.GetCloudbrainInferenceJobCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"])
@@ -2084,7 +2147,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
}
}

errStr := checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeInference))
errStr = checkModelArtsSpecialPool(ctx, flavorCode, string(models.JobTypeInference))
if errStr != "" {
inferenceJobErrorNewDataPrepare(ctx, form)
ctx.RenderWithErr(ctx.Tr(errStr), tplModelArtsInferenceJobNew, &form)


Loading…
Cancel
Save