From 170cbacbf48e2b28b424060eba4ddb4bdc7d7de6 Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Fri, 19 Aug 2022 18:02:51 +0800 Subject: [PATCH] #2701 update --- models/resource_specification.go | 8 ++- modules/auth/cloudbrain.go | 2 + modules/cloudbrain/cloudbrain.go | 52 +++---------------- routers/repo/cloudbrain.go | 48 ++++++++++++++++- .../resource/resource_specification.go | 25 ++++++++- 5 files changed, 83 insertions(+), 52 deletions(-) diff --git a/models/resource_specification.go b/models/resource_specification.go index 60e59b253..8ef95a8da 100644 --- a/models/resource_specification.go +++ b/models/resource_specification.go @@ -146,6 +146,7 @@ type FindSpecsOptions struct { ComputeResource string Cluster string AiCenterCode string + SpecId int64 } type Specification struct { @@ -315,7 +316,7 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS return sess.Commit() } -func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { +func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) { var cond = builder.NewCond() if opts.JobType != "" { cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) @@ -329,9 +330,12 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { if opts.AiCenterCode != "" { cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) } + if opts.SpecId > 0 { + cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId}) + } cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) - r := make([]Specification, 0) + r := make([]*Specification, 0) err := x.Where(cond). Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). 
diff --git a/modules/auth/cloudbrain.go b/modules/auth/cloudbrain.go index 39685990d..5bd294f2a 100755 --- a/modules/auth/cloudbrain.go +++ b/modules/auth/cloudbrain.go @@ -24,6 +24,7 @@ type CreateCloudBrainForm struct { Params string `form:"run_para_list"` BranchName string `form:"branch_name"` DatasetName string `form:"dataset_name"` + SpecId int64 `form:"spec_id"` } type CommitImageCloudBrainForm struct { @@ -72,6 +73,7 @@ type CreateCloudBrainInferencForm struct { CkptName string `form:"ckpt_name" binding:"Required"` LabelName string `form:"label_names" binding:"Required"` DatasetName string `form:"dataset_name"` + SpecId int64 `form:"spec_id"` } func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 1872375da..03b73e559 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -79,6 +79,7 @@ type GenerateCloudBrainTaskReq struct { ModelVersion string CkptName string LabelName string + Spec *models.Specification } func GetCloudbrainDebugCommand() string { @@ -227,50 +228,9 @@ func AdminOrImageCreaterRight(ctx *context.Context) { } func GenerateTask(req GenerateCloudBrainTaskReq) error { - var resourceSpec *models.ResourceSpec var versionCount int if req.JobType == string(models.JobTypeTrain) { versionCount = 1 - if TrainResourceSpecs == nil { - json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) - } - for _, spec := range TrainResourceSpecs.ResourceSpec { - if req.ResourceSpecId == spec.Id { - resourceSpec = spec - break - } - } - } else if req.JobType == string(models.JobTypeInference) { - if InferenceResourceSpecs == nil { - json.Unmarshal([]byte(setting.InferenceResourceSpecs), &InferenceResourceSpecs) - } - for _, spec := range InferenceResourceSpecs.ResourceSpec { - if req.ResourceSpecId == spec.Id { - resourceSpec = spec - break - } - } - - } else { - if ResourceSpecs == nil 
{ - json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) - } - for _, spec := range ResourceSpecs.ResourceSpec { - if req.ResourceSpecId == spec.Id { - resourceSpec = spec - break - } - } - - } - //如果没有匹配到spec信息,尝试从专属资源池获取 - if resourceSpec == nil && SpecialPools != nil { - resourceSpec = geMatchResourceSpec(req.JobType, req.GpuQueue, req.ResourceSpecId) - } - - if resourceSpec == nil { - log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) - return errors.New("no such resourceSpec") } volumes := []models.Volume{ @@ -342,7 +302,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ JobName: req.JobName, RetryCount: 1, - GpuType: req.GpuQueue, + GpuType: req.Spec.QueueCode, Image: req.Image, TaskRoles: []models.TaskRole{ { @@ -350,10 +310,10 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { TaskNumber: 1, MinSucceededTaskCount: 1, MinFailedTaskCount: 1, - CPUNumber: resourceSpec.CpuNum, - GPUNumber: resourceSpec.GpuNum, - MemoryMB: resourceSpec.MemMiB, - ShmMB: resourceSpec.ShareMemMiB, + CPUNumber: req.Spec.CpuCores, + GPUNumber: req.Spec.AccCardsNum, + MemoryMB: int(req.Spec.MemGiB * 1024), + ShmMB: int(req.Spec.ShareMemGiB * 1024), Command: req.Command, NeedIBDevice: false, IsMainRole: false, diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 6423960ab..ec7ab858c 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -368,6 +368,17 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobType(jobType), + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal 
resource specification", tpl, &form) + return + } + req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -393,6 +404,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { BenchmarkChildTypeID: 0, ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, } err = cloudbrain.GenerateTask(req) @@ -515,7 +527,16 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } - + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeInference, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal resource specification", tpl, &form) + return + } req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -544,6 +565,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra CkptName: form.CkptName, TrainUrl: form.TrainUrl, LabelName: labelName, + Spec: spec, } err = cloudbrain.GenerateTask(req) @@ -2453,6 +2475,17 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo return } + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeBenchmark, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form) + return + } + req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -2478,6 +2511,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form 
auth.CreateCloudBrainFo BenchmarkChildTypeID: benchmarkChildTypeID, ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, } err = cloudbrain.GenerateTask(req) @@ -2581,7 +2615,16 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) return } - + spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ + JobType: models.JobTypeBenchmark, + ComputeResource: models.GPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainOne}) + if err != nil || spec == nil { + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("Illegal resource specification", tpl, &form) + return + } req := cloudbrain.GenerateCloudBrainTaskReq{ Ctx: ctx, DisplayJobName: displayJobName, @@ -2607,6 +2650,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) BenchmarkChildTypeID: benchmarkChildTypeID, ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), + Spec: spec, } err = cloudbrain.GenerateTask(req) diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go index 31c8b3b25..db104a9ac 100644 --- a/services/cloudbrain/resource/resource_specification.go +++ b/services/cloudbrain/resource/resource_specification.go @@ -185,18 +185,23 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod }) } -func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Specification, error) { +func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) { r, err := models.FindAvailableSpecs(opts) if err != nil { log.Error("FindAvailableSpecs error.%v", err) return nil, err } - specs := make([]models.Specification, 0, len(r)) + specs := make([]*models.Specification, 0, len(r)) + 
specMap := make(map[int64]string, 0) //filter exclusive spec for i := 0; i < len(r); i++ { spec := r[i] + if _, has := specMap[spec.ID]; has { + continue + } if !spec.IsExclusive { specs = append(specs, spec) + specMap[spec.ID] = "" continue } orgs := strings.Split(spec.ExclusiveOrg, ";")
@@ -204,8 +209,33 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Sp
 			isMember, _ := models.IsOrganizationMemberByOrgName(org, userId)
 			if isMember {
 				specs = append(specs, spec)
+				specMap[spec.ID] = ""
+				// One matching org is enough; stop so the spec is not appended again.
+				break
 			}
 		}
 	}
 	return specs, err
 }
+
+// GetAndCheckSpec returns the specification identified by specId if it is
+// among the specs FindAvailableSpecs yields for userId under opts.
+//
+// It returns (nil, nil) when specId is not positive or no such spec is
+// available, and a non-nil error only when the lookup itself fails.
+func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) {
+	// Reject non-positive ids up front: models.FindAvailableSpecs only filters
+	// by SpecId when it is > 0, so a negative id would match an arbitrary spec.
+	if specId <= 0 {
+		return nil, nil
+	}
+	opts.SpecId = specId
+	r, err := FindAvailableSpecs(userId, opts)
+	if err != nil {
+		return nil, err
+	}
+	if len(r) == 0 {
+		return nil, nil
+	}
+	return r[0], nil
+}