Browse Source

#2701

update
tags/v1.22.9.1^2^2
chenyifan01 3 years ago
parent
commit
170cbacbf4
5 changed files with 83 additions and 52 deletions
  1. +6
    -2
      models/resource_specification.go
  2. +2
    -0
      modules/auth/cloudbrain.go
  3. +6
    -46
      modules/cloudbrain/cloudbrain.go
  4. +46
    -2
      routers/repo/cloudbrain.go
  5. +23
    -2
      services/cloudbrain/resource/resource_specification.go

+ 6
- 2
models/resource_specification.go View File

@@ -146,6 +146,7 @@ type FindSpecsOptions struct {
ComputeResource string ComputeResource string
Cluster string Cluster string
AiCenterCode string AiCenterCode string
SpecId int64
} }


type Specification struct { type Specification struct {
@@ -315,7 +316,7 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS
return sess.Commit() return sess.Commit()
} }


func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) {
func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) {
var cond = builder.NewCond() var cond = builder.NewCond()
if opts.JobType != "" { if opts.JobType != "" {
cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType})
@@ -329,9 +330,12 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) {
if opts.AiCenterCode != "" { if opts.AiCenterCode != "" {
cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode})
} }
if opts.SpecId > 0 {
cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId})
}
cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"}))


r := make([]Specification, 0)
r := make([]*Specification, 0)
err := x.Where(cond). err := x.Where(cond).
Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id").
Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id").


+ 2
- 0
modules/auth/cloudbrain.go View File

@@ -24,6 +24,7 @@ type CreateCloudBrainForm struct {
Params string `form:"run_para_list"` Params string `form:"run_para_list"`
BranchName string `form:"branch_name"` BranchName string `form:"branch_name"`
DatasetName string `form:"dataset_name"` DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id"`
} }


type CommitImageCloudBrainForm struct { type CommitImageCloudBrainForm struct {
@@ -72,6 +73,7 @@ type CreateCloudBrainInferencForm struct {
CkptName string `form:"ckpt_name" binding:"Required"` CkptName string `form:"ckpt_name" binding:"Required"`
LabelName string `form:"label_names" binding:"Required"` LabelName string `form:"label_names" binding:"Required"`
DatasetName string `form:"dataset_name"` DatasetName string `form:"dataset_name"`
SpecId int64 `form:"spec_id"`
} }


func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors {


+ 6
- 46
modules/cloudbrain/cloudbrain.go View File

@@ -79,6 +79,7 @@ type GenerateCloudBrainTaskReq struct {
ModelVersion string ModelVersion string
CkptName string CkptName string
LabelName string LabelName string
Spec *models.Specification
} }


func GetCloudbrainDebugCommand() string { func GetCloudbrainDebugCommand() string {
@@ -227,50 +228,9 @@ func AdminOrImageCreaterRight(ctx *context.Context) {
} }


func GenerateTask(req GenerateCloudBrainTaskReq) error { func GenerateTask(req GenerateCloudBrainTaskReq) error {
var resourceSpec *models.ResourceSpec
var versionCount int var versionCount int
if req.JobType == string(models.JobTypeTrain) { if req.JobType == string(models.JobTypeTrain) {
versionCount = 1 versionCount = 1
if TrainResourceSpecs == nil {
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs)
}
for _, spec := range TrainResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}
} else if req.JobType == string(models.JobTypeInference) {
if InferenceResourceSpecs == nil {
json.Unmarshal([]byte(setting.InferenceResourceSpecs), &InferenceResourceSpecs)
}
for _, spec := range InferenceResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}

} else {
if ResourceSpecs == nil {
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs)
}
for _, spec := range ResourceSpecs.ResourceSpec {
if req.ResourceSpecId == spec.Id {
resourceSpec = spec
break
}
}

}
//如果没有匹配到spec信息,尝试从专属资源池获取
if resourceSpec == nil && SpecialPools != nil {
resourceSpec = geMatchResourceSpec(req.JobType, req.GpuQueue, req.ResourceSpecId)
}

if resourceSpec == nil {
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"])
return errors.New("no such resourceSpec")
} }


volumes := []models.Volume{ volumes := []models.Volume{
@@ -342,7 +302,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ jobResult, err := CreateJob(req.JobName, models.CreateJobParams{
JobName: req.JobName, JobName: req.JobName,
RetryCount: 1, RetryCount: 1,
GpuType: req.GpuQueue,
GpuType: req.Spec.QueueCode,
Image: req.Image, Image: req.Image,
TaskRoles: []models.TaskRole{ TaskRoles: []models.TaskRole{
{ {
@@ -350,10 +310,10 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
TaskNumber: 1, TaskNumber: 1,
MinSucceededTaskCount: 1, MinSucceededTaskCount: 1,
MinFailedTaskCount: 1, MinFailedTaskCount: 1,
CPUNumber: resourceSpec.CpuNum,
GPUNumber: resourceSpec.GpuNum,
MemoryMB: resourceSpec.MemMiB,
ShmMB: resourceSpec.ShareMemMiB,
CPUNumber: req.Spec.CpuCores,
GPUNumber: req.Spec.AccCardsNum,
MemoryMB: int(req.Spec.MemGiB * 1024),
ShmMB: int(req.Spec.ShareMemGiB * 1024),
Command: req.Command, Command: req.Command,
NeedIBDevice: false, NeedIBDevice: false,
IsMainRole: false, IsMainRole: false,


+ 46
- 2
routers/repo/cloudbrain.go View File

@@ -368,6 +368,17 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {


commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName)


spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobType(jobType),
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Illegal resource specification", tpl, &form)
return
}

req := cloudbrain.GenerateCloudBrainTaskReq{ req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx, Ctx: ctx,
DisplayJobName: displayJobName, DisplayJobName: displayJobName,
@@ -393,6 +404,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
BenchmarkChildTypeID: 0, BenchmarkChildTypeID: 0,
ResourceSpecId: resourceSpecId, ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec,
} }


err = cloudbrain.GenerateTask(req) err = cloudbrain.GenerateTask(req)
@@ -515,7 +527,16 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return return
} }

spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeInference,
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Illegal resource specification", tpl, &form)
return
}
req := cloudbrain.GenerateCloudBrainTaskReq{ req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx, Ctx: ctx,
DisplayJobName: displayJobName, DisplayJobName: displayJobName,
@@ -544,6 +565,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
CkptName: form.CkptName, CkptName: form.CkptName,
TrainUrl: form.TrainUrl, TrainUrl: form.TrainUrl,
LabelName: labelName, LabelName: labelName,
Spec: spec,
} }


err = cloudbrain.GenerateTask(req) err = cloudbrain.GenerateTask(req)
@@ -2453,6 +2475,17 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
return return
} }


spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeBenchmark,
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form)
return
}

req := cloudbrain.GenerateCloudBrainTaskReq{ req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx, Ctx: ctx,
DisplayJobName: displayJobName, DisplayJobName: displayJobName,
@@ -2478,6 +2511,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
BenchmarkChildTypeID: benchmarkChildTypeID, BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId, ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec,
} }


err = cloudbrain.GenerateTask(req) err = cloudbrain.GenerateTask(req)
@@ -2581,7 +2615,16 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form)
return return
} }

spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{
JobType: models.JobTypeBenchmark,
ComputeResource: models.GPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil {
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("Illegal resource specification", tpl, &form)
return
}
req := cloudbrain.GenerateCloudBrainTaskReq{ req := cloudbrain.GenerateCloudBrainTaskReq{
Ctx: ctx, Ctx: ctx,
DisplayJobName: displayJobName, DisplayJobName: displayJobName,
@@ -2607,6 +2650,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
BenchmarkChildTypeID: benchmarkChildTypeID, BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId, ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec,
} }


err = cloudbrain.GenerateTask(req) err = cloudbrain.GenerateTask(req)


+ 23
- 2
services/cloudbrain/resource/resource_specification.go View File

@@ -185,18 +185,23 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod
}) })
} }


func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Specification, error) {
func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) {
r, err := models.FindAvailableSpecs(opts) r, err := models.FindAvailableSpecs(opts)
if err != nil { if err != nil {
log.Error("FindAvailableSpecs error.%v", err) log.Error("FindAvailableSpecs error.%v", err)
return nil, err return nil, err
} }
specs := make([]models.Specification, 0, len(r))
specs := make([]*models.Specification, 0, len(r))
specMap := make(map[int64]string, 0)
//filter exclusive spec //filter exclusive spec
for i := 0; i < len(r); i++ { for i := 0; i < len(r); i++ {
spec := r[i] spec := r[i]
if _, has := specMap[spec.ID]; has {
continue
}
if !spec.IsExclusive { if !spec.IsExclusive {
specs = append(specs, spec) specs = append(specs, spec)
specMap[spec.ID] = ""
continue continue
} }
orgs := strings.Split(spec.ExclusiveOrg, ";") orgs := strings.Split(spec.ExclusiveOrg, ";")
@@ -204,8 +209,24 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Sp
isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) isMember, _ := models.IsOrganizationMemberByOrgName(org, userId)
if isMember { if isMember {
specs = append(specs, spec) specs = append(specs, spec)
specMap[spec.ID] = ""
} }
} }
} }
return specs, err return specs, err
} }

// GetAndCheckSpec returns the specification identified by specId, provided it
// is among the specs that FindAvailableSpecs yields for userId under opts.
//
// It returns (nil, nil) when specId is 0 or when no matching specification is
// found, so callers must check the returned pointer for nil in addition to
// checking the error. Any error from FindAvailableSpecs is returned as is.
func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) {
	if specId == 0 {
		return nil, nil
	}
	// opts is received by value, so narrowing it to one spec ID does not
	// touch the caller's copy.
	opts.SpecId = specId
	r, err := FindAvailableSpecs(userId, opts)
	if err != nil {
		return nil, err
	}
	// len of a nil slice is 0, so a separate nil check is unnecessary.
	if len(r) == 0 {
		return nil, nil
	}
	return r[0], nil
}

Loading…
Cancel
Save