@@ -146,6 +146,7 @@ type FindSpecsOptions struct { | |||
ComputeResource string | |||
Cluster string | |||
AiCenterCode string | |||
SpecId int64 | |||
} | |||
type Specification struct { | |||
@@ -315,7 +316,7 @@ func SyncGrampusSpecs(updateList []ResourceSpecification, insertList []ResourceS | |||
return sess.Commit() | |||
} | |||
func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { | |||
func FindAvailableSpecs(opts FindSpecsOptions) ([]*Specification, error) { | |||
var cond = builder.NewCond() | |||
if opts.JobType != "" { | |||
cond = cond.And(builder.Eq{"resource_scene.job_type": opts.JobType}) | |||
@@ -329,9 +330,12 @@ func FindAvailableSpecs(opts FindSpecsOptions) ([]Specification, error) { | |||
if opts.AiCenterCode != "" { | |||
cond = cond.And(builder.Eq{"resource_queue.ai_center_code": opts.AiCenterCode}) | |||
} | |||
if opts.SpecId > 0 { | |||
cond = cond.And(builder.Eq{"resource_specification.id": opts.SpecId}) | |||
} | |||
cond = cond.And(builder.Or(builder.Eq{"resource_scene.delete_time": 0}, builder.IsNull{"resource_scene.delete_time"})) | |||
r := make([]Specification, 0) | |||
r := make([]*Specification, 0) | |||
err := x.Where(cond). | |||
Join("INNER", "resource_scene_spec", "resource_scene_spec.spec_id = resource_specification.id"). | |||
Join("INNER", "resource_scene", "resource_scene_spec.scene_id = resource_scene.id"). | |||
@@ -24,6 +24,7 @@ type CreateCloudBrainForm struct { | |||
Params string `form:"run_para_list"` | |||
BranchName string `form:"branch_name"` | |||
DatasetName string `form:"dataset_name"` | |||
SpecId int64 `form:"spec_id"` | |||
} | |||
type CommitImageCloudBrainForm struct { | |||
@@ -72,6 +73,7 @@ type CreateCloudBrainInferencForm struct { | |||
CkptName string `form:"ckpt_name" binding:"Required"` | |||
LabelName string `form:"label_names" binding:"Required"` | |||
DatasetName string `form:"dataset_name"` | |||
SpecId int64 `form:"spec_id"` | |||
} | |||
func (f *CreateCloudBrainForm) Validate(ctx *macaron.Context, errs binding.Errors) binding.Errors { | |||
@@ -79,6 +79,7 @@ type GenerateCloudBrainTaskReq struct { | |||
ModelVersion string | |||
CkptName string | |||
LabelName string | |||
Spec *models.Specification | |||
} | |||
func GetCloudbrainDebugCommand() string { | |||
@@ -227,50 +228,9 @@ func AdminOrImageCreaterRight(ctx *context.Context) { | |||
} | |||
func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
var resourceSpec *models.ResourceSpec | |||
var versionCount int | |||
if req.JobType == string(models.JobTypeTrain) { | |||
versionCount = 1 | |||
if TrainResourceSpecs == nil { | |||
json.Unmarshal([]byte(setting.TrainResourceSpecs), &TrainResourceSpecs) | |||
} | |||
for _, spec := range TrainResourceSpecs.ResourceSpec { | |||
if req.ResourceSpecId == spec.Id { | |||
resourceSpec = spec | |||
break | |||
} | |||
} | |||
} else if req.JobType == string(models.JobTypeInference) { | |||
if InferenceResourceSpecs == nil { | |||
json.Unmarshal([]byte(setting.InferenceResourceSpecs), &InferenceResourceSpecs) | |||
} | |||
for _, spec := range InferenceResourceSpecs.ResourceSpec { | |||
if req.ResourceSpecId == spec.Id { | |||
resourceSpec = spec | |||
break | |||
} | |||
} | |||
} else { | |||
if ResourceSpecs == nil { | |||
json.Unmarshal([]byte(setting.ResourceSpecs), &ResourceSpecs) | |||
} | |||
for _, spec := range ResourceSpecs.ResourceSpec { | |||
if req.ResourceSpecId == spec.Id { | |||
resourceSpec = spec | |||
break | |||
} | |||
} | |||
} | |||
//如果没有匹配到spec信息,尝试从专属资源池获取 | |||
if resourceSpec == nil && SpecialPools != nil { | |||
resourceSpec = geMatchResourceSpec(req.JobType, req.GpuQueue, req.ResourceSpecId) | |||
} | |||
if resourceSpec == nil { | |||
log.Error("no such resourceSpecId(%d)", req.ResourceSpecId, req.Ctx.Data["MsgID"]) | |||
return errors.New("no such resourceSpec") | |||
} | |||
volumes := []models.Volume{ | |||
@@ -342,7 +302,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
jobResult, err := CreateJob(req.JobName, models.CreateJobParams{ | |||
JobName: req.JobName, | |||
RetryCount: 1, | |||
GpuType: req.GpuQueue, | |||
GpuType: req.Spec.QueueCode, | |||
Image: req.Image, | |||
TaskRoles: []models.TaskRole{ | |||
{ | |||
@@ -350,10 +310,10 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { | |||
TaskNumber: 1, | |||
MinSucceededTaskCount: 1, | |||
MinFailedTaskCount: 1, | |||
CPUNumber: resourceSpec.CpuNum, | |||
GPUNumber: resourceSpec.GpuNum, | |||
MemoryMB: resourceSpec.MemMiB, | |||
ShmMB: resourceSpec.ShareMemMiB, | |||
CPUNumber: req.Spec.CpuCores, | |||
GPUNumber: req.Spec.AccCardsNum, | |||
MemoryMB: int(req.Spec.MemGiB * 1024), | |||
ShmMB: int(req.Spec.ShareMemGiB * 1024), | |||
Command: req.Command, | |||
NeedIBDevice: false, | |||
IsMainRole: false, | |||
@@ -368,6 +368,17 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
commitID, _ := ctx.Repo.GitRepo.GetBranchCommitID(branchName) | |||
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||
JobType: models.JobType(jobType), | |||
ComputeResource: models.GPU, | |||
Cluster: models.OpenICluster, | |||
AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
if err != nil || spec == nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr("Illegal resource specification", tpl, &form) | |||
return | |||
} | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
@@ -393,6 +404,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||
BenchmarkChildTypeID: 0, | |||
ResourceSpecId: resourceSpecId, | |||
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | |||
Spec: spec, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
@@ -515,7 +527,16 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | |||
return | |||
} | |||
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||
JobType: models.JobTypeInference, | |||
ComputeResource: models.GPU, | |||
Cluster: models.OpenICluster, | |||
AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
if err != nil || spec == nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr("Illegal resource specification", tpl, &form) | |||
return | |||
} | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
@@ -544,6 +565,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||
CkptName: form.CkptName, | |||
TrainUrl: form.TrainUrl, | |||
LabelName: labelName, | |||
Spec: spec, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
@@ -2453,6 +2475,17 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
return | |||
} | |||
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||
JobType: models.JobTypeBenchmark, | |||
ComputeResource: models.GPU, | |||
Cluster: models.OpenICluster, | |||
AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
if err != nil || spec == nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr("Illegal resource specification", tplCloudBrainBenchmarkNew, &form) | |||
return | |||
} | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
@@ -2478,6 +2511,7 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo | |||
BenchmarkChildTypeID: benchmarkChildTypeID, | |||
ResourceSpecId: resourceSpecId, | |||
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | |||
Spec: spec, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
@@ -2581,7 +2615,16 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
ctx.RenderWithErr(ctx.Tr("cloudbrain.error.dataset_select"), tpl, &form) | |||
return | |||
} | |||
spec, err := resource.GetAndCheckSpec(ctx.User.ID, form.SpecId, models.FindSpecsOptions{ | |||
JobType: models.JobTypeBenchmark, | |||
ComputeResource: models.GPU, | |||
Cluster: models.OpenICluster, | |||
AiCenterCode: models.AICenterOfCloudBrainOne}) | |||
if err != nil || spec == nil { | |||
cloudBrainNewDataPrepare(ctx) | |||
ctx.RenderWithErr("Illegal resource specification", tpl, &form) | |||
return | |||
} | |||
req := cloudbrain.GenerateCloudBrainTaskReq{ | |||
Ctx: ctx, | |||
DisplayJobName: displayJobName, | |||
@@ -2607,6 +2650,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) | |||
BenchmarkChildTypeID: benchmarkChildTypeID, | |||
ResourceSpecId: resourceSpecId, | |||
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), | |||
Spec: spec, | |||
} | |||
err = cloudbrain.GenerateTask(req) | |||
@@ -185,18 +185,23 @@ func AddSpecOperateLog(doerId int64, operateType string, newValue, oldValue *mod | |||
}) | |||
} | |||
func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Specification, error) { | |||
func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]*models.Specification, error) { | |||
r, err := models.FindAvailableSpecs(opts) | |||
if err != nil { | |||
log.Error("FindAvailableSpecs error.%v", err) | |||
return nil, err | |||
} | |||
specs := make([]models.Specification, 0, len(r)) | |||
specs := make([]*models.Specification, 0, len(r)) | |||
specMap := make(map[int64]string, 0) | |||
//filter exclusive spec | |||
for i := 0; i < len(r); i++ { | |||
spec := r[i] | |||
if _, has := specMap[spec.ID]; has { | |||
continue | |||
} | |||
if !spec.IsExclusive { | |||
specs = append(specs, spec) | |||
specMap[spec.ID] = "" | |||
continue | |||
} | |||
orgs := strings.Split(spec.ExclusiveOrg, ";") | |||
@@ -204,8 +209,24 @@ func FindAvailableSpecs(userId int64, opts models.FindSpecsOptions) ([]models.Sp | |||
isMember, _ := models.IsOrganizationMemberByOrgName(org, userId) | |||
if isMember { | |||
specs = append(specs, spec) | |||
specMap[spec.ID] = "" | |||
} | |||
} | |||
} | |||
return specs, err | |||
} | |||
func GetAndCheckSpec(userId int64, specId int64, opts models.FindSpecsOptions) (*models.Specification, error) { | |||
if specId == 0 { | |||
return nil, nil | |||
} | |||
opts.SpecId = specId | |||
r, err := FindAvailableSpecs(userId, opts) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if r == nil || len(r) == 0 { | |||
return nil, nil | |||
} | |||
return r[0], nil | |||
} |