Reviewer notes (free text ahead of the first "diff --git" header is skipped by
git apply and patch).

Adds Cloudbrain Two handling to the history-spec refresh.

models/resource_specification.go
  * BuildSpecification is split out of InitQueueAndSpec and builds the
    *Specification from a queue/spec pair.
  * GetCloudbrainTwoSpecs calls InitCloudbrainTwoSpecs on first use and keeps
    the result in package-level variables. The init flag is only set after a
    successful init, so a failed init is retried on the next call. No locking
    is visible in this patch; confirm callers are not concurrent.
  * InitCloudbrainTwoSpecs looks up the "openisupport" queue and the specs
    "modelarts.bm.910.arm.public.<n>" for n = 1, 2, 4, 8, inserts whichever
    are missing, and returns the specs keyed by SourceSpecId.
routers/repo/modelarts.go
  * trainJobNewVersionDataPrepare now calls prepareCloudbrainTwoTrainSpecs(ctx)
    (not defined in this patch) instead of unmarshalling
    setting.TrainJobFLAVORINFOS and calling setSpecBySpecialPoolConfig.
services/cloudbrain/resource/resource_specification.go
  * RefreshHistorySpec sleeps 500ms after each successful refresh; the 1s
    sleep before every task in the paged branch is removed.
  * RefreshOneHistorySpec routes models.TypeCloudBrainTwo to
    getCloudbrainTwoSpec. That helper resolves the spec from task.FlavorCode
    or, when it is empty, from the flavor modelarts reports for the notebook
    (debug jobs) or train job (train/inference jobs). It returns (nil, nil)
    when no flavor is found.

Review fix: the GetTrainJob log.Error call passed two arguments
(task.JobName, err) to a format string holding a single %v, so the job name
was printed in the error position and the error itself was reported as an
EXTRA argument. The format string now has one verb per argument.

Layout note: this patch was rebuilt from a whitespace-collapsed copy. Blank
lines were inferred from the hunk line counts; tab depth and struct-literal
alignment are best guesses. If context whitespace does not match the target
tree, apply with "git apply --ignore-whitespace".

diff --git a/models/resource_specification.go b/models/resource_specification.go
index b2eadf2a1..f7354303e 100644
--- a/models/resource_specification.go
+++ b/models/resource_specification.go
@@ -2,6 +2,7 @@ package models
 
 import (
 	"code.gitea.io/gitea/modules/timeutil"
+	"fmt"
 	"xorm.io/builder"
 )
 
@@ -423,6 +424,10 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi
 		return nil, err
 	}
 	sess.Commit()
+	return BuildSpecification(queue, spec), nil
+}
+
+func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification {
 	return &Specification{
 		ID:           spec.ID,
 		SourceSpecId: spec.SourceSpecId,
@@ -439,7 +444,7 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi
 		Cluster:      queue.Cluster,
 		AiCenterCode: queue.AiCenterCode,
 		AiCenterName: queue.AiCenterName,
-	}, nil
+	}
 }
 
 func GetCloudbrainOneAccCardType(queueCode string) string {
@@ -454,3 +459,69 @@ func GetCloudbrainOneAccCardType(queueCode string) string {
 	}
 	return ""
 }
+
+var cloudbrainTwoSpecsInitFlag = false
+var cloudbrainTwoSpecs map[string]*Specification
+
+func GetCloudbrainTwoSpecs() (map[string]*Specification, error) {
+	if !cloudbrainTwoSpecsInitFlag {
+		r, err := InitCloudbrainTwoSpecs()
+		if err != nil {
+			return nil, err
+		}
+		cloudbrainTwoSpecsInitFlag = true
+		cloudbrainTwoSpecs = r
+	}
+	return cloudbrainTwoSpecs, nil
+}
+
+func InitCloudbrainTwoSpecs() (map[string]*Specification, error) {
+	r := make(map[string]*Specification, 0)
+
+	queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"})
+	if err != nil {
+		return nil, err
+	}
+	if queue == nil {
+		queue = &ResourceQueue{
+			QueueCode:       "openisupport",
+			Cluster:         OpenICluster,
+			AiCenterCode:    AICenterOfCloudBrainTwo,
+			AiCenterName:    "云脑二",
+			ComputeResource: NPU,
+			AccCardType:     "ASCEND910",
+			Remark:          "处理历史云脑任务时自动生成",
+		}
+		_, err = x.InsertOne(queue)
+		if err != nil {
+			return nil, err
+		}
+	}
+	for i := 1; i <= 8; i = i * 2 {
+		sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i)
+		spec, err := GetResourceSpecification(&ResourceSpecification{
+			SourceSpecId: sourceSpecId,
+			QueueId:      queue.ID,
+		})
+		if err != nil {
+			return nil, err
+		}
+		if spec == nil {
+			spec = &ResourceSpecification{
+				QueueId:      queue.ID,
+				SourceSpecId: sourceSpecId,
+				AccCardsNum:  i,
+				CpuCores:     i * 24,
+				MemGiB:       float32(i * 256),
+				GPUMemGiB:    float32(32),
+				Status:       SpecOffShelf,
+			}
+			_, err = x.Insert(spec)
+			if err != nil {
+				return nil, err
+			}
+		}
+		r[sourceSpecId] = BuildSpecification(*queue, *spec)
+	}
+	return r, nil
+}
diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go
index f764a2dae..0741241fa 100755
--- a/routers/repo/modelarts.go
+++ b/routers/repo/modelarts.go
@@ -917,14 +917,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
 	}
 	ctx.Data["engine_versions"] = versionInfos.Version
 
-	var flavorInfos modelarts.Flavor
-	if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
-		ctx.ServerError("json.Unmarshal failed:", err)
-		return err
-	}
-	ctx.Data["flavor_infos"] = flavorInfos.Info
-
-	setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
+	prepareCloudbrainTwoTrainSpecs(ctx)
 
 	var Parameters modelarts.Parameters
 	if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil {
diff --git a/services/cloudbrain/resource/resource_specification.go b/services/cloudbrain/resource/resource_specification.go
index bab0d3096..4107d6345 100644
--- a/services/cloudbrain/resource/resource_specification.go
+++ b/services/cloudbrain/resource/resource_specification.go
@@ -5,12 +5,14 @@ import (
 	"code.gitea.io/gitea/modules/cloudbrain"
 	"code.gitea.io/gitea/modules/grampus"
 	"code.gitea.io/gitea/modules/log"
+	"code.gitea.io/gitea/modules/modelarts"
 	"code.gitea.io/gitea/modules/setting"
 	"code.gitea.io/gitea/routers/response"
 	"code.gitea.io/gitea/services/admin/operate_log"
 	"encoding/json"
 	"errors"
 	"fmt"
+	"strconv"
 	"strings"
 	"time"
 )
@@ -296,6 +298,7 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) {
 				continue
 			}
 			success++
+			time.Sleep(500 * time.Millisecond)
 		}
 
 	} else {
@@ -318,13 +321,13 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) {
 				break
 			}
 			for _, task := range list {
-				time.Sleep(1 * time.Second)
 				err = RefreshOneHistorySpec(task)
 				if err != nil {
 					log.Error("RefreshOneHistorySpec error.%v", err)
 					continue
 				}
 				success++
+				time.Sleep(500 * time.Millisecond)
 			}
 			if len(list) < pageSize {
 				log.Info("RefreshHistorySpec. list < pageSize")
@@ -342,6 +345,8 @@ func RefreshOneHistorySpec(task *models.Cloudbrain) error {
 	switch task.Type {
 	case models.TypeCloudBrainOne:
 		spec, err = getCloudbrainOneSpec(task)
+	case models.TypeCloudBrainTwo:
+		spec, err = getCloudbrainTwoSpec(task)
 	}
 	if err != nil {
 		log.Error("find spec error,task.ID=%d err=%v", task.ID, err)
@@ -506,6 +511,37 @@ func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error
 
 }
 
+func getCloudbrainTwoSpec(task *models.Cloudbrain) (*models.Specification, error) {
+	specMap, err := models.GetCloudbrainTwoSpecs()
+	if err != nil {
+		log.Error("InitCloudbrainTwoSpecs err.%v", err)
+		return nil, err
+	}
+	if task.FlavorCode != "" {
+		return specMap[task.FlavorCode], nil
+	}
+	if task.JobType == string(models.JobTypeDebug) {
+		result, err := modelarts.GetNotebook2(task.JobID)
+		if err != nil {
+			log.Error("getCloudbrainTwoSpec GetNotebook2 error.%v", err)
+			return nil, err
+		}
+		if result != nil {
+			return specMap[result.Flavor], nil
+		}
+	} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
+		result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
+		if err != nil {
+			log.Error("getCloudbrainTwoSpec GetTrainJob error. jobName=%s err=%v", task.JobName, err)
+			return nil, err
+		}
+		if result != nil {
+			return specMap[result.Flavor.Code], nil
+		}
+	}
+	return nil, nil
+}
+
 func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error {
 	return nil
 }