@@ -2,6 +2,7 @@ package models | |||
import ( | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"fmt" | |||
"xorm.io/builder" | |||
) | |||
@@ -423,6 +424,10 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi | |||
return nil, err | |||
} | |||
sess.Commit() | |||
return BuildSpecification(queue, spec), nil | |||
} | |||
func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification { | |||
return &Specification{ | |||
ID: spec.ID, | |||
SourceSpecId: spec.SourceSpecId, | |||
@@ -439,7 +444,7 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi | |||
Cluster: queue.Cluster, | |||
AiCenterCode: queue.AiCenterCode, | |||
AiCenterName: queue.AiCenterName, | |||
}, nil | |||
} | |||
} | |||
func GetCloudbrainOneAccCardType(queueCode string) string { | |||
@@ -454,3 +459,69 @@ func GetCloudbrainOneAccCardType(queueCode string) string { | |||
} | |||
return "" | |||
} | |||
var cloudbrainTwoSpecsInitFlag = false | |||
var cloudbrainTwoSpecs map[string]*Specification | |||
func GetCloudbrainTwoSpecs() (map[string]*Specification, error) { | |||
if !cloudbrainTwoSpecsInitFlag { | |||
r, err := InitCloudbrainTwoSpecs() | |||
if err != nil { | |||
return nil, err | |||
} | |||
cloudbrainTwoSpecsInitFlag = true | |||
cloudbrainTwoSpecs = r | |||
} | |||
return cloudbrainTwoSpecs, nil | |||
} | |||
func InitCloudbrainTwoSpecs() (map[string]*Specification, error) { | |||
r := make(map[string]*Specification, 0) | |||
queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"}) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if queue == nil { | |||
queue = &ResourceQueue{ | |||
QueueCode: "openisupport", | |||
Cluster: OpenICluster, | |||
AiCenterCode: AICenterOfCloudBrainTwo, | |||
AiCenterName: "云脑二", | |||
ComputeResource: NPU, | |||
AccCardType: "ASCEND910", | |||
Remark: "处理历史云脑任务时自动生成", | |||
} | |||
_, err = x.InsertOne(queue) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
for i := 1; i <= 8; i = i * 2 { | |||
sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i) | |||
spec, err := GetResourceSpecification(&ResourceSpecification{ | |||
SourceSpecId: sourceSpecId, | |||
QueueId: queue.ID, | |||
}) | |||
if err != nil { | |||
return nil, err | |||
} | |||
if spec == nil { | |||
spec = &ResourceSpecification{ | |||
QueueId: queue.ID, | |||
SourceSpecId: sourceSpecId, | |||
AccCardsNum: i, | |||
CpuCores: i * 24, | |||
MemGiB: float32(i * 256), | |||
GPUMemGiB: float32(32), | |||
Status: SpecOffShelf, | |||
} | |||
_, err = x.Insert(spec) | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
r[sourceSpecId] = BuildSpecification(*queue, *spec) | |||
} | |||
return r, nil | |||
} |
@@ -917,14 +917,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||
} | |||
ctx.Data["engine_versions"] = versionInfos.Version | |||
var flavorInfos modelarts.Flavor | |||
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { | |||
ctx.ServerError("json.Unmarshal failed:", err) | |||
return err | |||
} | |||
ctx.Data["flavor_infos"] = flavorInfos.Info | |||
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain)) | |||
prepareCloudbrainTwoTrainSpecs(ctx) | |||
var Parameters modelarts.Parameters | |||
if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil { | |||
@@ -5,12 +5,14 @@ import ( | |||
"code.gitea.io/gitea/modules/cloudbrain" | |||
"code.gitea.io/gitea/modules/grampus" | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/modelarts" | |||
"code.gitea.io/gitea/modules/setting" | |||
"code.gitea.io/gitea/routers/response" | |||
"code.gitea.io/gitea/services/admin/operate_log" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
"strconv" | |||
"strings" | |||
"time" | |||
) | |||
@@ -296,6 +298,7 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { | |||
continue | |||
} | |||
success++ | |||
time.Sleep(500 * time.Millisecond) | |||
} | |||
} else { | |||
@@ -318,13 +321,13 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { | |||
break | |||
} | |||
for _, task := range list { | |||
time.Sleep(1 * time.Second) | |||
err = RefreshOneHistorySpec(task) | |||
if err != nil { | |||
log.Error("RefreshOneHistorySpec error.%v", err) | |||
continue | |||
} | |||
success++ | |||
time.Sleep(500 * time.Millisecond) | |||
} | |||
if len(list) < pageSize { | |||
log.Info("RefreshHistorySpec. list < pageSize") | |||
@@ -342,6 +345,8 @@ func RefreshOneHistorySpec(task *models.Cloudbrain) error { | |||
switch task.Type { | |||
case models.TypeCloudBrainOne: | |||
spec, err = getCloudbrainOneSpec(task) | |||
case models.TypeCloudBrainTwo: | |||
spec, err = getCloudbrainTwoSpec(task) | |||
} | |||
if err != nil { | |||
log.Error("find spec error,task.ID=%d err=%v", task.ID, err) | |||
@@ -506,6 +511,37 @@ func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error | |||
} | |||
func getCloudbrainTwoSpec(task *models.Cloudbrain) (*models.Specification, error) { | |||
specMap, err := models.GetCloudbrainTwoSpecs() | |||
if err != nil { | |||
log.Error("InitCloudbrainTwoSpecs err.%v", err) | |||
return nil, err | |||
} | |||
if task.FlavorCode != "" { | |||
return specMap[task.FlavorCode], nil | |||
} | |||
if task.JobType == string(models.JobTypeDebug) { | |||
result, err := modelarts.GetNotebook2(task.JobID) | |||
if err != nil { | |||
log.Error("getCloudbrainTwoSpec GetNotebook2 error.%v", err) | |||
return nil, err | |||
} | |||
if result != nil { | |||
return specMap[result.Flavor], nil | |||
} | |||
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | |||
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||
if err != nil { | |||
log.Error("getCloudbrainTwoSpec GetTrainJob error:%v", task.JobName, err) | |||
return nil, err | |||
} | |||
if result != nil { | |||
return specMap[result.Flavor.Code], nil | |||
} | |||
} | |||
return nil, nil | |||
} | |||
func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error { | |||
return nil | |||
} | |||