@@ -2,6 +2,7 @@ package models | |||||
import ( | import ( | ||||
"code.gitea.io/gitea/modules/timeutil" | "code.gitea.io/gitea/modules/timeutil" | ||||
"fmt" | |||||
"xorm.io/builder" | "xorm.io/builder" | ||||
) | ) | ||||
@@ -423,6 +424,10 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi | |||||
return nil, err | return nil, err | ||||
} | } | ||||
sess.Commit() | sess.Commit() | ||||
return BuildSpecification(queue, spec), nil | |||||
} | |||||
func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification { | |||||
return &Specification{ | return &Specification{ | ||||
ID: spec.ID, | ID: spec.ID, | ||||
SourceSpecId: spec.SourceSpecId, | SourceSpecId: spec.SourceSpecId, | ||||
@@ -439,7 +444,7 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi | |||||
Cluster: queue.Cluster, | Cluster: queue.Cluster, | ||||
AiCenterCode: queue.AiCenterCode, | AiCenterCode: queue.AiCenterCode, | ||||
AiCenterName: queue.AiCenterName, | AiCenterName: queue.AiCenterName, | ||||
}, nil | |||||
} | |||||
} | } | ||||
func GetCloudbrainOneAccCardType(queueCode string) string { | func GetCloudbrainOneAccCardType(queueCode string) string { | ||||
@@ -454,3 +459,69 @@ func GetCloudbrainOneAccCardType(queueCode string) string { | |||||
} | } | ||||
return "" | return "" | ||||
} | } | ||||
var cloudbrainTwoSpecsInitFlag = false | |||||
var cloudbrainTwoSpecs map[string]*Specification | |||||
func GetCloudbrainTwoSpecs() (map[string]*Specification, error) { | |||||
if !cloudbrainTwoSpecsInitFlag { | |||||
r, err := InitCloudbrainTwoSpecs() | |||||
if err != nil { | |||||
return nil, err | |||||
} | |||||
cloudbrainTwoSpecsInitFlag = true | |||||
cloudbrainTwoSpecs = r | |||||
} | |||||
return cloudbrainTwoSpecs, nil | |||||
} | |||||
func InitCloudbrainTwoSpecs() (map[string]*Specification, error) { | |||||
r := make(map[string]*Specification, 0) | |||||
queue, err := GetResourceQueue(&ResourceQueue{QueueCode: "openisupport"}) | |||||
if err != nil { | |||||
return nil, err | |||||
} | |||||
if queue == nil { | |||||
queue = &ResourceQueue{ | |||||
QueueCode: "openisupport", | |||||
Cluster: OpenICluster, | |||||
AiCenterCode: AICenterOfCloudBrainTwo, | |||||
AiCenterName: "云脑二", | |||||
ComputeResource: NPU, | |||||
AccCardType: "ASCEND910", | |||||
Remark: "处理历史云脑任务时自动生成", | |||||
} | |||||
_, err = x.InsertOne(queue) | |||||
if err != nil { | |||||
return nil, err | |||||
} | |||||
} | |||||
for i := 1; i <= 8; i = i * 2 { | |||||
sourceSpecId := "modelarts.bm.910.arm.public." + fmt.Sprint(i) | |||||
spec, err := GetResourceSpecification(&ResourceSpecification{ | |||||
SourceSpecId: sourceSpecId, | |||||
QueueId: queue.ID, | |||||
}) | |||||
if err != nil { | |||||
return nil, err | |||||
} | |||||
if spec == nil { | |||||
spec = &ResourceSpecification{ | |||||
QueueId: queue.ID, | |||||
SourceSpecId: sourceSpecId, | |||||
AccCardsNum: i, | |||||
CpuCores: i * 24, | |||||
MemGiB: float32(i * 256), | |||||
GPUMemGiB: float32(32), | |||||
Status: SpecOffShelf, | |||||
} | |||||
_, err = x.Insert(spec) | |||||
if err != nil { | |||||
return nil, err | |||||
} | |||||
} | |||||
r[sourceSpecId] = BuildSpecification(*queue, *spec) | |||||
} | |||||
return r, nil | |||||
} |
@@ -917,14 +917,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||||
} | } | ||||
ctx.Data["engine_versions"] = versionInfos.Version | ctx.Data["engine_versions"] = versionInfos.Version | ||||
var flavorInfos modelarts.Flavor | |||||
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil { | |||||
ctx.ServerError("json.Unmarshal failed:", err) | |||||
return err | |||||
} | |||||
ctx.Data["flavor_infos"] = flavorInfos.Info | |||||
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain)) | |||||
prepareCloudbrainTwoTrainSpecs(ctx) | |||||
var Parameters modelarts.Parameters | var Parameters modelarts.Parameters | ||||
if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil { | if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil { | ||||
@@ -5,12 +5,14 @@ import ( | |||||
"code.gitea.io/gitea/modules/cloudbrain" | "code.gitea.io/gitea/modules/cloudbrain" | ||||
"code.gitea.io/gitea/modules/grampus" | "code.gitea.io/gitea/modules/grampus" | ||||
"code.gitea.io/gitea/modules/log" | "code.gitea.io/gitea/modules/log" | ||||
"code.gitea.io/gitea/modules/modelarts" | |||||
"code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
"code.gitea.io/gitea/routers/response" | "code.gitea.io/gitea/routers/response" | ||||
"code.gitea.io/gitea/services/admin/operate_log" | "code.gitea.io/gitea/services/admin/operate_log" | ||||
"encoding/json" | "encoding/json" | ||||
"errors" | "errors" | ||||
"fmt" | "fmt" | ||||
"strconv" | |||||
"strings" | "strings" | ||||
"time" | "time" | ||||
) | ) | ||||
@@ -296,6 +298,7 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { | |||||
continue | continue | ||||
} | } | ||||
success++ | success++ | ||||
time.Sleep(500 * time.Millisecond) | |||||
} | } | ||||
} else { | } else { | ||||
@@ -318,13 +321,13 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) { | |||||
break | break | ||||
} | } | ||||
for _, task := range list { | for _, task := range list { | ||||
time.Sleep(1 * time.Second) | |||||
err = RefreshOneHistorySpec(task) | err = RefreshOneHistorySpec(task) | ||||
if err != nil { | if err != nil { | ||||
log.Error("RefreshOneHistorySpec error.%v", err) | log.Error("RefreshOneHistorySpec error.%v", err) | ||||
continue | continue | ||||
} | } | ||||
success++ | success++ | ||||
time.Sleep(500 * time.Millisecond) | |||||
} | } | ||||
if len(list) < pageSize { | if len(list) < pageSize { | ||||
log.Info("RefreshHistorySpec. list < pageSize") | log.Info("RefreshHistorySpec. list < pageSize") | ||||
@@ -342,6 +345,8 @@ func RefreshOneHistorySpec(task *models.Cloudbrain) error { | |||||
switch task.Type { | switch task.Type { | ||||
case models.TypeCloudBrainOne: | case models.TypeCloudBrainOne: | ||||
spec, err = getCloudbrainOneSpec(task) | spec, err = getCloudbrainOneSpec(task) | ||||
case models.TypeCloudBrainTwo: | |||||
spec, err = getCloudbrainTwoSpec(task) | |||||
} | } | ||||
if err != nil { | if err != nil { | ||||
log.Error("find spec error,task.ID=%d err=%v", task.ID, err) | log.Error("find spec error,task.ID=%d err=%v", task.ID, err) | ||||
@@ -506,6 +511,37 @@ func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error | |||||
} | } | ||||
func getCloudbrainTwoSpec(task *models.Cloudbrain) (*models.Specification, error) { | |||||
specMap, err := models.GetCloudbrainTwoSpecs() | |||||
if err != nil { | |||||
log.Error("InitCloudbrainTwoSpecs err.%v", err) | |||||
return nil, err | |||||
} | |||||
if task.FlavorCode != "" { | |||||
return specMap[task.FlavorCode], nil | |||||
} | |||||
if task.JobType == string(models.JobTypeDebug) { | |||||
result, err := modelarts.GetNotebook2(task.JobID) | |||||
if err != nil { | |||||
log.Error("getCloudbrainTwoSpec GetNotebook2 error.%v", err) | |||||
return nil, err | |||||
} | |||||
if result != nil { | |||||
return specMap[result.Flavor], nil | |||||
} | |||||
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | |||||
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
if err != nil { | |||||
log.Error("getCloudbrainTwoSpec GetTrainJob error:%v", task.JobName, err) | |||||
return nil, err | |||||
} | |||||
if result != nil { | |||||
return specMap[result.Flavor.Code], nil | |||||
} | |||||
} | |||||
return nil, nil | |||||
} | |||||
func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error { | func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error { | ||||
return nil | return nil | ||||
} | } | ||||