Browse Source

#2701

update
tags/v1.22.9.1^2^2
chenyifan01 3 years ago
parent
commit
3f9be66278
3 changed files with 110 additions and 10 deletions
  1. +72
    -1
      models/resource_specification.go
  2. +1
    -8
      routers/repo/modelarts.go
  3. +37
    -1
      services/cloudbrain/resource/resource_specification.go

+ 72
- 1
models/resource_specification.go View File

@@ -2,6 +2,7 @@ package models


import ( import (
"code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/timeutil"
"fmt"
"xorm.io/builder" "xorm.io/builder"
) )


@@ -423,6 +424,10 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi
return nil, err return nil, err
} }
sess.Commit() sess.Commit()
return BuildSpecification(queue, spec), nil
}

func BuildSpecification(queue ResourceQueue, spec ResourceSpecification) *Specification {
return &Specification{ return &Specification{
ID: spec.ID, ID: spec.ID,
SourceSpecId: spec.SourceSpecId, SourceSpecId: spec.SourceSpecId,
@@ -439,7 +444,7 @@ func InitQueueAndSpec(queue ResourceQueue, spec ResourceSpecification) (*Specifi
Cluster: queue.Cluster, Cluster: queue.Cluster,
AiCenterCode: queue.AiCenterCode, AiCenterCode: queue.AiCenterCode,
AiCenterName: queue.AiCenterName, AiCenterName: queue.AiCenterName,
}, nil
}
} }


func GetCloudbrainOneAccCardType(queueCode string) string { func GetCloudbrainOneAccCardType(queueCode string) string {
@@ -454,3 +459,69 @@ func GetCloudbrainOneAccCardType(queueCode string) string {
} }
return "" return ""
} }

// Package-level cache for the cloudbrain-two specifications.
// cloudbrainTwoSpecsInitFlag records whether cloudbrainTwoSpecs has been
// populated by a successful InitCloudbrainTwoSpecs call.
var (
	cloudbrainTwoSpecsInitFlag = false
	cloudbrainTwoSpecs         map[string]*Specification
)

// GetCloudbrainTwoSpecs returns the cached cloudbrain-two specification map,
// building it through InitCloudbrainTwoSpecs on the first successful call.
//
// If initialization fails the error is returned and nothing is cached, so the
// next call attempts initialization again.
//
// NOTE(review): the flag and the map are read and written without any lock;
// confirm callers never invoke this concurrently.
func GetCloudbrainTwoSpecs() (map[string]*Specification, error) {
	// Fast path: the cache has already been filled.
	if cloudbrainTwoSpecsInitFlag {
		return cloudbrainTwoSpecs, nil
	}

	specs, err := InitCloudbrainTwoSpecs()
	if err != nil {
		return nil, err
	}

	cloudbrainTwoSpecs = specs
	cloudbrainTwoSpecsInitFlag = true
	return cloudbrainTwoSpecs, nil
}

// InitCloudbrainTwoSpecs makes sure the "openisupport" resource queue and its
// 1-, 2-, 4- and 8-card "modelarts.bm.910.arm.public.N" resource
// specifications exist, inserting whichever ones are missing, and returns the
// resulting specifications keyed by their source spec id.
//
// Newly inserted specifications are created with Status SpecOffShelf. Any
// lookup or insert error aborts the whole operation and is returned as-is.
func InitCloudbrainTwoSpecs() (map[string]*Specification, error) {
	const (
		supportQueueCode = "openisupport"
		flavorPrefix     = "modelarts.bm.910.arm.public."
	)

	queue, err := GetResourceQueue(&ResourceQueue{QueueCode: supportQueueCode})
	if err != nil {
		return nil, err
	}
	if queue == nil {
		// No such queue yet: create the placeholder queue used for history tasks.
		queue = &ResourceQueue{
			QueueCode:       supportQueueCode,
			Cluster:         OpenICluster,
			AiCenterCode:    AICenterOfCloudBrainTwo,
			AiCenterName:    "云脑二",
			ComputeResource: NPU,
			AccCardType:     "ASCEND910",
			Remark:          "处理历史云脑任务时自动生成",
		}
		if _, err = x.InsertOne(queue); err != nil {
			return nil, err
		}
	}

	specs := make(map[string]*Specification)
	// Card counts double each step: 1, 2, 4, 8.
	for _, cards := range []int{1, 2, 4, 8} {
		flavorCode := flavorPrefix + fmt.Sprint(cards)

		spec, err := GetResourceSpecification(&ResourceSpecification{
			SourceSpecId: flavorCode,
			QueueId:      queue.ID,
		})
		if err != nil {
			return nil, err
		}
		if spec == nil {
			// CPU cores and memory scale linearly with the card count.
			spec = &ResourceSpecification{
				QueueId:      queue.ID,
				SourceSpecId: flavorCode,
				AccCardsNum:  cards,
				CpuCores:     cards * 24,
				MemGiB:       float32(cards * 256),
				GPUMemGiB:    float32(32),
				Status:       SpecOffShelf,
			}
			if _, err = x.Insert(spec); err != nil {
				return nil, err
			}
		}
		specs[flavorCode] = BuildSpecification(*queue, *spec)
	}
	return specs, nil
}

+ 1
- 8
routers/repo/modelarts.go View File

@@ -917,14 +917,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
} }
ctx.Data["engine_versions"] = versionInfos.Version ctx.Data["engine_versions"] = versionInfos.Version


var flavorInfos modelarts.Flavor
if err = json.Unmarshal([]byte(setting.TrainJobFLAVORINFOS), &flavorInfos); err != nil {
ctx.ServerError("json.Unmarshal failed:", err)
return err
}
ctx.Data["flavor_infos"] = flavorInfos.Info

setSpecBySpecialPoolConfig(ctx, string(models.JobTypeTrain))
prepareCloudbrainTwoTrainSpecs(ctx)


var Parameters modelarts.Parameters var Parameters modelarts.Parameters
if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil { if err = json.Unmarshal([]byte(task.Parameters), &Parameters); err != nil {


+ 37
- 1
services/cloudbrain/resource/resource_specification.go View File

@@ -5,12 +5,14 @@ import (
"code.gitea.io/gitea/modules/cloudbrain" "code.gitea.io/gitea/modules/cloudbrain"
"code.gitea.io/gitea/modules/grampus" "code.gitea.io/gitea/modules/grampus"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/routers/response" "code.gitea.io/gitea/routers/response"
"code.gitea.io/gitea/services/admin/operate_log" "code.gitea.io/gitea/services/admin/operate_log"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"strconv"
"strings" "strings"
"time" "time"
) )
@@ -296,6 +298,7 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) {
continue continue
} }
success++ success++
time.Sleep(500 * time.Millisecond)
} }


} else { } else {
@@ -318,13 +321,13 @@ func RefreshHistorySpec(scopeAll bool, ids []int64) (int64, int64, error) {
break break
} }
for _, task := range list { for _, task := range list {
time.Sleep(1 * time.Second)
err = RefreshOneHistorySpec(task) err = RefreshOneHistorySpec(task)
if err != nil { if err != nil {
log.Error("RefreshOneHistorySpec error.%v", err) log.Error("RefreshOneHistorySpec error.%v", err)
continue continue
} }
success++ success++
time.Sleep(500 * time.Millisecond)
} }
if len(list) < pageSize { if len(list) < pageSize {
log.Info("RefreshHistorySpec. list < pageSize") log.Info("RefreshHistorySpec. list < pageSize")
@@ -342,6 +345,8 @@ func RefreshOneHistorySpec(task *models.Cloudbrain) error {
switch task.Type { switch task.Type {
case models.TypeCloudBrainOne: case models.TypeCloudBrainOne:
spec, err = getCloudbrainOneSpec(task) spec, err = getCloudbrainOneSpec(task)
case models.TypeCloudBrainTwo:
spec, err = getCloudbrainTwoSpec(task)
} }
if err != nil { if err != nil {
log.Error("find spec error,task.ID=%d err=%v", task.ID, err) log.Error("find spec error,task.ID=%d err=%v", task.ID, err)
@@ -506,6 +511,37 @@ func getCloudbrainOneSpec(task *models.Cloudbrain) (*models.Specification, error


} }


// getCloudbrainTwoSpec resolves the specification of a cloudbrain-two task.
//
// The flavor code is taken from task.FlavorCode when it is set. Otherwise it
// is fetched from ModelArts: via GetNotebook2 for debug jobs, and via
// GetTrainJob for train and inference jobs. The flavor code is then looked up
// in the map returned by models.GetCloudbrainTwoSpecs.
//
// Returns (nil, nil) when the job type is not handled, when the remote call
// yields no result, or when the flavor code has no entry in the map. Errors
// from loading the map or from the remote calls are logged and returned.
func getCloudbrainTwoSpec(task *models.Cloudbrain) (*models.Specification, error) {
	specMap, err := models.GetCloudbrainTwoSpecs()
	if err != nil {
		log.Error("InitCloudbrainTwoSpecs err.%v", err)
		return nil, err
	}

	// The task record already carries the flavor; no remote lookup is needed.
	if task.FlavorCode != "" {
		return specMap[task.FlavorCode], nil
	}

	switch task.JobType {
	case string(models.JobTypeDebug):
		result, err := modelarts.GetNotebook2(task.JobID)
		if err != nil {
			log.Error("getCloudbrainTwoSpec GetNotebook2 error.%v", err)
			return nil, err
		}
		if result != nil {
			return specMap[result.Flavor], nil
		}
	case string(models.JobTypeTrain), string(models.JobTypeInference):
		result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
		if err != nil {
			// One verb per argument: the previous single-%v format printed the
			// job name in place of the error.
			log.Error("getCloudbrainTwoSpec GetTrainJob error. jobName=%s err=%v", task.JobName, err)
			return nil, err
		}
		if result != nil {
			return specMap[result.Flavor.Code], nil
		}
	}
	return nil, nil
}

func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error { func RefreshCloudbrainTwoSpec(task *models.Cloudbrain) error {
return nil return nil
} }


Loading…
Cancel
Save