Browse Source

fix: update ResourceSpecs of ai modelarts

pull/413/head
qiwang 9 months ago
parent
commit
e12815dea8
1 changed file with 13 additions and 9 deletions
  1. +13
    -9
      internal/storeLink/modelarts.go

+ 13
- 9
internal/storeLink/modelarts.go View File

@@ -930,6 +930,7 @@ func (m *ModelArtsLink) CheckImageExist(ctx context.Context, option *option.Infe

func (m *ModelArtsLink) GetResourceSpecs(ctx context.Context) (*collector.ResourceSpec, error) {
var wg sync.WaitGroup
//查询modelarts资源规格
req := &modelarts.GetResourceFlavorsReq{}
resp, err := m.modelArtsRpc.GetResourceFlavors(ctx, req)
if err != nil {
@@ -947,46 +948,49 @@ func (m *ModelArtsLink) GetResourceSpecs(ctx context.Context) (*collector.Resour
var BalanceValue float64 = -1
var RateValue float64 = 0.930000
var StorageValue int64 = 1024
var AvailableValue int64 = 886
for _, Flavors := range resp.Items {
if Flavors.Metadata.Name == "modelarts.kat1.8xlarge" {
MoUsage.CpuSize, err = strconv.ParseInt(Flavors.Spec.Cpu, 10, 64) //CPU的值
if err != nil {
// 如果转换失败,处理错误
fmt.Println("转换错误:", err)
return nil, err
}
cpusum = MoUsage.CpuSize
MoUsage.NpuSize, err = strconv.ParseInt(Flavors.Spec.Npu.Size, 10, 64) //NPU的值
if err != nil {
// 如果转换失败,处理错误
fmt.Println("转换错误:", err)
return nil, err
}
npusum = MoUsage.NpuSize
re := regexp.MustCompile(`\d+`)
numberStr := re.FindString(Flavors.Spec.Memory)
MoUsage.MemorySize, err = strconv.ParseInt(numberStr, 10, 64)
//MoUsage.MemorySize, err = strconv.ParseInt(Flavors.Spec.Memory, 10, 64) //Memory的值
numberStr := re.FindString(Flavors.Spec.Memory) //正则表达式去单位
MoUsage.MemorySize, err = strconv.ParseInt(numberStr, 10, 64) //内存的值
if err != nil {
// 如果转换失败,处理错误
fmt.Println("转换错误:", err)
return nil, err
}
memorysum = MoUsage.MemorySize * 1024
}
}
//查询获取训练作业支持的公共规格
//查询获取训练作业支持的公共规格(包括1,2,4,8卡的选择和显存的数值)
reqJobFlavors := &modelarts.TrainingJobFlavorsReq{
Platform: m.platform,
}
respJobFlavors, err := m.modelArtsRpc.GetTrainingJobFlavors(ctx, reqJobFlavors)
if err != nil {
wg.Done()
return nil, err
}

for _, TrainLists := range respJobFlavors.Flavors {
if TrainLists.FlavorId == "modelarts.kat1.8xlarge" {
re := regexp.MustCompile(`\d+`)
numberStr := re.FindString(string(TrainLists.FlavorInfo.Npu.Memory))
MoUsage.VMemorySize, err = strconv.ParseInt(numberStr, 10, 64) //NPU的值
numberStr := re.FindString(string(TrainLists.FlavorInfo.Npu.Memory)) //正则表达式去单位
MoUsage.VMemorySize, err = strconv.ParseInt(numberStr, 10, 64) //显存的值
VMemorysum = MoUsage.VMemorySize * int64(TrainLists.FlavorInfo.Npu.UnitNum)
}
}
@@ -999,7 +1003,7 @@ func (m *ModelArtsLink) GetResourceSpecs(ctx context.Context) (*collector.Resour
respList, err := m.modelArtsRpc.GetListTrainingJobs(ctx, reqTraining)
if err != nil {
wg.Done()
return nil, err
}
var CoreNum int32 = 0
var NpuNum int32 = 0
@@ -1030,7 +1034,7 @@ func (m *ModelArtsLink) GetResourceSpecs(ctx context.Context) (*collector.Resour
RunningTask := &collector.Usage{Type: strings.ToUpper(RUNNINGTASK), Total: &collector.UnitValue{Unit: NUMBER, Value: RunningTaskNum}}
Balance := &collector.Usage{Type: strings.ToUpper(BALANCE), Total: &collector.UnitValue{Unit: RMB, Value: BalanceValue}}
Rate := &collector.Usage{Type: strings.ToUpper(RATE), Total: &collector.UnitValue{Unit: PERHOUR, Value: RateValue}}
Storage := &collector.Usage{Type: strings.ToUpper(STORAGE), Total: &collector.UnitValue{Unit: GIGABYTE, Value: StorageValue}}
Storage := &collector.Usage{Type: strings.ToUpper(STORAGE), Total: &collector.UnitValue{Unit: GIGABYTE, Value: StorageValue}, Name: strings.ToUpper(DISK), Available: &collector.UnitValue{Unit: GIGABYTE, Value: AvailableValue}}

resUsage := &collector.ResourceSpec{
ClusterId: strconv.FormatInt(m.participantId, 10),


Loading…
Cancel
Save