|
|
|
@@ -26,6 +26,8 @@ import ( |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" |
|
|
|
"strconv" |
|
|
|
"strings" |
|
|
|
"sync" |
|
|
|
"time" |
|
|
|
) |
|
|
|
|
|
|
|
const ( |
|
|
|
@@ -266,96 +268,144 @@ func (s *ShuguangAi) QuerySpecs(ctx context.Context) (interface{}, error) { |
|
|
|
} |
|
|
|
|
|
|
|
func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) { |
|
|
|
//balance |
|
|
|
userReq := &hpcAC.GetUserInfoReq{} |
|
|
|
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
var wg sync.WaitGroup |
|
|
|
wg.Add(4) |
|
|
|
var cBalance = make(chan float64) |
|
|
|
var cMemTotal = make(chan float64) |
|
|
|
var cTotalCpu = make(chan int64) |
|
|
|
|
|
|
|
resourceStats := &collector.ResourceStats{ |
|
|
|
ClusterId: strconv.FormatInt(s.participantId, 10), |
|
|
|
Name: s.platform, |
|
|
|
} |
|
|
|
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64) |
|
|
|
|
|
|
|
//resource limit |
|
|
|
limitReq := &hpcAC.QueueReq{} |
|
|
|
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
dcu := &collector.Card{ |
|
|
|
Platform: SHUGUANGAI, |
|
|
|
Type: CARD, |
|
|
|
Name: DCU, |
|
|
|
TOpsAtFp16: DCU_TOPS, |
|
|
|
} |
|
|
|
totalCpu := limitResp.Data.AccountMaxCpu |
|
|
|
totalDcu := limitResp.Data.AccountMaxDcu |
|
|
|
|
|
|
|
//balance |
|
|
|
go func() { |
|
|
|
userReq := &hpcAC.GetUserInfoReq{} |
|
|
|
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq) |
|
|
|
if err != nil { |
|
|
|
return |
|
|
|
} |
|
|
|
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64) |
|
|
|
resourceStats.Balance = balance |
|
|
|
|
|
|
|
cBalance <- balance |
|
|
|
}() |
|
|
|
|
|
|
|
//resource limit |
|
|
|
go func() { |
|
|
|
limitReq := &hpcAC.QueueReq{} |
|
|
|
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq) |
|
|
|
if err != nil { |
|
|
|
wg.Done() |
|
|
|
return |
|
|
|
} |
|
|
|
totalCpu := limitResp.Data.AccountMaxCpu |
|
|
|
totalDcu := limitResp.Data.AccountMaxDcu |
|
|
|
|
|
|
|
dcu.CardNum = int32(totalDcu) |
|
|
|
resourceStats.CpuCoreTotal = totalCpu |
|
|
|
|
|
|
|
cTotalCpu <- totalCpu |
|
|
|
wg.Done() |
|
|
|
}() |
|
|
|
|
|
|
|
//disk |
|
|
|
diskReq := &hpcAC.ParaStorQuotaReq{} |
|
|
|
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
go func() { |
|
|
|
diskReq := &hpcAC.ParaStorQuotaReq{} |
|
|
|
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) |
|
|
|
if err != nil { |
|
|
|
wg.Done() |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3) |
|
|
|
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) |
|
|
|
|
|
|
|
totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3) |
|
|
|
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) |
|
|
|
resourceStats.DiskTotal = totalDisk |
|
|
|
resourceStats.DiskAvail = availDisk |
|
|
|
wg.Done() |
|
|
|
}() |
|
|
|
|
|
|
|
//memory |
|
|
|
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES |
|
|
|
go func() { |
|
|
|
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil) |
|
|
|
if err != nil { |
|
|
|
wg.Done() |
|
|
|
return |
|
|
|
} |
|
|
|
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES |
|
|
|
|
|
|
|
resourceStats.MemTotal = memSize |
|
|
|
cMemTotal <- memSize |
|
|
|
wg.Done() |
|
|
|
}() |
|
|
|
|
|
|
|
//resources being occupied |
|
|
|
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
var CpuCoreAvail int64 |
|
|
|
var MemAvail float64 |
|
|
|
if len(memberJobResp.Data) != 0 { |
|
|
|
CpuCoreAvail = totalCpu |
|
|
|
MemAvail = memSize |
|
|
|
} else { |
|
|
|
var cpuCoreUsed int64 |
|
|
|
var memUsed float64 |
|
|
|
for _, datum := range memberJobResp.Data { |
|
|
|
cpuCoreUsed += datum.CpuCore |
|
|
|
} |
|
|
|
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core |
|
|
|
if cpuCoreUsed > totalCpu { |
|
|
|
CpuCoreAvail = 0 |
|
|
|
} else { |
|
|
|
CpuCoreAvail = totalCpu - cpuCoreUsed |
|
|
|
go func() { |
|
|
|
memSize := <-cMemTotal |
|
|
|
totalCpu := <-cTotalCpu |
|
|
|
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil) |
|
|
|
if err != nil { |
|
|
|
wg.Done() |
|
|
|
return |
|
|
|
} |
|
|
|
if memUsed > memSize { |
|
|
|
MemAvail = 0 |
|
|
|
var cpuCoreAvail int64 |
|
|
|
var memAvail float64 |
|
|
|
if len(memberJobResp.Data) != 0 { |
|
|
|
cpuCoreAvail = totalCpu |
|
|
|
memAvail = memSize |
|
|
|
} else { |
|
|
|
MemAvail = memSize - memUsed |
|
|
|
var cpuCoreUsed int64 |
|
|
|
var memUsed float64 |
|
|
|
for _, datum := range memberJobResp.Data { |
|
|
|
cpuCoreUsed += datum.CpuCore |
|
|
|
} |
|
|
|
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core |
|
|
|
if cpuCoreUsed > totalCpu { |
|
|
|
cpuCoreAvail = 0 |
|
|
|
} else { |
|
|
|
cpuCoreAvail = totalCpu - cpuCoreUsed |
|
|
|
} |
|
|
|
if memUsed > memSize { |
|
|
|
memAvail = 0 |
|
|
|
} else { |
|
|
|
memAvail = memSize - memUsed |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
resourceStats.CpuCoreAvail = cpuCoreAvail |
|
|
|
resourceStats.MemAvail = memAvail |
|
|
|
wg.Done() |
|
|
|
}() |
|
|
|
|
|
|
|
//usable hours |
|
|
|
var balance float64 |
|
|
|
|
|
|
|
select { |
|
|
|
case v := <-cBalance: |
|
|
|
balance = v |
|
|
|
case <-time.After(2 * time.Second): |
|
|
|
return nil, errors.New("get balance rpc call failed") |
|
|
|
} |
|
|
|
|
|
|
|
var cards []*collector.Card |
|
|
|
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3) |
|
|
|
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3) |
|
|
|
|
|
|
|
dcu := &collector.Card{ |
|
|
|
Platform: SHUGUANGAI, |
|
|
|
Type: CARD, |
|
|
|
Name: DCU, |
|
|
|
TOpsAtFp16: DCU_TOPS, |
|
|
|
CardHours: cardHours, |
|
|
|
CardNum: int32(totalDcu), |
|
|
|
} |
|
|
|
dcu.CardHours = cardHours |
|
|
|
resourceStats.CpuCoreHours = cpuHours |
|
|
|
|
|
|
|
wg.Wait() |
|
|
|
|
|
|
|
cards = append(cards, dcu) |
|
|
|
resourceStats := &collector.ResourceStats{ |
|
|
|
ClusterId: strconv.FormatInt(s.participantId, 10), |
|
|
|
Name: s.platform, |
|
|
|
Balance: balance, |
|
|
|
CpuCoreTotal: totalCpu, |
|
|
|
CpuCoreAvail: CpuCoreAvail, |
|
|
|
DiskTotal: totalDisk, |
|
|
|
DiskAvail: availDisk, |
|
|
|
MemTotal: memSize, |
|
|
|
MemAvail: MemAvail, |
|
|
|
CpuCoreHours: cpuHours, |
|
|
|
CardsAvail: cards, |
|
|
|
} |
|
|
|
resourceStats.CardsAvail = cards |
|
|
|
|
|
|
|
return resourceStats, nil |
|
|
|
} |
|
|
|
|