|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- package stat
-
- import (
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
- "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
- "net/http"
- "strconv"
- "sync"
- )
-
- func UpdateClusterResources(svc *svc.ServiceContext, list []*types.AdapterInfo) {
- var wg sync.WaitGroup
- for _, adapter := range list {
- clusters, err := svc.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
- if err != nil {
- continue
- }
- for _, cluster := range clusters.List {
- c := cluster
- clusterResource, err := svc.Scheduler.AiStorages.GetClusterResourcesById(c.Id)
- if err != nil {
- continue
- }
- wg.Add(1)
- go func() {
- _, ok := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id]
- if !ok {
- wg.Done()
- return
- }
- h := http.Request{}
- stat, err := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id].GetResourceStats(h.Context())
- if err != nil {
- wg.Done()
- return
- }
- if stat == nil {
- wg.Done()
- return
- }
- clusterType, err := strconv.ParseInt(adapter.Type, 10, 64)
- if err != nil {
- wg.Done()
- return
- }
- var cardTotal int64
- var topsTotal float64
- var cardHours float64
- for _, card := range stat.CardsAvail {
- cardTotal += int64(card.CardNum)
- topsTotal += card.TOpsAtFp16 * float64(card.CardNum)
- cardHours += card.CardHours
- }
-
- if (models.TClusterResource{} == *clusterResource) {
- err = svc.Scheduler.AiStorages.SaveClusterResources(adapter.Id, c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
- stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal, cardHours,
- stat.Balance, stat.TaskCompleted)
- if err != nil {
- wg.Done()
- return
- }
- } else {
- if stat.CpuCoreTotal == 0 || stat.MemTotal == 0 || stat.DiskTotal == 0 {
- wg.Done()
- return
- }
- clusterResource.CardTotal = cardTotal
- clusterResource.CardTopsTotal = topsTotal
- clusterResource.CpuAvail = float64(stat.CpuCoreAvail)
- clusterResource.CpuTotal = float64(stat.CpuCoreTotal)
- clusterResource.MemAvail = stat.MemAvail
- clusterResource.MemTotal = stat.MemTotal
- clusterResource.DiskAvail = stat.DiskAvail
- clusterResource.DiskTotal = stat.DiskTotal
- clusterResource.CardHours = cardHours
- clusterResource.Balance = stat.Balance
- clusterResource.TaskCompleted = stat.TaskCompleted
-
- err := svc.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
- if err != nil {
- wg.Done()
- return
- }
- }
- wg.Done()
- }()
- }
- }
- wg.Wait()
- return
- }
|