You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

clusterResources.go 2.7 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. package stat
  2. import (
  3. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  4. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  5. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  6. "net/http"
  7. "strconv"
  8. "sync"
  9. )
  10. func UpdateClusterResources(svc *svc.ServiceContext, list []*types.AdapterInfo) {
  11. var wg sync.WaitGroup
  12. for _, adapter := range list {
  13. clusters, err := svc.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
  14. if err != nil {
  15. continue
  16. }
  17. for _, cluster := range clusters.List {
  18. c := cluster
  19. clusterResource, err := svc.Scheduler.AiStorages.GetClusterResourcesById(c.Id)
  20. if err != nil {
  21. continue
  22. }
  23. wg.Add(1)
  24. go func() {
  25. _, ok := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id]
  26. if !ok {
  27. wg.Done()
  28. return
  29. }
  30. h := http.Request{}
  31. stat, err := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id].GetResourceStats(h.Context())
  32. if err != nil {
  33. wg.Done()
  34. return
  35. }
  36. if stat == nil {
  37. wg.Done()
  38. return
  39. }
  40. clusterType, err := strconv.ParseInt(adapter.Type, 10, 64)
  41. if err != nil {
  42. wg.Done()
  43. return
  44. }
  45. var cardTotal int64
  46. var topsTotal float64
  47. var cardHours float64
  48. for _, card := range stat.CardsAvail {
  49. cardTotal += int64(card.CardNum)
  50. topsTotal += card.TOpsAtFp16 * float64(card.CardNum)
  51. cardHours += card.CardHours
  52. }
  53. if (models.TClusterResource{} == *clusterResource) {
  54. err = svc.Scheduler.AiStorages.SaveClusterResources(adapter.Id, c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
  55. stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal, cardHours,
  56. stat.Balance, stat.TaskCompleted)
  57. if err != nil {
  58. wg.Done()
  59. return
  60. }
  61. } else {
  62. if stat.CpuCoreTotal == 0 || stat.MemTotal == 0 || stat.DiskTotal == 0 {
  63. wg.Done()
  64. return
  65. }
  66. clusterResource.CardTotal = cardTotal
  67. clusterResource.CardTopsTotal = topsTotal
  68. clusterResource.CpuAvail = float64(stat.CpuCoreAvail)
  69. clusterResource.CpuTotal = float64(stat.CpuCoreTotal)
  70. clusterResource.MemAvail = stat.MemAvail
  71. clusterResource.MemTotal = stat.MemTotal
  72. clusterResource.DiskAvail = stat.DiskAvail
  73. clusterResource.DiskTotal = stat.DiskTotal
  74. clusterResource.CardHours = cardHours
  75. clusterResource.Balance = stat.Balance
  76. clusterResource.TaskCompleted = stat.TaskCompleted
  77. err := svc.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
  78. if err != nil {
  79. wg.Done()
  80. return
  81. }
  82. }
  83. wg.Done()
  84. }()
  85. }
  86. }
  87. wg.Wait()
  88. return
  89. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.