You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

getcentertasklistlogic.go 3.2 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. package ai
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  7. "strconv"
  8. "sync"
  9. "time"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
  12. "github.com/zeromicro/go-zero/core/logx"
  13. )
  14. type GetCenterTaskListLogic struct {
  15. logx.Logger
  16. ctx context.Context
  17. svcCtx *svc.ServiceContext
  18. }
  19. func NewGetCenterTaskListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterTaskListLogic {
  20. return &GetCenterTaskListLogic{
  21. Logger: logx.WithContext(ctx),
  22. ctx: ctx,
  23. svcCtx: svcCtx,
  24. }
  25. }
  26. func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskListResp, err error) {
  27. resp = &types.CenterTaskListResp{}
  28. var mu sync.RWMutex
  29. ch := make(chan struct{})
  30. adapterList, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
  31. if err != nil {
  32. return nil, err
  33. }
  34. go l.updateAiTaskStatus(&mu, ch, adapterList)
  35. for _, adapter := range adapterList {
  36. mu.RLock()
  37. taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
  38. mu.RUnlock()
  39. if err != nil {
  40. continue
  41. }
  42. if len(taskList) == 0 {
  43. continue
  44. }
  45. for _, task := range taskList {
  46. var elapsed time.Duration
  47. switch task.Status {
  48. case constants.Completed:
  49. end, err := time.ParseInLocation(constants.Layout, task.EndTime, time.Local)
  50. if err != nil {
  51. elapsed = time.Duration(0)
  52. }
  53. start, err := time.ParseInLocation(constants.Layout, task.StartTime, time.Local)
  54. if err != nil {
  55. elapsed = time.Duration(0)
  56. }
  57. elapsed = end.Sub(start)
  58. case constants.Running:
  59. elapsed = time.Now().Sub(task.CommitTime)
  60. default:
  61. elapsed = 0
  62. }
  63. t := &types.AiTask{
  64. Name: task.Name,
  65. Status: task.Status,
  66. Cluster: task.ClusterName,
  67. Card: task.Card,
  68. TimeElapsed: int32(elapsed.Seconds()),
  69. }
  70. resp.List = append(resp.List, t)
  71. }
  72. }
  73. select {
  74. case _ = <-ch:
  75. return resp, nil
  76. case <-time.After(2 * time.Second):
  77. return resp, nil
  78. }
  79. }
  80. func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
  81. var wg sync.WaitGroup
  82. for _, adapter := range list {
  83. taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
  84. if err != nil {
  85. continue
  86. }
  87. if len(taskList) == 0 {
  88. continue
  89. }
  90. for _, task := range taskList {
  91. t := task
  92. if t.Status == constants.Completed {
  93. continue
  94. }
  95. wg.Add(1)
  96. go func() {
  97. trainingTask, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][strconv.FormatInt(t.ClusterId, 10)].GetTrainingTask(l.ctx, t.JobId)
  98. if err != nil {
  99. msg := fmt.Sprintf("AiTaskId: %v, clusterId: %v , JobId: %v, error: %v \n", t.Id, t.ClusterId, t.JobId, err.Error())
  100. logx.Errorf(errors.New(msg).Error())
  101. wg.Done()
  102. return
  103. }
  104. t.Status = trainingTask.Status
  105. t.StartTime = trainingTask.Start
  106. t.EndTime = trainingTask.End
  107. mu.Lock()
  108. err = l.svcCtx.Scheduler.AiStorages.UpdateAiTask(t)
  109. mu.Unlock()
  110. if err != nil {
  111. wg.Done()
  112. return
  113. }
  114. wg.Done()
  115. }()
  116. }
  117. }
  118. wg.Wait()
  119. ch <- struct{}{}
  120. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.