You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

stopallbydeploytaskidlogic.go 3.4 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. package inference
  2. import (
  3. "context"
  4. "errors"
  5. "fmt"
  6. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  10. "strconv"
  11. "sync"
  12. "github.com/zeromicro/go-zero/core/logx"
  13. )
  14. type StopAllByDeployTaskIdLogic struct {
  15. logx.Logger
  16. ctx context.Context
  17. svcCtx *svc.ServiceContext
  18. }
  19. func NewStopAllByDeployTaskIdLogic(ctx context.Context, svcCtx *svc.ServiceContext) *StopAllByDeployTaskIdLogic {
  20. return &StopAllByDeployTaskIdLogic{
  21. Logger: logx.WithContext(ctx),
  22. ctx: ctx,
  23. svcCtx: svcCtx,
  24. }
  25. }
  26. func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByDeployTaskIdReq) (resp *types.StopAllByDeployTaskIdResp, err error) {
  27. resp = &types.StopAllByDeployTaskIdResp{}
  28. id, err := strconv.ParseInt(req.Id, 10, 64)
  29. list, err := l.svcCtx.Scheduler.AiStorages.GetInstanceListByDeployTaskId(id)
  30. if err != nil {
  31. return nil, err
  32. }
  33. if len(list) == 0 {
  34. return nil, errors.New("instances are empty")
  35. }
  36. err = l.svcCtx.Scheduler.AiStorages.UpdateDeployTaskById(id)
  37. if err != nil {
  38. return nil, err
  39. }
  40. err = l.stopAll(list)
  41. if err != nil {
  42. return nil, err
  43. }
  44. return resp, nil
  45. }
  46. func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstance) error {
  47. var wg sync.WaitGroup
  48. var errCh = make(chan interface{}, len(list))
  49. var errs []interface{}
  50. buf := make(chan bool, 2)
  51. for _, instance := range list {
  52. wg.Add(1)
  53. ins := instance
  54. buf <- true
  55. go func() {
  56. in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
  57. if err != nil {
  58. e := struct {
  59. errTyp uint8
  60. err error
  61. instanceName string
  62. clusterName string
  63. }{
  64. errTyp: 1,
  65. err: err,
  66. instanceName: ins.InstanceName,
  67. clusterName: ins.ClusterName,
  68. }
  69. errCh <- e
  70. wg.Done()
  71. <-buf
  72. return
  73. }
  74. if status.CheckRunningStatus(in) {
  75. success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId)
  76. if !success {
  77. e := struct {
  78. errTyp uint8
  79. err error
  80. instanceName string
  81. clusterName string
  82. }{
  83. errTyp: 2,
  84. err: err,
  85. instanceName: ins.InstanceName,
  86. clusterName: ins.ClusterName,
  87. }
  88. errCh <- e
  89. wg.Done()
  90. <-buf
  91. return
  92. }
  93. }
  94. wg.Done()
  95. <-buf
  96. }()
  97. }
  98. wg.Wait()
  99. close(errCh)
  100. for e := range errCh {
  101. errs = append(errs, e)
  102. }
  103. if len(errs) != 0 {
  104. var msg string
  105. for _, err := range errs {
  106. e := (err).(struct {
  107. errTyp uint8
  108. err error
  109. instanceName string
  110. clusterName string
  111. })
  112. switch e.errTyp {
  113. case 1:
  114. msg += fmt.Sprintf("GetInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
  115. case 2:
  116. msg += fmt.Sprintf("StopInstance Failed # clusterName: %v , instanceName: %v , error: %v \n", e.clusterName, e.instanceName, e.err.Error())
  117. }
  118. }
  119. return errors.New(msg)
  120. }
  121. return nil
  122. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.