You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

scheduleruntasklogic.go 6.1 kB

11 months ago
11 months ago
11 months ago
10 months ago
11 months ago
11 months ago
11 months ago
10 months ago
11 months ago
11 months ago
10 months ago
11 months ago
11 months ago
11 months ago
11 months ago
11 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. package schedule
  2. import (
  3. "context"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "github.com/zeromicro/go-zero/core/logx"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  15. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  16. "gopkg.in/yaml.v2"
  17. "strings"
  18. )
  19. type ScheduleRunTaskLogic struct {
  20. logx.Logger
  21. ctx context.Context
  22. svcCtx *svc.ServiceContext
  23. }
  24. func NewScheduleRunTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleRunTaskLogic {
  25. return &ScheduleRunTaskLogic{
  26. Logger: logx.WithContext(ctx),
  27. ctx: ctx,
  28. svcCtx: svcCtx,
  29. }
  30. }
  31. func (l *ScheduleRunTaskLogic) ScheduleRunTask(req *types.RunTaskReq) (resp *types.RunTaskResp, err error) {
  32. // find task
  33. task, err := l.svcCtx.Scheduler.AiStorages.GetTaskById(req.TaskID)
  34. if err != nil {
  35. return nil, err
  36. }
  37. if task == nil {
  38. return nil, errors.New("task not found ")
  39. }
  40. if task.Status == constants.Cancelled {
  41. return nil, errors.New("task has been cancelled ")
  42. }
  43. var clusters []*strategy.AssignedCluster
  44. err = yaml.Unmarshal([]byte(task.YamlString), &clusters)
  45. if err != nil {
  46. return nil, err
  47. }
  48. opt := &option.AiOption{
  49. AdapterId: ADAPTERID,
  50. TaskName: task.Name,
  51. }
  52. // update assignedClusters
  53. err = updateClustersByScheduledDatas(task.Id, &clusters, req.ScheduledDatas)
  54. if err != nil {
  55. return nil, err
  56. }
  57. aiSchdl, err := schedulers.NewAiScheduler(l.ctx, "", l.svcCtx.Scheduler, opt)
  58. if err != nil {
  59. return nil, err
  60. }
  61. results, err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl, scheduler.SUBMIT_MODE_STORAGE_SCHEDULE, clusters)
  62. if err != nil {
  63. return nil, err
  64. }
  65. rs := (results).([]*schedulers.AiResult)
  66. err = l.SaveResult(task, rs, opt)
  67. if err != nil {
  68. return nil, err
  69. }
  70. return
  71. }
  72. func (l *ScheduleRunTaskLogic) SaveResult(task *models.Task, results []*schedulers.AiResult, opt *option.AiOption) error {
  73. for _, r := range results {
  74. opt.ComputeCard = strings.ToUpper(r.Card)
  75. adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(r.AdapterId)
  76. if err != nil {
  77. return err
  78. }
  79. clusterName, _ := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(r.ClusterId)
  80. err = l.svcCtx.Scheduler.AiStorages.SaveAiTask(task.Id, opt, adapterName, r.ClusterId, clusterName, r.JobId, constants.Saved, r.Msg)
  81. if err != nil {
  82. return err
  83. }
  84. l.svcCtx.Scheduler.AiStorages.AddNoticeInfo(r.AdapterId, adapterName, r.ClusterId, clusterName, r.TaskName, "create", "任务创建中")
  85. }
  86. return nil
  87. }
  88. func updateClustersByScheduledDatas(taskId int64, assignedClusters *[]*strategy.AssignedCluster, scheduledDatas []*types.DataScheduleResults) error {
  89. for _, cluster := range *assignedClusters {
  90. for _, data := range scheduledDatas {
  91. switch data.DataType {
  92. case "dataset":
  93. for _, result := range data.Results {
  94. if !result.Status {
  95. continue
  96. }
  97. for _, c := range result.Clusters {
  98. if cluster.ClusterId == c.ClusterID {
  99. if c.JsonData == "" {
  100. continue
  101. }
  102. jsonData := struct {
  103. Name string `json:"name"`
  104. Id string `json:"id"`
  105. }{}
  106. err := json.Unmarshal([]byte(c.JsonData), &jsonData)
  107. if err != nil {
  108. return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "dataset")
  109. }
  110. cluster.DatasetId = jsonData.Id
  111. }
  112. }
  113. }
  114. case "image":
  115. for _, result := range data.Results {
  116. if !result.Status {
  117. continue
  118. }
  119. for _, c := range result.Clusters {
  120. if cluster.ClusterId == c.ClusterID {
  121. if c.JsonData == "" {
  122. continue
  123. }
  124. jsonData := struct {
  125. Name string `json:"name"`
  126. Id string `json:"id"`
  127. }{}
  128. err := json.Unmarshal([]byte(c.JsonData), &jsonData)
  129. if err != nil {
  130. return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "image")
  131. }
  132. cluster.ImageId = jsonData.Id
  133. }
  134. }
  135. }
  136. case "code":
  137. for _, result := range data.Results {
  138. if !result.Status {
  139. continue
  140. }
  141. for _, c := range result.Clusters {
  142. if cluster.ClusterId == c.ClusterID {
  143. if c.JsonData == "" {
  144. continue
  145. }
  146. jsonData := struct {
  147. Name string `json:"name"`
  148. Id string `json:"id"`
  149. }{}
  150. err := json.Unmarshal([]byte(c.JsonData), &jsonData)
  151. if err != nil {
  152. return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "code")
  153. }
  154. cluster.CodeId = jsonData.Id
  155. }
  156. }
  157. }
  158. case "model":
  159. for _, result := range data.Results {
  160. if !result.Status {
  161. continue
  162. }
  163. for _, c := range result.Clusters {
  164. if cluster.ClusterId == c.ClusterID {
  165. if c.JsonData == "" {
  166. continue
  167. }
  168. jsonData := struct {
  169. Name string `json:"name"`
  170. Id string `json:"id"`
  171. }{}
  172. err := json.Unmarshal([]byte(c.JsonData), &jsonData)
  173. if err != nil {
  174. return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "model")
  175. }
  176. cluster.ModelId = jsonData.Id
  177. }
  178. }
  179. }
  180. }
  181. }
  182. }
  183. for _, cluster := range *assignedClusters {
  184. if cluster.DatasetId == "" {
  185. return fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "DatasetId")
  186. }
  187. if cluster.ImageId == "" {
  188. return fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "ImageId")
  189. }
  190. if cluster.CodeId == "" {
  191. return fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "CodeId")
  192. }
  193. }
  194. return nil
  195. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.