You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

scheduler.go 6.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. /*
  2. Copyright (c) [2023] [pcm]
  3. [pcm-coordinator] is licensed under Mulan PSL v2.
  4. You can use this software according to the terms and conditions of the Mulan PSL v2.
  5. You may obtain a copy of Mulan PSL v2 at:
  6. http://license.coscl.org.cn/MulanPSL2
  7. THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  8. EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  9. MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  10. See the Mulan PSL v2 for more details.
  11. */
  12. package scheduler
  13. import (
  14. "encoding/json"
  15. "fmt"
  16. "github.com/pkg/errors"
  17. "github.com/zeromicro/go-zero/core/logx"
  18. "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
  19. "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algo"
  20. tool "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
  21. "gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice"
  22. "gorm.io/gorm"
  23. "math/rand"
  24. "strconv"
  25. "strings"
  26. "time"
  27. )
  28. type scheduler struct {
  29. task *response.TaskInfo
  30. participantIds []int64
  31. scheduleService scheduleService
  32. dbEngin *gorm.DB
  33. result map[int64]string //pID:子任务yamlstring 键值对
  34. participantRpc participantservice.ParticipantService
  35. }
  36. func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*scheduler, error) {
  37. var task *response.TaskInfo
  38. err := json.Unmarshal([]byte(val), &task)
  39. if err != nil {
  40. return nil, errors.New("create scheduler failed : " + err.Error())
  41. }
  42. return &scheduler{task: task, scheduleService: scheduleService, dbEngin: dbEngin, participantRpc: participantRpc, result: make(map[int64]string, 0)}, nil
  43. }
  44. func (s *scheduler) SpecifyClusters() {
  45. // 如果已指定集群名,通过数据库查询后返回p端ip列表
  46. if len(s.task.Clusters) != 0 {
  47. s.dbEngin.Raw("select id from sc_participant_phy_info where `name` in (?)", s.task.Clusters).Scan(&s.participantIds)
  48. return
  49. }
  50. }
  51. func (s *scheduler) SpecifyNsID() {
  52. // 未指定集群名,只指定nsID
  53. if len(s.task.Clusters) == 0 {
  54. if len(s.task.NsID) != 0 {
  55. var clusters string
  56. s.dbEngin.Raw("select clusters from sc_tenant_info where `tenant_name` = ?", s.task.NsID).Scan(&clusters)
  57. clusterArr := strings.Split(clusters, ",")
  58. s.dbEngin.Raw("select id from sc_participant_phy_info where `name` in (?)", clusterArr).Scan(&s.participantIds)
  59. }
  60. } else {
  61. return
  62. }
  63. }
  64. func (s *scheduler) MatchLabels() {
  65. var ids []int64
  66. count := 0
  67. // 集群和nsID都未指定,则通过标签匹配
  68. if len(s.task.Clusters) == 0 && len(s.task.NsID) == 0 {
  69. //如果集群列表或nsID均未指定
  70. for key := range s.task.MatchLabels {
  71. var participantIds []int64
  72. s.dbEngin.Raw("select participant_id from sc_participant_label_info where `key` = ? and value = ?", key, s.task.MatchLabels[key]).Scan(&participantIds)
  73. if count == 0 {
  74. ids = participantIds
  75. }
  76. ids = intersect(ids, participantIds)
  77. count++
  78. }
  79. s.participantIds = ids
  80. } else {
  81. return
  82. }
  83. }
  84. // TempAssign todo 屏蔽原调度算法
  85. func (s *scheduler) TempAssign() error {
  86. //需要判断task中的资源类型,针对metadata中的多个kind做不同处理
  87. //输入副本数和集群列表,最终结果输出为pID对应副本数量列表,针对多个kind需要做拆分和重新拼接组合
  88. var resources []interface{}
  89. tool.Convert(s.task.Metadata, &resources)
  90. for _, resource := range resources {
  91. //如果是Deployment,需要对副本数做分发
  92. if resource.(map[string]interface{})["kind"].(string) == "Deployment" || resource.(map[string]interface{})["kind"].(string) == "Replicaset" {
  93. //replicas := int(resource.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64))
  94. rand.Seed(time.Now().UnixNano())
  95. //// 生成pID对应副本数 数组
  96. //arrReplica := make(map[int64]int, len(s.participantIds))
  97. //for i := 0; i < len(s.participantIds)-1; i++ {
  98. // arrReplica[s.participantIds[i]] = rand.Intn(replicas)
  99. // replicas -= arrReplica[s.participantIds[i]] // 更新剩余的和
  100. //}
  101. //arrReplica[s.participantIds[len(s.participantIds)-1]] = replicas
  102. //
  103. ////将副本数依次写入新的yaml中并生成result数据
  104. //yamlArray := make(map[int64]string, len(s.participantIds))
  105. //
  106. //for i := 0; i < len(s.participantIds)-1; i++ {
  107. // //调整yaml
  108. // yamlArray[s.participantIds[i]] = "sds"
  109. //}
  110. }
  111. s.result[s.participantIds[0]] = ""
  112. }
  113. //var crd = s.task.Metadata
  114. //for i := 0; i < len(s.task.Metadata); i++ {
  115. //
  116. //}
  117. return nil
  118. }
  119. func (s *scheduler) AssignAndSchedule() error {
  120. // 已指定 ParticipantId
  121. if s.task.ParticipantId != 0 {
  122. return nil
  123. }
  124. // 标签匹配以及后,未找到ParticipantIds
  125. if len(s.participantIds) == 0 {
  126. return errors.New("未找到匹配的ParticipantIds")
  127. }
  128. // 指定或者标签匹配的结果只有一个集群,给任务信息指定
  129. if len(s.participantIds) == 1 {
  130. s.task.ParticipantId = s.participantIds[0]
  131. replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
  132. result := make(map[int64]string)
  133. result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
  134. s.result = result
  135. return nil
  136. }
  137. //生成算法所需参数
  138. task, providerList, err := s.obtainParamsforStrategy()
  139. if err != nil {
  140. return err
  141. }
  142. //集群数量不满足,指定到标签匹配后第一个集群
  143. if len(providerList) < 2 {
  144. s.task.ParticipantId = s.participantIds[0]
  145. return nil
  146. }
  147. //调度算法
  148. strategy, err := s.scheduleService.pickOptimalStrategy(task, providerList...)
  149. if err != nil {
  150. return err
  151. }
  152. //调度结果
  153. err = s.assignReplicasToResult(strategy, providerList)
  154. if err != nil {
  155. return err
  156. }
  157. return nil
  158. }
  159. func (s *scheduler) SaveToDb() error {
  160. for key, value := range s.result {
  161. num, err := strconv.Atoi(value)
  162. if err != nil {
  163. fmt.Println("转换失败:", err)
  164. }
  165. structForDb, err := s.scheduleService.getNewStructForDb(s.task, int64(key), int32(num))
  166. if err != nil {
  167. return err
  168. }
  169. tx := s.dbEngin.Create(structForDb)
  170. if tx.Error != nil {
  171. logx.Error(tx.Error)
  172. return tx.Error
  173. }
  174. }
  175. return nil
  176. }
  177. func (s *scheduler) obtainParamsforStrategy() (*algo.Task, []*algo.Provider, error) {
  178. task, providerList := s.scheduleService.genTaskAndProviders(s.task, s.dbEngin)
  179. if len(providerList) == 0 {
  180. return nil, nil, errors.New("获取集群失败")
  181. }
  182. return task, providerList, nil
  183. }
  184. func (s *scheduler) assignReplicasToResult(strategy *algo.Strategy, providerList []*algo.Provider) error {
  185. if len(strategy.Tasksolution) == 0 {
  186. return errors.New("调度失败, 未能获取调度结果")
  187. }
  188. for i, e := range strategy.Tasksolution {
  189. if e == 0 {
  190. continue
  191. }
  192. s.result[providerList[i].Pid] = string(e)
  193. }
  194. if len(s.result) == 0 {
  195. return errors.New("可用集群为空")
  196. }
  197. return nil
  198. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.