You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deployInstance.go 4.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. package status
  2. import (
  3. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
  4. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
  5. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  6. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  8. "net/http"
  9. "strconv"
  10. "time"
  11. )
  12. func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance, needfilter bool) {
  13. list := make([]*models.AiInferDeployInstance, len(insList))
  14. copy(list, insList)
  15. if needfilter {
  16. for i := len(list) - 1; i >= 0; i-- {
  17. if list[i].Status == constants.Running || list[i].Status == constants.Stopped {
  18. list = append(list[:i], list[i+1:]...)
  19. }
  20. }
  21. }
  22. if len(list) == 0 {
  23. return
  24. }
  25. for _, instance := range list {
  26. go UpdateDeployInstanceStatus(svc, instance, false)
  27. }
  28. }
  29. func UpdateDeployTaskStatus(svc *svc.ServiceContext) {
  30. list, err := svc.Scheduler.AiStorages.GetAllDeployTasks()
  31. if err != nil {
  32. return
  33. }
  34. ins := list[0]
  35. for i := range list {
  36. uTime, _ := time.Parse(time.RFC3339, ins.UpdateTime)
  37. latest, _ := time.Parse(time.RFC3339, list[i].UpdateTime)
  38. if latest.After(uTime) {
  39. ins = list[i]
  40. }
  41. }
  42. inslist, err := svc.Scheduler.AiStorages.GetInstanceListByDeployTaskId(ins.Id)
  43. if err != nil {
  44. return
  45. }
  46. for _, instance := range inslist {
  47. go UpdateDeployInstanceStatus(svc, instance, false)
  48. }
  49. }
  50. func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance, updatetime bool) {
  51. amap, found := svc.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(instance.AdapterId, 10)]
  52. if !found {
  53. return
  54. }
  55. cmap, found := amap[strconv.FormatInt(instance.ClusterId, 10)]
  56. if !found {
  57. return
  58. }
  59. h := http.Request{}
  60. ins, err := cmap.GetInferDeployInstance(h.Context(), instance.InstanceId)
  61. if err != nil {
  62. return
  63. }
  64. switch instance.ClusterType {
  65. case storeLink.TYPE_OCTOPUS:
  66. switch ins.Status {
  67. case "running":
  68. if instance.Status == constants.Running {
  69. return
  70. }
  71. instance.Status = constants.Running
  72. case "stopped":
  73. if instance.Status == constants.Stopped {
  74. return
  75. }
  76. instance.Status = constants.Stopped
  77. default:
  78. instance.Status = ins.Status
  79. }
  80. case storeLink.TYPE_MODELARTS:
  81. switch ins.Status {
  82. case "running":
  83. if instance.Status == constants.Running {
  84. return
  85. }
  86. instance.Status = constants.Running
  87. case "stopped":
  88. if instance.Status == constants.Stopped {
  89. return
  90. }
  91. instance.Status = constants.Stopped
  92. default:
  93. instance.Status = ins.Status
  94. }
  95. case storeLink.TYPE_SHUGUANGAI:
  96. switch ins.Status {
  97. case "Running":
  98. if instance.Status == constants.Running {
  99. return
  100. }
  101. instance.Status = constants.Running
  102. case "Terminated":
  103. if instance.Status == constants.Stopped {
  104. return
  105. }
  106. instance.Status = constants.Stopped
  107. default:
  108. instance.Status = ins.Status
  109. }
  110. }
  111. err = svc.Scheduler.AiStorages.UpdateInferDeployInstance(instance, updatetime)
  112. if err != nil {
  113. return
  114. }
  115. }
  116. func UpdateAutoStoppedInstance(svc *svc.ServiceContext) {
  117. list, err := svc.Scheduler.AiStorages.GetInferDeployInstanceList()
  118. if err != nil {
  119. return
  120. }
  121. if len(list) == 0 {
  122. return
  123. }
  124. UpdateDeployInstanceStatusBatch(svc, list, false)
  125. }
  126. func CheckStopStatus(in *inference.DeployInstance) bool {
  127. switch in.ClusterType {
  128. case storeLink.TYPE_OCTOPUS:
  129. switch in.Status {
  130. case "stopped":
  131. return true
  132. default:
  133. return false
  134. }
  135. case storeLink.TYPE_MODELARTS:
  136. switch in.Status {
  137. case "stopped":
  138. return true
  139. default:
  140. return false
  141. }
  142. case storeLink.TYPE_SHUGUANGAI:
  143. switch in.Status {
  144. case "Terminated":
  145. return true
  146. default:
  147. return false
  148. }
  149. default:
  150. return false
  151. }
  152. }
  153. func CheckRunningStatus(in *inference.DeployInstance) bool {
  154. switch in.ClusterType {
  155. case storeLink.TYPE_OCTOPUS:
  156. switch in.Status {
  157. case "running":
  158. return true
  159. default:
  160. return false
  161. }
  162. case storeLink.TYPE_MODELARTS:
  163. switch in.Status {
  164. case "running":
  165. return true
  166. default:
  167. return false
  168. }
  169. case storeLink.TYPE_SHUGUANGAI:
  170. switch in.Status {
  171. case "Running":
  172. return true
  173. default:
  174. return false
  175. }
  176. default:
  177. return false
  178. }
  179. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.