You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deployInstance.go 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. package status
  2. import (
  3. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
  4. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
  5. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  6. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  8. "net/http"
  9. "strconv"
  10. "time"
  11. )
  12. func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance, needfilter bool) {
  13. list := make([]*models.AiInferDeployInstance, len(insList))
  14. copy(list, insList)
  15. if needfilter {
  16. for i := len(list) - 1; i >= 0; i-- {
  17. if list[i].Status == constants.Running || list[i].Status == constants.Stopped {
  18. list = append(list[:i], list[i+1:]...)
  19. }
  20. }
  21. }
  22. if len(list) == 0 {
  23. return
  24. }
  25. buffer := make(chan bool, 3)
  26. for _, instance := range list {
  27. buffer <- true
  28. go UpdateDeployInstanceStatus(svc, instance, false, buffer)
  29. }
  30. }
  31. func UpdateDeployTaskStatus(svc *svc.ServiceContext) {
  32. list, err := svc.Scheduler.AiStorages.GetAllDeployTasks()
  33. if err != nil {
  34. return
  35. }
  36. ins := list[0]
  37. for i := range list {
  38. uTime, _ := time.Parse(time.RFC3339, ins.UpdateTime)
  39. latest, _ := time.Parse(time.RFC3339, list[i].UpdateTime)
  40. if latest.After(uTime) {
  41. ins = list[i]
  42. }
  43. }
  44. inslist, err := svc.Scheduler.AiStorages.GetInstanceListByDeployTaskId(ins.Id)
  45. if err != nil {
  46. return
  47. }
  48. buffer := make(chan bool, 2)
  49. for _, instance := range inslist {
  50. buffer <- true
  51. go UpdateDeployInstanceStatus(svc, instance, false, buffer)
  52. }
  53. }
  54. func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance, updatetime bool, ch chan bool) {
  55. amap, found := svc.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(instance.AdapterId, 10)]
  56. if !found {
  57. if ch != nil {
  58. <-ch
  59. return
  60. }
  61. return
  62. }
  63. cmap, found := amap[strconv.FormatInt(instance.ClusterId, 10)]
  64. if !found {
  65. if ch != nil {
  66. <-ch
  67. return
  68. }
  69. return
  70. }
  71. h := http.Request{}
  72. ins, err := cmap.GetInferDeployInstance(h.Context(), instance.InstanceId)
  73. if err != nil {
  74. if ch != nil {
  75. <-ch
  76. return
  77. }
  78. return
  79. }
  80. switch instance.ClusterType {
  81. case storeLink.TYPE_OCTOPUS:
  82. switch ins.Status {
  83. case "running":
  84. if instance.Status == constants.Running {
  85. if ch != nil {
  86. <-ch
  87. return
  88. }
  89. return
  90. }
  91. instance.Status = constants.Running
  92. case "stopped":
  93. if instance.Status == constants.Stopped {
  94. if ch != nil {
  95. <-ch
  96. return
  97. }
  98. return
  99. }
  100. instance.Status = constants.Stopped
  101. default:
  102. instance.Status = ins.Status
  103. }
  104. case storeLink.TYPE_MODELARTS:
  105. switch ins.Status {
  106. case "running":
  107. if instance.Status == constants.Running {
  108. if ch != nil {
  109. <-ch
  110. return
  111. }
  112. return
  113. }
  114. instance.Status = constants.Running
  115. case "stopped":
  116. if instance.Status == constants.Stopped {
  117. if ch != nil {
  118. <-ch
  119. return
  120. }
  121. return
  122. }
  123. instance.Status = constants.Stopped
  124. default:
  125. instance.Status = ins.Status
  126. }
  127. case storeLink.TYPE_SHUGUANGAI:
  128. switch ins.Status {
  129. case "Running":
  130. if instance.Status == constants.Running {
  131. if ch != nil {
  132. <-ch
  133. return
  134. }
  135. return
  136. }
  137. instance.Status = constants.Running
  138. case "Terminated":
  139. if instance.Status == constants.Stopped {
  140. if ch != nil {
  141. <-ch
  142. return
  143. }
  144. return
  145. }
  146. instance.Status = constants.Stopped
  147. default:
  148. instance.Status = ins.Status
  149. }
  150. }
  151. err = svc.Scheduler.AiStorages.UpdateInferDeployInstance(instance, updatetime)
  152. if err != nil {
  153. if ch != nil {
  154. <-ch
  155. return
  156. }
  157. return
  158. }
  159. if ch != nil {
  160. <-ch
  161. return
  162. }
  163. }
  164. func UpdateAutoStoppedInstance(svc *svc.ServiceContext) {
  165. list, err := svc.Scheduler.AiStorages.GetInferDeployInstanceList()
  166. if err != nil {
  167. return
  168. }
  169. if len(list) == 0 {
  170. return
  171. }
  172. UpdateDeployInstanceStatusBatch(svc, list, false)
  173. }
  174. func CheckStopStatus(in *inference.DeployInstance) bool {
  175. switch in.ClusterType {
  176. case storeLink.TYPE_OCTOPUS:
  177. switch in.Status {
  178. case "stopped":
  179. return true
  180. default:
  181. return false
  182. }
  183. case storeLink.TYPE_MODELARTS:
  184. switch in.Status {
  185. case "stopped":
  186. return true
  187. default:
  188. return false
  189. }
  190. case storeLink.TYPE_SHUGUANGAI:
  191. switch in.Status {
  192. case "Terminated":
  193. return true
  194. default:
  195. return false
  196. }
  197. default:
  198. return false
  199. }
  200. }
  201. func CheckRunningStatus(in *inference.DeployInstance) bool {
  202. switch in.ClusterType {
  203. case storeLink.TYPE_OCTOPUS:
  204. switch in.Status {
  205. case "running":
  206. return true
  207. default:
  208. return false
  209. }
  210. case storeLink.TYPE_MODELARTS:
  211. switch in.Status {
  212. case "running":
  213. return true
  214. default:
  215. return false
  216. }
  217. case storeLink.TYPE_SHUGUANGAI:
  218. switch in.Status {
  219. case "Running":
  220. return true
  221. default:
  222. return false
  223. }
  224. default:
  225. return false
  226. }
  227. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.