You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

deployInstance.go 5.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. package status
  2. import (
  3. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
  4. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
  5. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  6. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  8. "net/http"
  9. "strconv"
  10. "time"
  11. )
  12. func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance, needfilter bool) {
  13. list := make([]*models.AiInferDeployInstance, len(insList))
  14. copy(list, insList)
  15. if needfilter {
  16. for i := len(list) - 1; i >= 0; i-- {
  17. if list[i].Status == constants.Running || list[i].Status == constants.Stopped || list[i].Status == constants.Failed {
  18. list = append(list[:i], list[i+1:]...)
  19. }
  20. }
  21. }
  22. if len(list) == 0 {
  23. return
  24. }
  25. buffer := make(chan bool, 3)
  26. for _, instance := range list {
  27. buffer <- true
  28. go UpdateDeployInstanceStatus(svc, instance, false, buffer)
  29. }
  30. }
  31. func UpdateDeployTaskStatus(svc *svc.ServiceContext) {
  32. list, err := svc.Scheduler.AiStorages.GetAllDeployTasks()
  33. if err != nil {
  34. return
  35. }
  36. ins := list[0]
  37. for i := range list {
  38. uTime, _ := time.Parse(time.RFC3339, ins.UpdateTime)
  39. latest, _ := time.Parse(time.RFC3339, list[i].UpdateTime)
  40. if latest.After(uTime) {
  41. ins = list[i]
  42. }
  43. }
  44. inslist, err := svc.Scheduler.AiStorages.GetInstanceListByDeployTaskId(ins.Id)
  45. if err != nil {
  46. return
  47. }
  48. buffer := make(chan bool, 2)
  49. for _, instance := range inslist {
  50. buffer <- true
  51. go UpdateDeployInstanceStatus(svc, instance, false, buffer)
  52. }
  53. }
  54. func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance, updatetime bool, ch chan bool) {
  55. amap, found := svc.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(instance.AdapterId, 10)]
  56. if !found {
  57. if ch != nil {
  58. <-ch
  59. return
  60. }
  61. return
  62. }
  63. cmap, found := amap[strconv.FormatInt(instance.ClusterId, 10)]
  64. if !found {
  65. if ch != nil {
  66. <-ch
  67. return
  68. }
  69. return
  70. }
  71. h := http.Request{}
  72. ins, err := cmap.GetInferDeployInstance(h.Context(), instance.InstanceId)
  73. if err != nil {
  74. if ch != nil {
  75. <-ch
  76. return
  77. }
  78. return
  79. }
  80. switch instance.ClusterType {
  81. case storeLink.TYPE_OCTOPUS:
  82. switch ins.Status {
  83. case "running":
  84. if instance.Status == constants.Running {
  85. if ch != nil {
  86. <-ch
  87. return
  88. }
  89. return
  90. }
  91. instance.Status = constants.Running
  92. case "stopped":
  93. if instance.Status == constants.Stopped {
  94. if ch != nil {
  95. <-ch
  96. return
  97. }
  98. return
  99. }
  100. instance.Status = constants.Stopped
  101. default:
  102. instance.Status = ins.Status
  103. }
  104. case storeLink.TYPE_MODELARTS:
  105. switch ins.Status {
  106. case "running":
  107. if instance.Status == constants.Running {
  108. if ch != nil {
  109. <-ch
  110. return
  111. }
  112. return
  113. }
  114. instance.Status = constants.Running
  115. case "stopped":
  116. if instance.Status == constants.Stopped {
  117. if ch != nil {
  118. <-ch
  119. return
  120. }
  121. return
  122. }
  123. instance.Status = constants.Stopped
  124. default:
  125. instance.Status = ins.Status
  126. }
  127. case storeLink.TYPE_SHUGUANGAI:
  128. switch ins.Status {
  129. case "Running":
  130. if instance.Status == constants.Running {
  131. if ch != nil {
  132. <-ch
  133. return
  134. }
  135. return
  136. }
  137. instance.Status = constants.Running
  138. case "Terminated":
  139. if instance.Status == constants.Stopped {
  140. if ch != nil {
  141. <-ch
  142. return
  143. }
  144. return
  145. }
  146. instance.Status = constants.Stopped
  147. default:
  148. instance.Status = ins.Status
  149. }
  150. case storeLink.TYPE_OPENI:
  151. switch ins.Status {
  152. case "RUNNING":
  153. if instance.Status == constants.Running {
  154. if ch != nil {
  155. <-ch
  156. return
  157. }
  158. return
  159. }
  160. instance.Status = constants.Running
  161. case "STOPPED":
  162. if instance.Status == constants.Stopped {
  163. if ch != nil {
  164. <-ch
  165. return
  166. }
  167. return
  168. }
  169. instance.Status = constants.Stopped
  170. case "CREATED_FAILED":
  171. if instance.Status == constants.Failed {
  172. if ch != nil {
  173. <-ch
  174. return
  175. }
  176. return
  177. }
  178. instance.Status = constants.Failed
  179. default:
  180. instance.Status = ins.Status
  181. }
  182. }
  183. err = svc.Scheduler.AiStorages.UpdateInferDeployInstance(instance, updatetime)
  184. if err != nil {
  185. if ch != nil {
  186. <-ch
  187. return
  188. }
  189. return
  190. }
  191. if ch != nil {
  192. <-ch
  193. return
  194. }
  195. }
  196. func UpdateAutoStoppedInstance(svc *svc.ServiceContext) {
  197. list, err := svc.Scheduler.AiStorages.GetInferDeployInstanceList()
  198. if err != nil {
  199. return
  200. }
  201. if len(list) == 0 {
  202. return
  203. }
  204. UpdateDeployInstanceStatusBatch(svc, list, false)
  205. }
  206. func CheckStopStatus(in *inference.DeployInstance) bool {
  207. switch in.ClusterType {
  208. case storeLink.TYPE_OCTOPUS:
  209. switch in.Status {
  210. case "stopped":
  211. return true
  212. default:
  213. return false
  214. }
  215. case storeLink.TYPE_MODELARTS:
  216. switch in.Status {
  217. case "stopped":
  218. return true
  219. default:
  220. return false
  221. }
  222. case storeLink.TYPE_SHUGUANGAI:
  223. switch in.Status {
  224. case "Terminated":
  225. return true
  226. default:
  227. return false
  228. }
  229. default:
  230. return false
  231. }
  232. }
  233. func CheckRunningStatus(in *inference.DeployInstance) bool {
  234. switch in.ClusterType {
  235. case storeLink.TYPE_OCTOPUS:
  236. switch in.Status {
  237. case "running":
  238. return true
  239. default:
  240. return false
  241. }
  242. case storeLink.TYPE_MODELARTS:
  243. switch in.Status {
  244. case "running":
  245. return true
  246. default:
  247. return false
  248. }
  249. case storeLink.TYPE_SHUGUANGAI:
  250. switch in.Status {
  251. case "Running":
  252. return true
  253. default:
  254. return false
  255. }
  256. default:
  257. return false
  258. }
  259. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.