You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

aiStorage.go 8.1 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. package database
  2. import (
  3. "github.com/zeromicro/go-zero/core/logx"
  4. "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
  5. "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
  6. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
  9. "gorm.io/gorm"
  10. "strconv"
  11. "time"
  12. )
  13. type AiStorage struct {
  14. DbEngin *gorm.DB
  15. }
  16. func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) {
  17. var resp types.ClusterListResp
  18. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.List)
  19. if tx.Error != nil {
  20. logx.Errorf(tx.Error.Error())
  21. return nil, tx.Error
  22. }
  23. return &resp, nil
  24. }
  25. func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, error) {
  26. var resp types.ClusterListResp
  27. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL and `adapter_id` = ? ORDER BY create_time Desc", id).Scan(&resp.List)
  28. if tx.Error != nil {
  29. logx.Errorf(tx.Error.Error())
  30. return nil, tx.Error
  31. }
  32. return &resp, nil
  33. }
  34. func (s *AiStorage) GetClusterNameById(id string) (string, error) {
  35. var name string
  36. tx := s.DbEngin.Raw("select `description` from t_cluster where `id` = ?", id).Scan(&name)
  37. if tx.Error != nil {
  38. logx.Errorf(tx.Error.Error())
  39. return "", tx.Error
  40. }
  41. return name, nil
  42. }
  43. func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
  44. var list []types.AdapterInfo
  45. var ids []string
  46. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  47. db = db.Where("type = ?", adapterType)
  48. err := db.Order("create_time desc").Find(&list).Error
  49. if err != nil {
  50. return nil, err
  51. }
  52. for _, info := range list {
  53. ids = append(ids, info.Id)
  54. }
  55. return ids, nil
  56. }
  57. func (s *AiStorage) GetAdaptersByType(adapterType string) ([]*types.AdapterInfo, error) {
  58. var list []*types.AdapterInfo
  59. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  60. db = db.Where("type = ?", adapterType)
  61. err := db.Order("create_time desc").Find(&list).Error
  62. if err != nil {
  63. return nil, err
  64. }
  65. return list, nil
  66. }
  67. func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, error) {
  68. var resp []*models.TaskAi
  69. db := s.DbEngin.Model(&models.TaskAi{}).Table("task_ai")
  70. db = db.Where("adapter_id = ?", adapterId)
  71. err := db.Order("commit_time desc").Find(&resp).Error
  72. if err != nil {
  73. return nil, err
  74. }
  75. return resp, nil
  76. }
  77. func (s *AiStorage) SaveTask(name string, strategyCode int64, synergyStatus int64) (int64, error) {
  78. // 构建主任务结构体
  79. taskModel := models.Task{
  80. Status: constants.Saved,
  81. Description: "ai task",
  82. Name: name,
  83. SynergyStatus: synergyStatus,
  84. Strategy: strategyCode,
  85. AdapterTypeDict: 1,
  86. CommitTime: time.Now(),
  87. }
  88. // 保存任务数据到数据库
  89. tx := s.DbEngin.Create(&taskModel)
  90. if tx.Error != nil {
  91. return 0, tx.Error
  92. }
  93. return taskModel.Id, nil
  94. }
  95. func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, clusterId string, clusterName string, jobId string, status string, msg string) error {
  96. // 构建主任务结构体
  97. aId, err := strconv.ParseInt(option.AdapterId, 10, 64)
  98. if err != nil {
  99. return err
  100. }
  101. cId, err := strconv.ParseInt(clusterId, 10, 64)
  102. if err != nil {
  103. return err
  104. }
  105. aiTaskModel := models.TaskAi{
  106. TaskId: taskId,
  107. AdapterId: aId,
  108. ClusterId: cId,
  109. ClusterName: clusterName,
  110. Name: option.TaskName,
  111. Replica: int64(option.Replica),
  112. JobId: jobId,
  113. TaskType: option.TaskType,
  114. Strategy: option.StrategyName,
  115. Status: status,
  116. Msg: msg,
  117. Card: option.ComputeCard,
  118. CommitTime: time.Now(),
  119. }
  120. // 保存任务数据到数据库
  121. tx := s.DbEngin.Create(&aiTaskModel)
  122. if tx.Error != nil {
  123. return tx.Error
  124. }
  125. return nil
  126. }
  127. func (s *AiStorage) SaveClusterTaskQueue(adapterId string, clusterId string, queueNum int64) error {
  128. aId, err := strconv.ParseInt(adapterId, 10, 64)
  129. if err != nil {
  130. return err
  131. }
  132. cId, err := strconv.ParseInt(clusterId, 10, 64)
  133. if err != nil {
  134. return err
  135. }
  136. taskQueue := models.TClusterTaskQueue{
  137. AdapterId: aId,
  138. ClusterId: cId,
  139. QueueNum: queueNum,
  140. }
  141. tx := s.DbEngin.Create(&taskQueue)
  142. if tx.Error != nil {
  143. return tx.Error
  144. }
  145. return nil
  146. }
  147. func (s *AiStorage) GetClusterTaskQueues(adapterId string, clusterId string) ([]*models.TClusterTaskQueue, error) {
  148. var taskQueues []*models.TClusterTaskQueue
  149. tx := s.DbEngin.Raw("select * from t_cluster_task_queue where `adapter_id` = ? and `cluster_id` = ?", adapterId, clusterId).Scan(&taskQueues)
  150. if tx.Error != nil {
  151. logx.Errorf(tx.Error.Error())
  152. return nil, tx.Error
  153. }
  154. return taskQueues, nil
  155. }
  156. func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
  157. var aiTask models.TaskAi
  158. tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
  159. if tx.Error != nil {
  160. logx.Errorf(tx.Error.Error())
  161. return "", tx.Error
  162. }
  163. return aiTask.JobId, nil
  164. }
  165. func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterResource, error) {
  166. var clusterResource models.TClusterResource
  167. tx := s.DbEngin.Raw("select * from t_cluster_resource where `cluster_id` = ?", clusterId).Scan(&clusterResource)
  168. if tx.Error != nil {
  169. logx.Errorf(tx.Error.Error())
  170. return nil, tx.Error
  171. }
  172. return &clusterResource, nil
  173. }
  174. func (s *AiStorage) SaveClusterResources(clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
  175. memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64) error {
  176. cId, err := strconv.ParseInt(clusterId, 10, 64)
  177. if err != nil {
  178. return err
  179. }
  180. clusterResource := models.TClusterResource{
  181. ClusterId: cId,
  182. ClusterName: clusterName,
  183. ClusterType: clusterType,
  184. CpuAvail: cpuAvail,
  185. CpuTotal: cpuTotal,
  186. MemAvail: memAvail,
  187. MemTotal: memTotal,
  188. DiskAvail: diskAvail,
  189. DiskTotal: diskTotal,
  190. GpuAvail: gpuAvail,
  191. GpuTotal: gpuTotal,
  192. CardTotal: cardTotal,
  193. CardTopsTotal: topsTotal,
  194. }
  195. tx := s.DbEngin.Create(&clusterResource)
  196. if tx.Error != nil {
  197. return tx.Error
  198. }
  199. // prometheus
  200. param := tracker.ClusterLoadRecord{
  201. ClusterName: clusterName,
  202. CpuAvail: cpuAvail,
  203. CpuTotal: cpuTotal,
  204. MemoryAvail: memAvail,
  205. MemoryTotal: memTotal,
  206. DiskAvail: diskAvail,
  207. DiskTotal: diskTotal,
  208. }
  209. tracker.SyncClusterLoad(param)
  210. return nil
  211. }
  212. func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
  213. tx := s.DbEngin.Where("cluster_id = ?", clusterResource.ClusterId).Updates(clusterResource)
  214. if tx.Error != nil {
  215. return tx.Error
  216. }
  217. // prometheus
  218. param := tracker.ClusterLoadRecord{
  219. ClusterName: clusterResource.ClusterName,
  220. CpuAvail: clusterResource.CpuAvail,
  221. CpuTotal: clusterResource.CpuTotal,
  222. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  223. MemoryAvail: clusterResource.MemAvail,
  224. MemoryTotal: clusterResource.MemTotal,
  225. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  226. DiskAvail: clusterResource.DiskAvail,
  227. DiskTotal: clusterResource.DiskTotal,
  228. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  229. }
  230. tracker.SyncClusterLoad(param)
  231. return nil
  232. }
  233. func (s *AiStorage) UpdateAiTask(task *models.TaskAi) error {
  234. tx := s.DbEngin.Updates(task)
  235. if tx.Error != nil {
  236. return tx.Error
  237. }
  238. return nil
  239. }
  240. func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
  241. var strategy int64
  242. sqlStr := `select t_dict_item.item_value
  243. from t_dict
  244. left join t_dict_item on t_dict.id = t_dict_item.dict_id
  245. where item_text = ?
  246. and t_dict.dict_code = 'schedule_Strategy'`
  247. //查询调度策略
  248. err := s.DbEngin.Raw(sqlStr, name).Scan(&strategy).Error
  249. if err != nil {
  250. return strategy, nil
  251. }
  252. return strategy, nil
  253. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.