You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

aiStorage.go 21 kB

3 months ago
4 months ago
3 months ago
6 months ago
3 months ago
11 months ago
4 months ago
4 months ago
4 months ago
4 months ago
3 months ago
3 months ago
10 months ago
11 months ago
4 months ago
4 months ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712
  1. package database
  2. import (
  3. "strconv"
  4. "time"
  5. jsoniter "github.com/json-iterator/go"
  6. "github.com/zeromicro/go-zero/core/logx"
  7. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  15. "gorm.io/gorm"
  16. )
// AiStorage bundles the database reads and writes for AI tasks, clusters,
// adapters and inference deployments defined in this file.
type AiStorage struct {
	// DbEngin is the gorm handle that every method of AiStorage runs its
	// statements on.
	DbEngin *gorm.DB
}
  20. func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) {
  21. var resp types.ClusterListResp
  22. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.List)
  23. if tx.Error != nil {
  24. logx.Errorf(tx.Error.Error())
  25. return nil, tx.Error
  26. }
  27. return &resp, nil
  28. }
  29. func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, error) {
  30. var resp types.ClusterListResp
  31. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL and `adapter_id` = ? ORDER BY create_time Desc", id).Scan(&resp.List)
  32. if tx.Error != nil {
  33. logx.Errorf(tx.Error.Error())
  34. return nil, tx.Error
  35. }
  36. return &resp, nil
  37. }
  38. func (s *AiStorage) GetClusterNameById(id string) (string, error) {
  39. var name string
  40. tx := s.DbEngin.Raw("select `description` from t_cluster where `id` = ?", id).Scan(&name)
  41. if tx.Error != nil {
  42. logx.Errorf(tx.Error.Error())
  43. return "", tx.Error
  44. }
  45. return name, nil
  46. }
  47. func (s *AiStorage) GetAdapterNameById(id string) (string, error) {
  48. var name string
  49. tx := s.DbEngin.Raw("select `name` from t_adapter where `id` = ?", id).Scan(&name)
  50. if tx.Error != nil {
  51. logx.Errorf(tx.Error.Error())
  52. return "", tx.Error
  53. }
  54. return name, nil
  55. }
  56. func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
  57. var list []types.AdapterInfo
  58. var ids []string
  59. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  60. db = db.Where("type = ?", adapterType)
  61. err := db.Order("create_time desc").Find(&list).Error
  62. if err != nil {
  63. return nil, err
  64. }
  65. for _, info := range list {
  66. ids = append(ids, info.Id)
  67. }
  68. return ids, nil
  69. }
  70. func (s *AiStorage) GetAdaptersByType(adapterType string) ([]*types.AdapterInfo, error) {
  71. var list []*types.AdapterInfo
  72. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  73. db = db.Where("type = ?", adapterType)
  74. err := db.Order("create_time desc").Find(&list).Error
  75. if err != nil {
  76. return nil, err
  77. }
  78. return list, nil
  79. }
  80. func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, error) {
  81. var resp []*models.TaskAi
  82. db := s.DbEngin.Model(&models.TaskAi{}).Table("task_ai")
  83. db = db.Where("adapter_id = ?", adapterId)
  84. err := db.Order("commit_time desc").Find(&resp).Error
  85. if err != nil {
  86. return nil, err
  87. }
  88. return resp, nil
  89. }
  90. func (s *AiStorage) GetAiTaskListById(id int64) ([]*models.TaskAi, error) {
  91. var aiTaskList []*models.TaskAi
  92. tx := s.DbEngin.Raw("select * from task_ai where `task_id` = ? ", id).Scan(&aiTaskList)
  93. if tx.Error != nil {
  94. return nil, tx.Error
  95. }
  96. return aiTaskList, nil
  97. }
  98. func (s *AiStorage) DoesTaskNameExist(name string, taskType string) (bool, error) {
  99. var total int32
  100. switch taskType {
  101. case "training":
  102. tx := s.DbEngin.Raw("select count(*) from task where `name` = ?", name).Scan(&total)
  103. if tx.Error != nil {
  104. logx.Errorf(tx.Error.Error())
  105. return false, tx.Error
  106. }
  107. case "inference":
  108. tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
  109. if tx.Error != nil {
  110. logx.Errorf(tx.Error.Error())
  111. return false, tx.Error
  112. }
  113. }
  114. return total > 0, nil
  115. }
  116. func (s *AiStorage) SaveTask(name string, desc string, userId int64, strategyCode int64, synergyStatus int64, aiType string, yaml string, saveToChain func(task models.Task, id int64) error, userName string) (int64, error) {
  117. startTime := time.Now()
  118. // 构建主任务结构体
  119. taskModel := models.Task{
  120. Id: utils.GenSnowflakeID(),
  121. Status: constants.Saved,
  122. Description: desc,
  123. Name: name,
  124. UserId: userId,
  125. UserName: userName,
  126. SynergyStatus: synergyStatus,
  127. Strategy: strategyCode,
  128. AdapterTypeDict: "1",
  129. TaskTypeDict: aiType,
  130. YamlString: yaml,
  131. StartTime: &startTime,
  132. CommitTime: time.Now(),
  133. }
  134. // 保存任务数据到数据库
  135. tx := s.DbEngin.Create(&taskModel)
  136. if tx.Error != nil {
  137. return 0, tx.Error
  138. }
  139. id := taskModel.Id
  140. // 数据上链
  141. if saveToChain != nil {
  142. err := saveToChain(taskModel, id)
  143. if err != nil {
  144. logx.Error(err)
  145. }
  146. }
  147. return id, nil
  148. }
  149. func (s *AiStorage) UpdateTask(task *types.TaskModel) error {
  150. task.UpdatedTime = time.Now().Format(constants.Layout)
  151. tx := s.DbEngin.Table("task").Model(task).Updates(task)
  152. if tx.Error != nil {
  153. logx.Errorf(tx.Error.Error())
  154. return tx.Error
  155. }
  156. return nil
  157. }
  158. func (s *AiStorage) AllTaskLastMonth() ([]*types.TaskModel, error) {
  159. var list []*types.TaskModel
  160. // 构建数据库查询
  161. db := s.DbEngin.Model(&types.TaskModel{}).Table("task")
  162. now := time.Now()
  163. lastMonth := now.AddDate(0, -1, 0)
  164. db = db.Where("created_time >= ?", lastMonth)
  165. // 查询任务列表
  166. if err := db.Order("created_time desc").Find(&list).Error; err != nil {
  167. return nil, result.NewDefaultError(err.Error())
  168. }
  169. return list, nil
  170. }
  171. func (s *AiStorage) SaveAiTask(taskId int64, opt option.Option, adapterName string, clusterId string, clusterName string, jobId string, status string, msg string) error {
  172. var aiOpt *option.AiOption
  173. switch (opt).(type) {
  174. case *option.AiOption:
  175. aiOpt = (opt).(*option.AiOption)
  176. case *option.InferOption:
  177. inferOpt := (opt).(*option.InferOption)
  178. aiOpt = &option.AiOption{}
  179. aiOpt.TaskName = inferOpt.TaskName
  180. aiOpt.Replica = inferOpt.Replica
  181. aiOpt.AdapterId = inferOpt.AdapterId
  182. aiOpt.TaskType = inferOpt.ModelType
  183. aiOpt.ModelName = inferOpt.ModelName
  184. aiOpt.StrategyName = inferOpt.Strategy
  185. }
  186. // 构建主任务结构体
  187. aId, err := strconv.ParseInt(aiOpt.AdapterId, 10, 64)
  188. if err != nil {
  189. return err
  190. }
  191. cId, err := strconv.ParseInt(clusterId, 10, 64)
  192. if err != nil {
  193. return err
  194. }
  195. resourceSpec, _ := jsoniter.MarshalToString(aiOpt.ResourcesRequired)
  196. aiTaskModel := models.TaskAi{
  197. TaskId: taskId,
  198. AdapterId: aId,
  199. AdapterName: adapterName,
  200. ClusterId: cId,
  201. ClusterName: clusterName,
  202. Name: aiOpt.TaskName,
  203. Replica: int64(aiOpt.Replica),
  204. JobId: jobId,
  205. TaskType: aiOpt.TaskType,
  206. ModelName: aiOpt.ModelName,
  207. Strategy: aiOpt.StrategyName,
  208. Status: status,
  209. Msg: msg,
  210. Output: aiOpt.Output,
  211. Card: aiOpt.ComputeCard,
  212. StartTime: time.Now().Format(time.RFC3339),
  213. CommitTime: time.Now(),
  214. ResourceSpec: resourceSpec,
  215. }
  216. // 保存任务数据到数据库
  217. tx := s.DbEngin.Create(&aiTaskModel)
  218. if tx.Error != nil {
  219. return tx.Error
  220. }
  221. return nil
  222. }
  223. func (s *AiStorage) SaveAiTaskImageSubTask(ta *models.TaskAiSub) error {
  224. tx := s.DbEngin.Table("task_ai_sub").Create(ta)
  225. if tx.Error != nil {
  226. return tx.Error
  227. }
  228. return nil
  229. }
  230. func (s *AiStorage) SaveClusterTaskQueue(adapterId string, clusterId string, queueNum int64) error {
  231. aId, err := strconv.ParseInt(adapterId, 10, 64)
  232. if err != nil {
  233. return err
  234. }
  235. cId, err := strconv.ParseInt(clusterId, 10, 64)
  236. if err != nil {
  237. return err
  238. }
  239. taskQueue := models.TClusterTaskQueue{
  240. AdapterId: aId,
  241. ClusterId: cId,
  242. QueueNum: queueNum,
  243. }
  244. tx := s.DbEngin.Create(&taskQueue)
  245. if tx.Error != nil {
  246. return tx.Error
  247. }
  248. return nil
  249. }
  250. func (s *AiStorage) GetClusterTaskQueues(adapterId string, clusterId string) ([]*models.TClusterTaskQueue, error) {
  251. var taskQueues []*models.TClusterTaskQueue
  252. tx := s.DbEngin.Raw("select * from t_cluster_task_queue where `adapter_id` = ? and `cluster_id` = ?", adapterId, clusterId).Scan(&taskQueues)
  253. if tx.Error != nil {
  254. logx.Errorf(tx.Error.Error())
  255. return nil, tx.Error
  256. }
  257. return taskQueues, nil
  258. }
  259. func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
  260. var aiTask models.TaskAi
  261. tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
  262. if tx.Error != nil {
  263. logx.Errorf(tx.Error.Error())
  264. return "", tx.Error
  265. }
  266. return aiTask.JobId, nil
  267. }
  268. func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterResource, error) {
  269. var clusterResource models.TClusterResource
  270. tx := s.DbEngin.Raw("select * from t_cluster_resource where `cluster_id` = ?", clusterId).Scan(&clusterResource)
  271. if tx.Error != nil {
  272. logx.Errorf(tx.Error.Error())
  273. return nil, tx.Error
  274. }
  275. return &clusterResource, nil
  276. }
  277. func (s *AiStorage) SaveClusterResources(adapterId string, clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
  278. memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64, cardHours float64,
  279. balance float64, taskCompleted int64) error {
  280. cId, err := strconv.ParseInt(clusterId, 10, 64)
  281. if err != nil {
  282. return err
  283. }
  284. aId, err := strconv.ParseInt(adapterId, 10, 64)
  285. if err != nil {
  286. return err
  287. }
  288. clusterResource := models.TClusterResource{
  289. AdapterId: aId,
  290. ClusterId: cId,
  291. ClusterName: clusterName,
  292. ClusterType: clusterType,
  293. CpuAvail: cpuAvail,
  294. CpuTotal: cpuTotal,
  295. MemAvail: memAvail,
  296. MemTotal: memTotal,
  297. DiskAvail: diskAvail,
  298. DiskTotal: diskTotal,
  299. GpuAvail: gpuAvail,
  300. GpuTotal: gpuTotal,
  301. CardTotal: cardTotal,
  302. CardTopsTotal: topsTotal,
  303. CardHours: cardHours,
  304. Balance: balance,
  305. TaskCompleted: taskCompleted,
  306. }
  307. tx := s.DbEngin.Create(&clusterResource)
  308. if tx.Error != nil {
  309. return tx.Error
  310. }
  311. // prometheus
  312. param := tracker.ClusterLoadRecord{
  313. AdapterId: aId,
  314. ClusterName: clusterName,
  315. CpuAvail: cpuAvail,
  316. CpuTotal: cpuTotal,
  317. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  318. MemoryAvail: memAvail,
  319. MemoryTotal: memTotal,
  320. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  321. DiskAvail: diskAvail,
  322. DiskTotal: diskTotal,
  323. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  324. }
  325. tracker.SyncClusterLoad(param)
  326. return nil
  327. }
  328. func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
  329. tx := s.DbEngin.Where("cluster_id = ?", clusterResource.ClusterId).Updates(clusterResource)
  330. if tx.Error != nil {
  331. return tx.Error
  332. }
  333. // prometheus
  334. param := tracker.ClusterLoadRecord{
  335. AdapterId: clusterResource.AdapterId,
  336. ClusterName: clusterResource.ClusterName,
  337. CpuAvail: clusterResource.CpuAvail,
  338. CpuTotal: clusterResource.CpuTotal,
  339. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  340. MemoryAvail: clusterResource.MemAvail,
  341. MemoryTotal: clusterResource.MemTotal,
  342. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  343. DiskAvail: clusterResource.DiskAvail,
  344. DiskTotal: clusterResource.DiskTotal,
  345. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  346. }
  347. tracker.SyncClusterLoad(param)
  348. return nil
  349. }
  350. func (s *AiStorage) UpdateAiTask(task *models.TaskAi) error {
  351. tx := s.DbEngin.Updates(task)
  352. if tx.Error != nil {
  353. return tx.Error
  354. }
  355. return nil
  356. }
  357. func (s *AiStorage) UpdateTaskByModel(task *models.Task) error {
  358. tx := s.DbEngin.Updates(task)
  359. if tx.Error != nil {
  360. return tx.Error
  361. }
  362. return nil
  363. }
  364. func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
  365. var strategy int64
  366. sqlStr := `select t_dict_item.item_value
  367. from t_dict
  368. left join t_dict_item on t_dict.id = t_dict_item.dict_id
  369. where item_text = ?
  370. and t_dict.dict_code = 'schedule_Strategy'`
  371. //查询调度策略
  372. err := s.DbEngin.Raw(sqlStr, name).Scan(&strategy).Error
  373. if err != nil {
  374. return strategy, nil
  375. }
  376. return strategy, nil
  377. }
  378. func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterId string, clusterName string, taskName string, noticeType string, incident string) {
  379. aId, err := strconv.ParseInt(adapterId, 10, 64)
  380. if err != nil {
  381. logx.Errorf("adapterId convert failure, err: %v", err)
  382. }
  383. var cId int64
  384. if clusterId != "" {
  385. cId, err = strconv.ParseInt(clusterId, 10, 64)
  386. if err != nil {
  387. logx.Errorf("clusterId convert failure, err: %v", err)
  388. }
  389. }
  390. noticeInfo := clientCore.NoticeInfo{
  391. AdapterId: aId,
  392. AdapterName: adapterName,
  393. ClusterId: cId,
  394. ClusterName: clusterName,
  395. NoticeType: noticeType,
  396. TaskName: taskName,
  397. Incident: incident,
  398. CreatedTime: time.Now(),
  399. }
  400. result := s.DbEngin.Table("t_notice").Create(&noticeInfo)
  401. if result.Error != nil {
  402. logx.Errorf("Task creation failure, err: %v", result.Error)
  403. }
  404. }
  405. func (s *AiStorage) SaveInferDeployInstance(taskId int64, instanceId string, instanceName string, adapterId int64,
  406. adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string, clusterType string) (int64, error) {
  407. startTime := time.Now().Format(time.RFC3339)
  408. // 构建主任务结构体
  409. insModel := models.AiInferDeployInstance{
  410. DeployInstanceTaskId: taskId,
  411. InstanceId: instanceId,
  412. InstanceName: instanceName,
  413. AdapterId: adapterId,
  414. AdapterName: adapterName,
  415. ClusterId: clusterId,
  416. ClusterName: clusterName,
  417. ModelName: modelName,
  418. ModelType: modelType,
  419. InferCard: inferCard,
  420. ClusterType: clusterType,
  421. Status: constants.Deploying,
  422. CreateTime: startTime,
  423. UpdateTime: startTime,
  424. }
  425. // 保存任务数据到数据库
  426. tx := s.DbEngin.Table("ai_infer_deploy_instance").Create(&insModel)
  427. if tx.Error != nil {
  428. return 0, tx.Error
  429. }
  430. return insModel.Id, nil
  431. }
  432. func (s *AiStorage) UpdateInferDeployInstance(instance *models.AiInferDeployInstance, needUpdateTime bool) error {
  433. if needUpdateTime {
  434. instance.UpdateTime = time.Now().Format(time.RFC3339)
  435. }
  436. tx := s.DbEngin.Table("ai_infer_deploy_instance").Updates(instance)
  437. if tx.Error != nil {
  438. logx.Errorf(tx.Error.Error())
  439. return tx.Error
  440. }
  441. return nil
  442. }
  443. func (s *AiStorage) GetTaskById(id int64) (*models.Task, error) {
  444. var task *models.Task
  445. tx := s.DbEngin.Raw("select * from task where `id` = ?", id).Scan(&task)
  446. if tx.Error != nil {
  447. logx.Errorf(tx.Error.Error())
  448. return nil, tx.Error
  449. }
  450. return task, nil
  451. }
  452. func (s *AiStorage) GetInferDeployInstanceById(id int64) (*models.AiInferDeployInstance, error) {
  453. var deployIns *models.AiInferDeployInstance
  454. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `id` = ?", id).Scan(&deployIns)
  455. if tx.Error != nil {
  456. logx.Errorf(tx.Error.Error())
  457. return nil, tx.Error
  458. }
  459. return deployIns, nil
  460. }
  461. func (s *AiStorage) GetDeployTaskById(id int64) (*models.AiDeployInstanceTask, error) {
  462. var task *models.AiDeployInstanceTask
  463. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `id` = ?", id).Scan(&task)
  464. if tx.Error != nil {
  465. logx.Errorf(tx.Error.Error())
  466. return nil, tx.Error
  467. }
  468. return task, nil
  469. }
  470. func (s *AiStorage) GetDeployTaskListByType(modelType string) ([]*models.AiDeployInstanceTask, error) {
  471. var tasks []*models.AiDeployInstanceTask
  472. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `model_type` = ?", modelType).Scan(&tasks)
  473. if tx.Error != nil {
  474. logx.Errorf(tx.Error.Error())
  475. return nil, tx.Error
  476. }
  477. return tasks, nil
  478. }
  479. func (s *AiStorage) GetAllDeployTasks() ([]*models.AiDeployInstanceTask, error) {
  480. var tasks []*models.AiDeployInstanceTask
  481. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&tasks)
  482. if tx.Error != nil {
  483. logx.Errorf(tx.Error.Error())
  484. return nil, tx.Error
  485. }
  486. return tasks, nil
  487. }
  488. func (s *AiStorage) UpdateDeployTask(task *models.AiDeployInstanceTask, needUpdateTime bool) error {
  489. if needUpdateTime {
  490. task.UpdateTime = time.Now().Format(time.RFC3339)
  491. }
  492. tx := s.DbEngin.Table("ai_deploy_instance_task").Updates(task)
  493. if tx.Error != nil {
  494. logx.Errorf(tx.Error.Error())
  495. return tx.Error
  496. }
  497. return nil
  498. }
  499. func (s *AiStorage) DeleteDeployTaskById(id int64) error {
  500. tx := s.DbEngin.Delete(&models.AiDeployInstanceTask{}, id)
  501. if tx.Error != nil {
  502. logx.Errorf(tx.Error.Error())
  503. return tx.Error
  504. }
  505. return nil
  506. }
  507. func (s *AiStorage) UpdateDeployTaskById(id int64) error {
  508. task, err := s.GetDeployTaskById(id)
  509. if err != nil {
  510. return err
  511. }
  512. err = s.UpdateDeployTask(task, true)
  513. if err != nil {
  514. return err
  515. }
  516. return nil
  517. }
  518. func (s *AiStorage) GetInstanceListByDeployTaskId(id int64) ([]*models.AiInferDeployInstance, error) {
  519. var list []*models.AiInferDeployInstance
  520. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `deploy_instance_task_id` = ?", id).Scan(&list)
  521. if tx.Error != nil {
  522. logx.Errorf(tx.Error.Error())
  523. return nil, tx.Error
  524. }
  525. return list, nil
  526. }
  527. func (s *AiStorage) GetInferDeployInstanceListLastMonth() ([]*models.AiInferDeployInstance, error) {
  528. var list []*models.AiInferDeployInstance
  529. now := time.Now()
  530. lastMonth := now.AddDate(0, -1, 0)
  531. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `create_time` >= ?", lastMonth).Scan(&list)
  532. if tx.Error != nil {
  533. logx.Errorf(tx.Error.Error())
  534. return nil, tx.Error
  535. }
  536. return list, nil
  537. }
  538. func (s *AiStorage) GetDeployTaskList() ([]*models.AiDeployInstanceTask, error) {
  539. var list []*models.AiDeployInstanceTask
  540. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&list)
  541. if tx.Error != nil {
  542. logx.Errorf(tx.Error.Error())
  543. return nil, tx.Error
  544. }
  545. return list, nil
  546. }
  547. func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
  548. var total int32
  549. tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&total)
  550. if tx.Error != nil {
  551. logx.Errorf(tx.Error.Error())
  552. return 0, tx.Error
  553. }
  554. return total, nil
  555. }
  556. func (s *AiStorage) GetInferDeployInstanceRunningNum() (int32, error) {
  557. var total int32
  558. tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance where `status` = 'Running'").Scan(&total)
  559. if tx.Error != nil {
  560. logx.Errorf(tx.Error.Error())
  561. return 0, tx.Error
  562. }
  563. return total, nil
  564. }
  565. func (s *AiStorage) GetInferenceTaskTotalNum() (int32, error) {
  566. var total int32
  567. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 or `task_type_dict` = 12").Scan(&total)
  568. if tx.Error != nil {
  569. logx.Errorf(tx.Error.Error())
  570. return 0, tx.Error
  571. }
  572. return total, nil
  573. }
  574. func (s *AiStorage) GetInferenceTaskRunningNum() (int32, error) {
  575. var total int32
  576. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 and `status` = 'Running'").Scan(&total)
  577. if tx.Error != nil {
  578. logx.Errorf(tx.Error.Error())
  579. return 0, tx.Error
  580. }
  581. return total, nil
  582. }
  583. func (s *AiStorage) GetTrainingTaskTotalNum() (int32, error) {
  584. var total int32
  585. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10").Scan(&total)
  586. if tx.Error != nil {
  587. logx.Errorf(tx.Error.Error())
  588. return 0, tx.Error
  589. }
  590. return total, nil
  591. }
  592. func (s *AiStorage) GetTrainingTaskRunningNum() (int32, error) {
  593. var total int32
  594. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10 and `status` = 'Running'").Scan(&total)
  595. if tx.Error != nil {
  596. logx.Errorf(tx.Error.Error())
  597. return 0, tx.Error
  598. }
  599. return total, nil
  600. }
  601. func (s *AiStorage) SaveInferDeployTask(taskName string, userId int64, modelName string, modelType string, desc string) (int64, error) {
  602. startTime := time.Now().Format(time.RFC3339)
  603. // 构建主任务结构体
  604. taskModel := models.AiDeployInstanceTask{
  605. Id: utils.GenSnowflakeID(),
  606. Name: taskName,
  607. UserId: userId,
  608. ModelName: modelName,
  609. ModelType: modelType,
  610. Desc: desc,
  611. CreateTime: startTime,
  612. UpdateTime: startTime,
  613. }
  614. // 保存任务数据到数据库
  615. tx := s.DbEngin.Table("ai_deploy_instance_task").Create(&taskModel)
  616. if tx.Error != nil {
  617. return 0, tx.Error
  618. }
  619. return taskModel.Id, nil
  620. }
  621. func (s *AiStorage) GetRunningDeployInstanceById(id int64, adapterId string) ([]*models.AiInferDeployInstance, error) {
  622. var list []*models.AiInferDeployInstance
  623. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `deploy_instance_task_id` = ? and `adapter_id` = ? and `status` = 'Running'", id, adapterId).Scan(&list)
  624. if tx.Error != nil {
  625. logx.Errorf(tx.Error.Error())
  626. return nil, tx.Error
  627. }
  628. return list, nil
  629. }
  630. func (s *AiStorage) IsDeployTaskNameDuplicated(name string) (bool, error) {
  631. var total int32
  632. tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
  633. if tx.Error != nil {
  634. return false, tx.Error
  635. }
  636. if total == 0 {
  637. return false, nil
  638. }
  639. return true, nil
  640. }
  641. func (s *AiStorage) GetClustersById(id string) (*types.ClusterInfo, error) {
  642. var resp types.ClusterInfo
  643. tx := s.DbEngin.Raw("select * from t_cluster where `id` = ? ", id).Scan(&resp)
  644. if tx.Error != nil {
  645. logx.Errorf(tx.Error.Error())
  646. return nil, tx.Error
  647. }
  648. return &resp, nil
  649. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.