You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

aiStorage.go 23 kB

3 months ago
4 months ago
3 months ago
6 months ago
3 months ago
11 months ago
4 months ago
4 months ago
4 months ago
4 months ago
3 months ago
10 months ago
11 months ago
4 months ago
4 months ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. package database
  2. import (
  3. "fmt"
  4. "strconv"
  5. "time"
  6. jsoniter "github.com/json-iterator/go"
  7. "github.com/zeromicro/go-zero/core/logx"
  8. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
  15. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  16. "gorm.io/gorm"
  17. )
  18. type AiStorage struct {
  19. DbEngin *gorm.DB
  20. }
  21. func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) {
  22. var resp types.ClusterListResp
  23. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.List)
  24. if tx.Error != nil {
  25. logx.Errorf(tx.Error.Error())
  26. return nil, tx.Error
  27. }
  28. return &resp, nil
  29. }
  30. func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, error) {
  31. var resp types.ClusterListResp
  32. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL and `adapter_id` = ? ORDER BY create_time Desc", id).Scan(&resp.List)
  33. if tx.Error != nil {
  34. logx.Errorf(tx.Error.Error())
  35. return nil, tx.Error
  36. }
  37. return &resp, nil
  38. }
  39. func (s *AiStorage) GetClusterNameById(id string) (string, error) {
  40. var name string
  41. tx := s.DbEngin.Raw("select `description` from t_cluster where `id` = ?", id).Scan(&name)
  42. if tx.Error != nil {
  43. logx.Errorf(tx.Error.Error())
  44. return "", tx.Error
  45. }
  46. return name, nil
  47. }
  48. func (s *AiStorage) GetAdapterNameById(id string) (string, error) {
  49. var name string
  50. tx := s.DbEngin.Raw("select `name` from t_adapter where `id` = ?", id).Scan(&name)
  51. if tx.Error != nil {
  52. logx.Errorf(tx.Error.Error())
  53. return "", tx.Error
  54. }
  55. return name, nil
  56. }
  57. func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
  58. var list []types.AdapterInfo
  59. var ids []string
  60. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  61. db = db.Where("type = ?", adapterType)
  62. err := db.Order("create_time desc").Find(&list).Error
  63. if err != nil {
  64. return nil, err
  65. }
  66. for _, info := range list {
  67. ids = append(ids, info.Id)
  68. }
  69. return ids, nil
  70. }
  71. func (s *AiStorage) GetAdaptersByType(adapterType string) ([]*types.AdapterInfo, error) {
  72. var list []*types.AdapterInfo
  73. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  74. db = db.Where("type = ?", adapterType)
  75. err := db.Order("create_time desc").Find(&list).Error
  76. if err != nil {
  77. return nil, err
  78. }
  79. return list, nil
  80. }
  81. func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, error) {
  82. var resp []*models.TaskAi
  83. db := s.DbEngin.Model(&models.TaskAi{}).Table("task_ai")
  84. db = db.Where("adapter_id = ?", adapterId)
  85. err := db.Order("commit_time desc").Find(&resp).Error
  86. if err != nil {
  87. return nil, err
  88. }
  89. return resp, nil
  90. }
  91. func (s *AiStorage) GetAiTaskListById(id int64) ([]*models.TaskAi, error) {
  92. var aiTaskList []*models.TaskAi
  93. tx := s.DbEngin.Raw("select * from task_ai where `task_id` = ? ", id).Scan(&aiTaskList)
  94. if tx.Error != nil {
  95. return nil, tx.Error
  96. }
  97. return aiTaskList, nil
  98. }
  99. func (s *AiStorage) DoesTaskNameExist(name string, taskType string) (bool, error) {
  100. var total int32
  101. switch taskType {
  102. case "training":
  103. tx := s.DbEngin.Raw("select count(*) from task where `name` = ?", name).Scan(&total)
  104. if tx.Error != nil {
  105. logx.Errorf(tx.Error.Error())
  106. return false, tx.Error
  107. }
  108. case "inference":
  109. tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
  110. if tx.Error != nil {
  111. logx.Errorf(tx.Error.Error())
  112. return false, tx.Error
  113. }
  114. }
  115. return total > 0, nil
  116. }
  117. func (s *AiStorage) SaveTask(name string, desc string, userId int64, strategyCode int64, synergyStatus int64, aiType string, yaml string, saveToChain func(task models.Task, id int64) error, userName string) (int64, error) {
  118. startTime := time.Now()
  119. // 构建主任务结构体
  120. taskModel := models.Task{
  121. Id: utils.GenSnowflakeID(),
  122. Status: constants.Saved,
  123. Description: desc,
  124. Name: name,
  125. UserId: userId,
  126. UserName: userName,
  127. SynergyStatus: synergyStatus,
  128. Strategy: strategyCode,
  129. AdapterTypeDict: "1",
  130. TaskTypeDict: aiType,
  131. YamlString: yaml,
  132. StartTime: &startTime,
  133. CommitTime: time.Now(),
  134. }
  135. // 保存任务数据到数据库
  136. tx := s.DbEngin.Create(&taskModel)
  137. if tx.Error != nil {
  138. return 0, tx.Error
  139. }
  140. id := taskModel.Id
  141. // 数据上链
  142. if saveToChain != nil {
  143. err := saveToChain(taskModel, id)
  144. if err != nil {
  145. logx.Error(err)
  146. }
  147. }
  148. return id, nil
  149. }
  150. func (s *AiStorage) UpdateTask(task *types.TaskModel) error {
  151. task.UpdatedTime = time.Now().Format(constants.Layout)
  152. tx := s.DbEngin.Table("task").Model(task).Updates(task)
  153. if tx.Error != nil {
  154. logx.Errorf(tx.Error.Error())
  155. return tx.Error
  156. }
  157. return nil
  158. }
  159. func (s *AiStorage) AllTaskLastMonth() ([]*types.TaskModel, error) {
  160. var list []*types.TaskModel
  161. // 构建数据库查询
  162. db := s.DbEngin.Model(&types.TaskModel{}).Table("task")
  163. now := time.Now()
  164. lastMonth := now.AddDate(0, -1, 0)
  165. db = db.Where("created_time >= ?", lastMonth)
  166. // 查询任务列表
  167. if err := db.Order("created_time desc").Find(&list).Error; err != nil {
  168. return nil, result.NewDefaultError(err.Error())
  169. }
  170. return list, nil
  171. }
  // Resource is one entry of a task's resource requirements as decoded from
  // JSON. getResourceSpec reads Type and Name to identify a compute card.
  type Resource struct {
  	// Name is the resource name (JSON key "name").
  	Name string `json:"name"`
  	// Number is the requested amount, kept as a string (JSON key "number").
  	Number string `json:"number"`
  	// Type is the resource kind, e.g. "GPU" (JSON key "type").
  	Type string `json:"type"`
  }
  177. func (s *AiStorage) SaveAiTask(taskId int64, opt option.Option, adapterName string, clusterId string, clusterName string, jobId string, status string, msg string) error {
  178. var aiOpt *option.AiOption
  179. switch (opt).(type) {
  180. case *option.AiOption:
  181. aiOpt = (opt).(*option.AiOption)
  182. case *option.InferOption:
  183. inferOpt := (opt).(*option.InferOption)
  184. aiOpt = &option.AiOption{}
  185. aiOpt.TaskName = inferOpt.TaskName
  186. aiOpt.Replica = inferOpt.Replica
  187. aiOpt.AdapterId = inferOpt.AdapterId
  188. aiOpt.TaskType = inferOpt.ModelType
  189. aiOpt.ModelName = inferOpt.ModelName
  190. aiOpt.StrategyName = inferOpt.Strategy
  191. }
  192. // 构建主任务结构体
  193. aId, err := strconv.ParseInt(aiOpt.AdapterId, 10, 64)
  194. if err != nil {
  195. return err
  196. }
  197. cId, err := strconv.ParseInt(clusterId, 10, 64)
  198. if err != nil {
  199. return err
  200. }
  201. aiResourceSpec, err := s.getResourceSpec(aiOpt, clusterName)
  202. if err != nil {
  203. return err
  204. }
  205. aiTaskModel := models.TaskAi{
  206. TaskId: taskId,
  207. AdapterId: aId,
  208. AdapterName: adapterName,
  209. ClusterId: cId,
  210. ClusterName: clusterName,
  211. Name: aiOpt.TaskName,
  212. Replica: int64(aiOpt.Replica),
  213. JobId: jobId,
  214. TaskType: aiOpt.TaskType,
  215. ModelName: aiOpt.ModelName,
  216. Strategy: aiOpt.StrategyName,
  217. Status: status,
  218. Msg: msg,
  219. Output: aiOpt.Output,
  220. Card: aiOpt.ComputeCard,
  221. StartTime: time.Now().Format(time.RFC3339),
  222. CommitTime: time.Now(),
  223. ResourceSpec: *aiResourceSpec,
  224. }
  225. // 保存任务数据到数据库
  226. tx := s.DbEngin.Table("task_ai").Create(&aiTaskModel)
  227. if tx.Error != nil {
  228. return tx.Error
  229. }
  230. return nil
  231. }
  232. func (s *AiStorage) getResourceSpec(aiOpt *option.AiOption, clusterName string) (*models.AIResourceSpec, error) {
  233. var aiResourceSpec models.AIResourceSpec
  234. // 序列化和反序列化资源需求
  235. jsonData, err := jsoniter.Marshal(aiOpt.ResourcesRequired)
  236. if err != nil {
  237. return nil, fmt.Errorf("failed to marshal ResourcesRequired: %w", err)
  238. }
  239. if err := jsoniter.Unmarshal(jsonData, &aiResourceSpec.Specifications); err != nil {
  240. return nil, fmt.Errorf("failed to unmarshal to Specifications: %w", err)
  241. }
  242. // 从资源数据中提取计算卡信息
  243. var resources []Resource
  244. if err := jsoniter.Unmarshal(jsonData, &resources); err != nil {
  245. return nil, fmt.Errorf("failed to unmarshal resources: %w", err)
  246. }
  247. // 查找计算卡类型和名称
  248. computeCardType := ""
  249. computeCardName := ""
  250. for _, res := range resources {
  251. switch res.Type {
  252. case "GPU", "DCU", "GCU", "ILUVATAR-GPGPU", "MLU", "NPU":
  253. computeCardType = res.Type
  254. computeCardName = res.Name
  255. break // 只取第一个匹配的计算卡
  256. }
  257. }
  258. // 设置资源名称
  259. if computeCardType != "" && computeCardName != "" {
  260. aiResourceSpec.ResourceName = fmt.Sprintf("%s_%s_%s", clusterName, computeCardType, computeCardName)
  261. } else if aiOpt.ComputeCard != "" {
  262. aiResourceSpec.ResourceName = fmt.Sprintf("%s_%s", clusterName, aiOpt.ComputeCard)
  263. } else {
  264. aiResourceSpec.ResourceName = fmt.Sprintf("%s_UNKNOWN_None", clusterName)
  265. }
  266. aiResourceSpec.ResourceType = constants.TaskTypeAiTrain
  267. return &aiResourceSpec, nil
  268. }
  269. func (s *AiStorage) SaveAiTaskImageSubTask(ta *models.TaskAiSub) error {
  270. tx := s.DbEngin.Table("task_ai_sub").Create(ta)
  271. if tx.Error != nil {
  272. return tx.Error
  273. }
  274. return nil
  275. }
  276. func (s *AiStorage) SaveClusterTaskQueue(adapterId string, clusterId string, queueNum int64) error {
  277. aId, err := strconv.ParseInt(adapterId, 10, 64)
  278. if err != nil {
  279. return err
  280. }
  281. cId, err := strconv.ParseInt(clusterId, 10, 64)
  282. if err != nil {
  283. return err
  284. }
  285. taskQueue := models.TClusterTaskQueue{
  286. AdapterId: aId,
  287. ClusterId: cId,
  288. QueueNum: queueNum,
  289. }
  290. tx := s.DbEngin.Create(&taskQueue)
  291. if tx.Error != nil {
  292. return tx.Error
  293. }
  294. return nil
  295. }
  296. func (s *AiStorage) GetClusterTaskQueues(adapterId string, clusterId string) ([]*models.TClusterTaskQueue, error) {
  297. var taskQueues []*models.TClusterTaskQueue
  298. tx := s.DbEngin.Raw("select * from t_cluster_task_queue where `adapter_id` = ? and `cluster_id` = ?", adapterId, clusterId).Scan(&taskQueues)
  299. if tx.Error != nil {
  300. logx.Errorf(tx.Error.Error())
  301. return nil, tx.Error
  302. }
  303. return taskQueues, nil
  304. }
  305. func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
  306. var aiTask models.TaskAi
  307. tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
  308. if tx.Error != nil {
  309. logx.Errorf(tx.Error.Error())
  310. return "", tx.Error
  311. }
  312. return aiTask.JobId, nil
  313. }
  314. func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterResource, error) {
  315. var clusterResource models.TClusterResource
  316. tx := s.DbEngin.Raw("select * from t_cluster_resource where `cluster_id` = ?", clusterId).Scan(&clusterResource)
  317. if tx.Error != nil {
  318. logx.Errorf(tx.Error.Error())
  319. return nil, tx.Error
  320. }
  321. return &clusterResource, nil
  322. }
  323. func (s *AiStorage) SaveClusterResources(adapterId string, clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
  324. memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64, cardHours float64,
  325. balance float64, taskCompleted int64) error {
  326. cId, err := strconv.ParseInt(clusterId, 10, 64)
  327. if err != nil {
  328. return err
  329. }
  330. aId, err := strconv.ParseInt(adapterId, 10, 64)
  331. if err != nil {
  332. return err
  333. }
  334. clusterResource := models.TClusterResource{
  335. AdapterId: aId,
  336. ClusterId: cId,
  337. ClusterName: clusterName,
  338. ClusterType: clusterType,
  339. CpuAvail: cpuAvail,
  340. CpuTotal: cpuTotal,
  341. MemAvail: memAvail,
  342. MemTotal: memTotal,
  343. DiskAvail: diskAvail,
  344. DiskTotal: diskTotal,
  345. GpuAvail: gpuAvail,
  346. GpuTotal: gpuTotal,
  347. CardTotal: cardTotal,
  348. CardTopsTotal: topsTotal,
  349. CardHours: cardHours,
  350. Balance: balance,
  351. TaskCompleted: taskCompleted,
  352. }
  353. tx := s.DbEngin.Create(&clusterResource)
  354. if tx.Error != nil {
  355. return tx.Error
  356. }
  357. // prometheus
  358. param := tracker.ClusterLoadRecord{
  359. AdapterId: aId,
  360. ClusterName: clusterName,
  361. CpuAvail: cpuAvail,
  362. CpuTotal: cpuTotal,
  363. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  364. MemoryAvail: memAvail,
  365. MemoryTotal: memTotal,
  366. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  367. DiskAvail: diskAvail,
  368. DiskTotal: diskTotal,
  369. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  370. }
  371. tracker.SyncClusterLoad(param)
  372. return nil
  373. }
  374. func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
  375. tx := s.DbEngin.Where("cluster_id = ?", clusterResource.ClusterId).Updates(clusterResource)
  376. if tx.Error != nil {
  377. return tx.Error
  378. }
  379. // prometheus
  380. param := tracker.ClusterLoadRecord{
  381. AdapterId: clusterResource.AdapterId,
  382. ClusterName: clusterResource.ClusterName,
  383. CpuAvail: clusterResource.CpuAvail,
  384. CpuTotal: clusterResource.CpuTotal,
  385. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  386. MemoryAvail: clusterResource.MemAvail,
  387. MemoryTotal: clusterResource.MemTotal,
  388. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  389. DiskAvail: clusterResource.DiskAvail,
  390. DiskTotal: clusterResource.DiskTotal,
  391. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  392. }
  393. tracker.SyncClusterLoad(param)
  394. return nil
  395. }
  396. func (s *AiStorage) UpdateAiTask(task *models.TaskAi) error {
  397. tx := s.DbEngin.Updates(task)
  398. if tx.Error != nil {
  399. return tx.Error
  400. }
  401. return nil
  402. }
  403. func (s *AiStorage) UpdateTaskByModel(task *models.Task) error {
  404. tx := s.DbEngin.Updates(task)
  405. if tx.Error != nil {
  406. return tx.Error
  407. }
  408. return nil
  409. }
  410. func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
  411. var strategy int64
  412. sqlStr := `select t_dict_item.item_value
  413. from t_dict
  414. left join t_dict_item on t_dict.id = t_dict_item.dict_id
  415. where item_text = ?
  416. and t_dict.dict_code = 'schedule_Strategy'`
  417. //查询调度策略
  418. err := s.DbEngin.Raw(sqlStr, name).Scan(&strategy).Error
  419. if err != nil {
  420. return strategy, nil
  421. }
  422. return strategy, nil
  423. }
  424. func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterId string, clusterName string, taskName string, noticeType string, incident string) {
  425. aId, err := strconv.ParseInt(adapterId, 10, 64)
  426. if err != nil {
  427. logx.Errorf("adapterId convert failure, err: %v", err)
  428. }
  429. var cId int64
  430. if clusterId != "" {
  431. cId, err = strconv.ParseInt(clusterId, 10, 64)
  432. if err != nil {
  433. logx.Errorf("clusterId convert failure, err: %v", err)
  434. }
  435. }
  436. noticeInfo := clientCore.NoticeInfo{
  437. AdapterId: aId,
  438. AdapterName: adapterName,
  439. ClusterId: cId,
  440. ClusterName: clusterName,
  441. NoticeType: noticeType,
  442. TaskName: taskName,
  443. Incident: incident,
  444. CreatedTime: time.Now(),
  445. }
  446. result := s.DbEngin.Table("t_notice").Create(&noticeInfo)
  447. if result.Error != nil {
  448. logx.Errorf("Task creation failure, err: %v", result.Error)
  449. }
  450. }
  451. func (s *AiStorage) SaveInferDeployInstance(taskId int64, instanceId string, instanceName string, adapterId int64,
  452. adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string, clusterType string) (int64, error) {
  453. startTime := time.Now().Format(time.RFC3339)
  454. // 构建主任务结构体
  455. insModel := models.AiInferDeployInstance{
  456. DeployInstanceTaskId: taskId,
  457. InstanceId: instanceId,
  458. InstanceName: instanceName,
  459. AdapterId: adapterId,
  460. AdapterName: adapterName,
  461. ClusterId: clusterId,
  462. ClusterName: clusterName,
  463. ModelName: modelName,
  464. ModelType: modelType,
  465. InferCard: inferCard,
  466. ClusterType: clusterType,
  467. Status: constants.Deploying,
  468. CreateTime: startTime,
  469. UpdateTime: startTime,
  470. }
  471. // 保存任务数据到数据库
  472. tx := s.DbEngin.Table("ai_infer_deploy_instance").Create(&insModel)
  473. if tx.Error != nil {
  474. return 0, tx.Error
  475. }
  476. return insModel.Id, nil
  477. }
  478. func (s *AiStorage) UpdateInferDeployInstance(instance *models.AiInferDeployInstance, needUpdateTime bool) error {
  479. if needUpdateTime {
  480. instance.UpdateTime = time.Now().Format(time.RFC3339)
  481. }
  482. tx := s.DbEngin.Table("ai_infer_deploy_instance").Updates(instance)
  483. if tx.Error != nil {
  484. logx.Errorf(tx.Error.Error())
  485. return tx.Error
  486. }
  487. return nil
  488. }
  489. func (s *AiStorage) GetTaskById(id int64) (*models.Task, error) {
  490. var task *models.Task
  491. tx := s.DbEngin.Raw("select * from task where `id` = ?", id).Scan(&task)
  492. if tx.Error != nil {
  493. logx.Errorf(tx.Error.Error())
  494. return nil, tx.Error
  495. }
  496. return task, nil
  497. }
  498. func (s *AiStorage) GetInferDeployInstanceById(id int64) (*models.AiInferDeployInstance, error) {
  499. var deployIns *models.AiInferDeployInstance
  500. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `id` = ?", id).Scan(&deployIns)
  501. if tx.Error != nil {
  502. logx.Errorf(tx.Error.Error())
  503. return nil, tx.Error
  504. }
  505. return deployIns, nil
  506. }
  507. func (s *AiStorage) GetDeployTaskById(id int64) (*models.AiDeployInstanceTask, error) {
  508. var task *models.AiDeployInstanceTask
  509. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `id` = ?", id).Scan(&task)
  510. if tx.Error != nil {
  511. logx.Errorf(tx.Error.Error())
  512. return nil, tx.Error
  513. }
  514. return task, nil
  515. }
  516. func (s *AiStorage) GetDeployTaskListByType(modelType string) ([]*models.AiDeployInstanceTask, error) {
  517. var tasks []*models.AiDeployInstanceTask
  518. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `model_type` = ?", modelType).Scan(&tasks)
  519. if tx.Error != nil {
  520. logx.Errorf(tx.Error.Error())
  521. return nil, tx.Error
  522. }
  523. return tasks, nil
  524. }
  525. func (s *AiStorage) GetAllDeployTasks() ([]*models.AiDeployInstanceTask, error) {
  526. var tasks []*models.AiDeployInstanceTask
  527. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&tasks)
  528. if tx.Error != nil {
  529. logx.Errorf(tx.Error.Error())
  530. return nil, tx.Error
  531. }
  532. return tasks, nil
  533. }
  534. func (s *AiStorage) UpdateDeployTask(task *models.AiDeployInstanceTask, needUpdateTime bool) error {
  535. if needUpdateTime {
  536. task.UpdateTime = time.Now().Format(time.RFC3339)
  537. }
  538. tx := s.DbEngin.Table("ai_deploy_instance_task").Updates(task)
  539. if tx.Error != nil {
  540. logx.Errorf(tx.Error.Error())
  541. return tx.Error
  542. }
  543. return nil
  544. }
  545. func (s *AiStorage) DeleteDeployTaskById(id int64) error {
  546. tx := s.DbEngin.Delete(&models.AiDeployInstanceTask{}, id)
  547. if tx.Error != nil {
  548. logx.Errorf(tx.Error.Error())
  549. return tx.Error
  550. }
  551. return nil
  552. }
  553. func (s *AiStorage) UpdateDeployTaskById(id int64) error {
  554. task, err := s.GetDeployTaskById(id)
  555. if err != nil {
  556. return err
  557. }
  558. err = s.UpdateDeployTask(task, true)
  559. if err != nil {
  560. return err
  561. }
  562. return nil
  563. }
  564. func (s *AiStorage) GetInstanceListByDeployTaskId(id int64) ([]*models.AiInferDeployInstance, error) {
  565. var list []*models.AiInferDeployInstance
  566. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `deploy_instance_task_id` = ?", id).Scan(&list)
  567. if tx.Error != nil {
  568. logx.Errorf(tx.Error.Error())
  569. return nil, tx.Error
  570. }
  571. return list, nil
  572. }
  573. func (s *AiStorage) GetInferDeployInstanceListLastMonth() ([]*models.AiInferDeployInstance, error) {
  574. var list []*models.AiInferDeployInstance
  575. now := time.Now()
  576. lastMonth := now.AddDate(0, -1, 0)
  577. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `create_time` >= ?", lastMonth).Scan(&list)
  578. if tx.Error != nil {
  579. logx.Errorf(tx.Error.Error())
  580. return nil, tx.Error
  581. }
  582. return list, nil
  583. }
  584. func (s *AiStorage) GetDeployTaskList() ([]*models.AiDeployInstanceTask, error) {
  585. var list []*models.AiDeployInstanceTask
  586. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&list)
  587. if tx.Error != nil {
  588. logx.Errorf(tx.Error.Error())
  589. return nil, tx.Error
  590. }
  591. return list, nil
  592. }
  593. func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
  594. var total int32
  595. tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&total)
  596. if tx.Error != nil {
  597. logx.Errorf(tx.Error.Error())
  598. return 0, tx.Error
  599. }
  600. return total, nil
  601. }
  602. func (s *AiStorage) GetInferDeployInstanceRunningNum() (int32, error) {
  603. var total int32
  604. tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance where `status` = 'Running'").Scan(&total)
  605. if tx.Error != nil {
  606. logx.Errorf(tx.Error.Error())
  607. return 0, tx.Error
  608. }
  609. return total, nil
  610. }
  611. func (s *AiStorage) GetInferenceTaskTotalNum() (int32, error) {
  612. var total int32
  613. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 or `task_type_dict` = 12").Scan(&total)
  614. if tx.Error != nil {
  615. logx.Errorf(tx.Error.Error())
  616. return 0, tx.Error
  617. }
  618. return total, nil
  619. }
  620. func (s *AiStorage) GetInferenceTaskRunningNum() (int32, error) {
  621. var total int32
  622. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 and `status` = 'Running'").Scan(&total)
  623. if tx.Error != nil {
  624. logx.Errorf(tx.Error.Error())
  625. return 0, tx.Error
  626. }
  627. return total, nil
  628. }
  629. func (s *AiStorage) GetTrainingTaskTotalNum() (int32, error) {
  630. var total int32
  631. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10").Scan(&total)
  632. if tx.Error != nil {
  633. logx.Errorf(tx.Error.Error())
  634. return 0, tx.Error
  635. }
  636. return total, nil
  637. }
  638. func (s *AiStorage) GetTrainingTaskRunningNum() (int32, error) {
  639. var total int32
  640. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10 and `status` = 'Running'").Scan(&total)
  641. if tx.Error != nil {
  642. logx.Errorf(tx.Error.Error())
  643. return 0, tx.Error
  644. }
  645. return total, nil
  646. }
  647. func (s *AiStorage) SaveInferDeployTask(taskName string, userId int64, modelName string, modelType string, desc string) (int64, error) {
  648. startTime := time.Now().Format(time.RFC3339)
  649. // 构建主任务结构体
  650. taskModel := models.AiDeployInstanceTask{
  651. Id: utils.GenSnowflakeID(),
  652. Name: taskName,
  653. UserId: userId,
  654. ModelName: modelName,
  655. ModelType: modelType,
  656. Desc: desc,
  657. CreateTime: startTime,
  658. UpdateTime: startTime,
  659. }
  660. // 保存任务数据到数据库
  661. tx := s.DbEngin.Table("ai_deploy_instance_task").Create(&taskModel)
  662. if tx.Error != nil {
  663. return 0, tx.Error
  664. }
  665. return taskModel.Id, nil
  666. }
  667. func (s *AiStorage) GetRunningDeployInstanceById(id int64, adapterId string) ([]*models.AiInferDeployInstance, error) {
  668. var list []*models.AiInferDeployInstance
  669. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `deploy_instance_task_id` = ? and `adapter_id` = ? and `status` = 'Running'", id, adapterId).Scan(&list)
  670. if tx.Error != nil {
  671. logx.Errorf(tx.Error.Error())
  672. return nil, tx.Error
  673. }
  674. return list, nil
  675. }
  676. func (s *AiStorage) IsDeployTaskNameDuplicated(name string) (bool, error) {
  677. var total int32
  678. tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
  679. if tx.Error != nil {
  680. return false, tx.Error
  681. }
  682. if total == 0 {
  683. return false, nil
  684. }
  685. return true, nil
  686. }
  687. func (s *AiStorage) GetClustersById(id string) (*types.ClusterInfo, error) {
  688. var resp types.ClusterInfo
  689. tx := s.DbEngin.Raw("select * from t_cluster where `id` = ? ", id).Scan(&resp)
  690. if tx.Error != nil {
  691. logx.Errorf(tx.Error.Error())
  692. return nil, tx.Error
  693. }
  694. return &resp, nil
  695. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.