You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

aiStorage.go 23 kB

3 months ago
4 months ago
2 months ago
3 months ago
6 months ago
3 months ago
11 months ago
4 months ago
4 months ago
4 months ago
4 months ago
3 months ago
10 months ago
11 months ago
4 months ago
4 months ago
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. package database
  2. import (
  3. "fmt"
  4. "strconv"
  5. "time"
  6. jsoniter "github.com/json-iterator/go"
  7. "github.com/zeromicro/go-zero/core/logx"
  8. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
  15. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  16. "gorm.io/gorm"
  17. )
  18. type AiStorage struct {
  19. DbEngin *gorm.DB
  20. }
  21. func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) {
  22. var resp types.ClusterListResp
  23. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.List)
  24. if tx.Error != nil {
  25. logx.Errorf(tx.Error.Error())
  26. return nil, tx.Error
  27. }
  28. return &resp, nil
  29. }
  30. func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, error) {
  31. var resp types.ClusterListResp
  32. tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL and `adapter_id` = ? ORDER BY create_time Desc", id).Scan(&resp.List)
  33. if tx.Error != nil {
  34. logx.Errorf(tx.Error.Error())
  35. return nil, tx.Error
  36. }
  37. return &resp, nil
  38. }
  39. func (s *AiStorage) GetClusterNameById(id string) (string, error) {
  40. var name string
  41. tx := s.DbEngin.Raw("select `description` from t_cluster where `id` = ?", id).Scan(&name)
  42. if tx.Error != nil {
  43. logx.Errorf(tx.Error.Error())
  44. return "", tx.Error
  45. }
  46. return name, nil
  47. }
  48. func (s *AiStorage) GetAdapterNameById(id string) (string, error) {
  49. var name string
  50. tx := s.DbEngin.Raw("select `name` from t_adapter where `id` = ?", id).Scan(&name)
  51. if tx.Error != nil {
  52. logx.Errorf(tx.Error.Error())
  53. return "", tx.Error
  54. }
  55. return name, nil
  56. }
  57. func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
  58. var list []types.AdapterInfo
  59. var ids []string
  60. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  61. db = db.Where("type = ?", adapterType)
  62. err := db.Order("create_time desc").Find(&list).Error
  63. if err != nil {
  64. return nil, err
  65. }
  66. for _, info := range list {
  67. ids = append(ids, info.Id)
  68. }
  69. return ids, nil
  70. }
  71. func (s *AiStorage) GetAdaptersByType(adapterType string) ([]*types.AdapterInfo, error) {
  72. var list []*types.AdapterInfo
  73. db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
  74. db = db.Where("type = ?", adapterType)
  75. err := db.Order("create_time desc").Find(&list).Error
  76. if err != nil {
  77. return nil, err
  78. }
  79. return list, nil
  80. }
  81. func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, error) {
  82. var resp []*models.TaskAi
  83. db := s.DbEngin.Model(&models.TaskAi{}).Table("task_ai")
  84. db = db.Where("adapter_id = ?", adapterId)
  85. err := db.Order("commit_time desc").Find(&resp).Error
  86. if err != nil {
  87. return nil, err
  88. }
  89. return resp, nil
  90. }
  91. func (s *AiStorage) GetAiTasksByTaskId(taskId string) ([]*models.TaskAi, error) {
  92. var resp []*models.TaskAi
  93. db := s.DbEngin.Model(&models.TaskAi{}).Table("task_ai")
  94. db = db.Where("task_id = ?", taskId)
  95. err := db.Order("commit_time desc").Find(&resp).Error
  96. if err != nil {
  97. return nil, err
  98. }
  99. return resp, nil
  100. }
  101. func (s *AiStorage) GetAiTaskListById(id int64) ([]*models.TaskAi, error) {
  102. var aiTaskList []*models.TaskAi
  103. tx := s.DbEngin.Raw("select * from task_ai where `task_id` = ? ", id).Scan(&aiTaskList)
  104. if tx.Error != nil {
  105. return nil, tx.Error
  106. }
  107. return aiTaskList, nil
  108. }
  109. func (s *AiStorage) DoesTaskNameExist(name string, taskType string) (bool, error) {
  110. var total int32
  111. switch taskType {
  112. case "training":
  113. tx := s.DbEngin.Raw("select count(*) from task where `name` = ?", name).Scan(&total)
  114. if tx.Error != nil {
  115. logx.Errorf(tx.Error.Error())
  116. return false, tx.Error
  117. }
  118. case "inference":
  119. tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
  120. if tx.Error != nil {
  121. logx.Errorf(tx.Error.Error())
  122. return false, tx.Error
  123. }
  124. }
  125. return total > 0, nil
  126. }
  127. func (s *AiStorage) SaveTask(name string, desc string, userId int64, strategyCode int64, synergyStatus int64, aiType string, yaml string, saveToChain func(task models.Task, id int64) error, userName string) (int64, error) {
  128. startTime := time.Now()
  129. // 构建主任务结构体
  130. taskModel := models.Task{
  131. Id: utils.GenSnowflakeID(),
  132. Status: constants.Saved,
  133. Description: desc,
  134. Name: name,
  135. UserId: userId,
  136. UserName: userName,
  137. SynergyStatus: synergyStatus,
  138. Strategy: strategyCode,
  139. AdapterTypeDict: "1",
  140. TaskTypeDict: aiType,
  141. YamlString: yaml,
  142. StartTime: &startTime,
  143. CommitTime: time.Now(),
  144. }
  145. // 保存任务数据到数据库
  146. tx := s.DbEngin.Create(&taskModel)
  147. if tx.Error != nil {
  148. return 0, tx.Error
  149. }
  150. id := taskModel.Id
  151. // 数据上链
  152. if saveToChain != nil {
  153. err := saveToChain(taskModel, id)
  154. if err != nil {
  155. logx.Error(err)
  156. }
  157. }
  158. return id, nil
  159. }
  160. func (s *AiStorage) UpdateTask(task *types.TaskModel) error {
  161. task.UpdatedTime = time.Now().Format(constants.Layout)
  162. tx := s.DbEngin.Table("task").Model(task).Updates(task)
  163. if tx.Error != nil {
  164. logx.Errorf(tx.Error.Error())
  165. return tx.Error
  166. }
  167. return nil
  168. }
  169. func (s *AiStorage) AllTaskLastMonth() ([]*types.TaskModel, error) {
  170. var list []*types.TaskModel
  171. // 构建数据库查询
  172. db := s.DbEngin.Model(&types.TaskModel{}).Table("task")
  173. now := time.Now()
  174. lastMonth := now.AddDate(0, -1, 0)
  175. db = db.Where("created_time >= ?", lastMonth)
  176. // 查询任务列表
  177. if err := db.Order("created_time desc").Find(&list).Error; err != nil {
  178. return nil, result.NewDefaultError(err.Error())
  179. }
  180. return list, nil
  181. }
  182. type Resource struct {
  183. Name string `json:"name"`
  184. Number string `json:"number"`
  185. Type string `json:"type"`
  186. }
  187. func (s *AiStorage) SaveAiTask(taskId int64, opt option.Option, adapterName string, clusterId string, clusterName string, jobId string, status string, msg string) error {
  188. var aiOpt *option.AiOption
  189. switch (opt).(type) {
  190. case *option.AiOption:
  191. aiOpt = (opt).(*option.AiOption)
  192. case *option.InferOption:
  193. inferOpt := (opt).(*option.InferOption)
  194. aiOpt = &option.AiOption{}
  195. aiOpt.TaskName = inferOpt.TaskName
  196. aiOpt.Replica = inferOpt.Replica
  197. aiOpt.AdapterId = inferOpt.AdapterId
  198. aiOpt.TaskType = inferOpt.ModelType
  199. aiOpt.ModelName = inferOpt.ModelName
  200. aiOpt.StrategyName = inferOpt.Strategy
  201. }
  202. // 构建主任务结构体
  203. aId, err := strconv.ParseInt(aiOpt.AdapterId, 10, 64)
  204. if err != nil {
  205. return err
  206. }
  207. cId, err := strconv.ParseInt(clusterId, 10, 64)
  208. if err != nil {
  209. return err
  210. }
  211. aiResourceSpec, err := s.getResourceSpec(aiOpt, clusterName)
  212. if err != nil {
  213. return err
  214. }
  215. aiTaskModel := models.TaskAi{
  216. TaskId: taskId,
  217. AdapterId: aId,
  218. AdapterName: adapterName,
  219. ClusterId: cId,
  220. ClusterName: clusterName,
  221. Name: aiOpt.TaskName,
  222. Replica: int64(aiOpt.Replica),
  223. JobId: jobId,
  224. TaskType: aiOpt.TaskType,
  225. ModelName: aiOpt.ModelName,
  226. Strategy: aiOpt.StrategyName,
  227. Status: status,
  228. Msg: msg,
  229. Output: aiOpt.Output,
  230. Card: aiOpt.ComputeCard,
  231. StartTime: time.Now().Format(time.RFC3339),
  232. CommitTime: time.Now(),
  233. ResourceSpec: *aiResourceSpec,
  234. }
  235. // 保存任务数据到数据库
  236. tx := s.DbEngin.Table("task_ai").Create(&aiTaskModel)
  237. if tx.Error != nil {
  238. return tx.Error
  239. }
  240. return nil
  241. }
  242. func (s *AiStorage) getResourceSpec(aiOpt *option.AiOption, clusterName string) (*models.AIResourceSpec, error) {
  243. var aiResourceSpec models.AIResourceSpec
  244. // 序列化和反序列化资源需求
  245. jsonData, err := jsoniter.Marshal(aiOpt.ResourcesRequired)
  246. if err != nil {
  247. return nil, fmt.Errorf("failed to marshal ResourcesRequired: %w", err)
  248. }
  249. if err := jsoniter.Unmarshal(jsonData, &aiResourceSpec.Specifications); err != nil {
  250. return nil, fmt.Errorf("failed to unmarshal to Specifications: %w", err)
  251. }
  252. // 从资源数据中提取计算卡信息
  253. var resources []Resource
  254. if err := jsoniter.Unmarshal(jsonData, &resources); err != nil {
  255. return nil, fmt.Errorf("failed to unmarshal resources: %w", err)
  256. }
  257. // 查找计算卡类型和名称
  258. computeCardType := ""
  259. computeCardName := ""
  260. for _, res := range resources {
  261. switch res.Type {
  262. case "GPU", "DCU", "GCU", "ILUVATAR-GPGPU", "MLU", "NPU":
  263. computeCardType = res.Type
  264. computeCardName = res.Name
  265. break // 只取第一个匹配的计算卡
  266. }
  267. }
  268. // 设置资源名称
  269. if computeCardType != "" && computeCardName != "" {
  270. aiResourceSpec.ResourceName = fmt.Sprintf("%s_%s_%s", clusterName, computeCardType, computeCardName)
  271. } else if aiOpt.ComputeCard != "" {
  272. aiResourceSpec.ResourceName = fmt.Sprintf("%s_%s", clusterName, aiOpt.ComputeCard)
  273. } else {
  274. aiResourceSpec.ResourceName = fmt.Sprintf("%s_UNKNOWN_None", clusterName)
  275. }
  276. aiResourceSpec.ResourceType = constants.TaskTypeAiTrain
  277. return &aiResourceSpec, nil
  278. }
  279. func (s *AiStorage) SaveAiTaskImageSubTask(ta *models.TaskAiSub) error {
  280. tx := s.DbEngin.Table("task_ai_sub").Create(ta)
  281. if tx.Error != nil {
  282. return tx.Error
  283. }
  284. return nil
  285. }
  286. func (s *AiStorage) SaveClusterTaskQueue(adapterId string, clusterId string, queueNum int64) error {
  287. aId, err := strconv.ParseInt(adapterId, 10, 64)
  288. if err != nil {
  289. return err
  290. }
  291. cId, err := strconv.ParseInt(clusterId, 10, 64)
  292. if err != nil {
  293. return err
  294. }
  295. taskQueue := models.TClusterTaskQueue{
  296. AdapterId: aId,
  297. ClusterId: cId,
  298. QueueNum: queueNum,
  299. }
  300. tx := s.DbEngin.Create(&taskQueue)
  301. if tx.Error != nil {
  302. return tx.Error
  303. }
  304. return nil
  305. }
  306. func (s *AiStorage) GetClusterTaskQueues(adapterId string, clusterId string) ([]*models.TClusterTaskQueue, error) {
  307. var taskQueues []*models.TClusterTaskQueue
  308. tx := s.DbEngin.Raw("select * from t_cluster_task_queue where `adapter_id` = ? and `cluster_id` = ?", adapterId, clusterId).Scan(&taskQueues)
  309. if tx.Error != nil {
  310. logx.Errorf(tx.Error.Error())
  311. return nil, tx.Error
  312. }
  313. return taskQueues, nil
  314. }
  315. func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
  316. var aiTask models.TaskAi
  317. tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
  318. if tx.Error != nil {
  319. logx.Errorf(tx.Error.Error())
  320. return "", tx.Error
  321. }
  322. return aiTask.JobId, nil
  323. }
  324. func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterResource, error) {
  325. var clusterResource models.TClusterResource
  326. tx := s.DbEngin.Raw("select * from t_cluster_resource where `cluster_id` = ?", clusterId).Scan(&clusterResource)
  327. if tx.Error != nil {
  328. logx.Errorf(tx.Error.Error())
  329. return nil, tx.Error
  330. }
  331. return &clusterResource, nil
  332. }
  333. func (s *AiStorage) SaveClusterResources(adapterId string, clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
  334. memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64, cardHours float64,
  335. balance float64, taskCompleted int64) error {
  336. cId, err := strconv.ParseInt(clusterId, 10, 64)
  337. if err != nil {
  338. return err
  339. }
  340. aId, err := strconv.ParseInt(adapterId, 10, 64)
  341. if err != nil {
  342. return err
  343. }
  344. clusterResource := models.TClusterResource{
  345. AdapterId: aId,
  346. ClusterId: cId,
  347. ClusterName: clusterName,
  348. ClusterType: clusterType,
  349. CpuAvail: cpuAvail,
  350. CpuTotal: cpuTotal,
  351. MemAvail: memAvail,
  352. MemTotal: memTotal,
  353. DiskAvail: diskAvail,
  354. DiskTotal: diskTotal,
  355. GpuAvail: gpuAvail,
  356. GpuTotal: gpuTotal,
  357. CardTotal: cardTotal,
  358. CardTopsTotal: topsTotal,
  359. CardHours: cardHours,
  360. Balance: balance,
  361. TaskCompleted: taskCompleted,
  362. }
  363. tx := s.DbEngin.Create(&clusterResource)
  364. if tx.Error != nil {
  365. return tx.Error
  366. }
  367. // prometheus
  368. param := tracker.ClusterLoadRecord{
  369. AdapterId: aId,
  370. ClusterName: clusterName,
  371. CpuAvail: cpuAvail,
  372. CpuTotal: cpuTotal,
  373. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  374. MemoryAvail: memAvail,
  375. MemoryTotal: memTotal,
  376. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  377. DiskAvail: diskAvail,
  378. DiskTotal: diskTotal,
  379. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  380. }
  381. tracker.SyncClusterLoad(param)
  382. return nil
  383. }
  384. func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
  385. tx := s.DbEngin.Where("cluster_id = ?", clusterResource.ClusterId).Updates(clusterResource)
  386. if tx.Error != nil {
  387. return tx.Error
  388. }
  389. // prometheus
  390. param := tracker.ClusterLoadRecord{
  391. AdapterId: clusterResource.AdapterId,
  392. ClusterName: clusterResource.ClusterName,
  393. CpuAvail: clusterResource.CpuAvail,
  394. CpuTotal: clusterResource.CpuTotal,
  395. CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
  396. MemoryAvail: clusterResource.MemAvail,
  397. MemoryTotal: clusterResource.MemTotal,
  398. MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
  399. DiskAvail: clusterResource.DiskAvail,
  400. DiskTotal: clusterResource.DiskTotal,
  401. DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
  402. }
  403. tracker.SyncClusterLoad(param)
  404. return nil
  405. }
  406. func (s *AiStorage) UpdateAiTask(task *models.TaskAi) error {
  407. tx := s.DbEngin.Updates(task)
  408. if tx.Error != nil {
  409. return tx.Error
  410. }
  411. return nil
  412. }
  413. func (s *AiStorage) UpdateTaskByModel(task *models.Task) error {
  414. tx := s.DbEngin.Updates(task)
  415. if tx.Error != nil {
  416. return tx.Error
  417. }
  418. return nil
  419. }
  420. func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
  421. var strategy int64
  422. sqlStr := `select t_dict_item.item_value
  423. from t_dict
  424. left join t_dict_item on t_dict.id = t_dict_item.dict_id
  425. where item_text = ?
  426. and t_dict.dict_code = 'schedule_Strategy'`
  427. //查询调度策略
  428. err := s.DbEngin.Raw(sqlStr, name).Scan(&strategy).Error
  429. if err != nil {
  430. return strategy, nil
  431. }
  432. return strategy, nil
  433. }
  434. func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterId string, clusterName string, taskName string, noticeType string, incident string) {
  435. aId, err := strconv.ParseInt(adapterId, 10, 64)
  436. if err != nil {
  437. logx.Errorf("adapterId convert failure, err: %v", err)
  438. }
  439. var cId int64
  440. if clusterId != "" {
  441. cId, err = strconv.ParseInt(clusterId, 10, 64)
  442. if err != nil {
  443. logx.Errorf("clusterId convert failure, err: %v", err)
  444. }
  445. }
  446. noticeInfo := clientCore.NoticeInfo{
  447. AdapterId: aId,
  448. AdapterName: adapterName,
  449. ClusterId: cId,
  450. ClusterName: clusterName,
  451. NoticeType: noticeType,
  452. TaskName: taskName,
  453. Incident: incident,
  454. CreatedTime: time.Now(),
  455. }
  456. result := s.DbEngin.Table("t_notice").Create(&noticeInfo)
  457. if result.Error != nil {
  458. logx.Errorf("Task creation failure, err: %v", result.Error)
  459. }
  460. }
  461. func (s *AiStorage) SaveInferDeployInstance(taskId int64, instanceId string, instanceName string, adapterId int64,
  462. adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string, clusterType string) (int64, error) {
  463. startTime := time.Now().Format(time.RFC3339)
  464. // 构建主任务结构体
  465. insModel := models.AiInferDeployInstance{
  466. DeployInstanceTaskId: taskId,
  467. InstanceId: instanceId,
  468. InstanceName: instanceName,
  469. AdapterId: adapterId,
  470. AdapterName: adapterName,
  471. ClusterId: clusterId,
  472. ClusterName: clusterName,
  473. ModelName: modelName,
  474. ModelType: modelType,
  475. InferCard: inferCard,
  476. ClusterType: clusterType,
  477. Status: constants.Deploying,
  478. CreateTime: startTime,
  479. UpdateTime: startTime,
  480. }
  481. // 保存任务数据到数据库
  482. tx := s.DbEngin.Table("ai_infer_deploy_instance").Create(&insModel)
  483. if tx.Error != nil {
  484. return 0, tx.Error
  485. }
  486. return insModel.Id, nil
  487. }
  488. func (s *AiStorage) UpdateInferDeployInstance(instance *models.AiInferDeployInstance, needUpdateTime bool) error {
  489. if needUpdateTime {
  490. instance.UpdateTime = time.Now().Format(time.RFC3339)
  491. }
  492. tx := s.DbEngin.Table("ai_infer_deploy_instance").Updates(instance)
  493. if tx.Error != nil {
  494. logx.Errorf(tx.Error.Error())
  495. return tx.Error
  496. }
  497. return nil
  498. }
  499. func (s *AiStorage) GetTaskById(id int64) (*models.Task, error) {
  500. var task *models.Task
  501. tx := s.DbEngin.Raw("select * from task where `id` = ?", id).Scan(&task)
  502. if tx.Error != nil {
  503. logx.Errorf(tx.Error.Error())
  504. return nil, tx.Error
  505. }
  506. return task, nil
  507. }
  508. func (s *AiStorage) GetInferDeployInstanceById(id int64) (*models.AiInferDeployInstance, error) {
  509. var deployIns *models.AiInferDeployInstance
  510. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `id` = ?", id).Scan(&deployIns)
  511. if tx.Error != nil {
  512. logx.Errorf(tx.Error.Error())
  513. return nil, tx.Error
  514. }
  515. return deployIns, nil
  516. }
  517. func (s *AiStorage) GetDeployTaskById(id int64) (*models.AiDeployInstanceTask, error) {
  518. var task *models.AiDeployInstanceTask
  519. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `id` = ?", id).Scan(&task)
  520. if tx.Error != nil {
  521. logx.Errorf(tx.Error.Error())
  522. return nil, tx.Error
  523. }
  524. return task, nil
  525. }
  526. func (s *AiStorage) GetDeployTaskListByType(modelType string) ([]*models.AiDeployInstanceTask, error) {
  527. var tasks []*models.AiDeployInstanceTask
  528. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `model_type` = ?", modelType).Scan(&tasks)
  529. if tx.Error != nil {
  530. logx.Errorf(tx.Error.Error())
  531. return nil, tx.Error
  532. }
  533. return tasks, nil
  534. }
  535. func (s *AiStorage) GetAllDeployTasks() ([]*models.AiDeployInstanceTask, error) {
  536. var tasks []*models.AiDeployInstanceTask
  537. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&tasks)
  538. if tx.Error != nil {
  539. logx.Errorf(tx.Error.Error())
  540. return nil, tx.Error
  541. }
  542. return tasks, nil
  543. }
  544. func (s *AiStorage) UpdateDeployTask(task *models.AiDeployInstanceTask, needUpdateTime bool) error {
  545. if needUpdateTime {
  546. task.UpdateTime = time.Now().Format(time.RFC3339)
  547. }
  548. tx := s.DbEngin.Table("ai_deploy_instance_task").Updates(task)
  549. if tx.Error != nil {
  550. logx.Errorf(tx.Error.Error())
  551. return tx.Error
  552. }
  553. return nil
  554. }
  555. func (s *AiStorage) DeleteDeployTaskById(id int64) error {
  556. tx := s.DbEngin.Delete(&models.AiDeployInstanceTask{}, id)
  557. if tx.Error != nil {
  558. logx.Errorf(tx.Error.Error())
  559. return tx.Error
  560. }
  561. return nil
  562. }
  563. func (s *AiStorage) UpdateDeployTaskById(id int64) error {
  564. task, err := s.GetDeployTaskById(id)
  565. if err != nil {
  566. return err
  567. }
  568. err = s.UpdateDeployTask(task, true)
  569. if err != nil {
  570. return err
  571. }
  572. return nil
  573. }
  574. func (s *AiStorage) GetInstanceListByDeployTaskId(id int64) ([]*models.AiInferDeployInstance, error) {
  575. var list []*models.AiInferDeployInstance
  576. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `deploy_instance_task_id` = ?", id).Scan(&list)
  577. if tx.Error != nil {
  578. logx.Errorf(tx.Error.Error())
  579. return nil, tx.Error
  580. }
  581. return list, nil
  582. }
  583. func (s *AiStorage) GetInferDeployInstanceListLastMonth() ([]*models.AiInferDeployInstance, error) {
  584. var list []*models.AiInferDeployInstance
  585. now := time.Now()
  586. lastMonth := now.AddDate(0, -1, 0)
  587. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `create_time` >= ?", lastMonth).Scan(&list)
  588. if tx.Error != nil {
  589. logx.Errorf(tx.Error.Error())
  590. return nil, tx.Error
  591. }
  592. return list, nil
  593. }
  594. func (s *AiStorage) GetDeployTaskList() ([]*models.AiDeployInstanceTask, error) {
  595. var list []*models.AiDeployInstanceTask
  596. tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&list)
  597. if tx.Error != nil {
  598. logx.Errorf(tx.Error.Error())
  599. return nil, tx.Error
  600. }
  601. return list, nil
  602. }
  603. func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
  604. var total int32
  605. tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&total)
  606. if tx.Error != nil {
  607. logx.Errorf(tx.Error.Error())
  608. return 0, tx.Error
  609. }
  610. return total, nil
  611. }
  612. func (s *AiStorage) GetInferDeployInstanceRunningNum() (int32, error) {
  613. var total int32
  614. tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance where `status` = 'Running'").Scan(&total)
  615. if tx.Error != nil {
  616. logx.Errorf(tx.Error.Error())
  617. return 0, tx.Error
  618. }
  619. return total, nil
  620. }
  621. func (s *AiStorage) GetInferenceTaskTotalNum() (int32, error) {
  622. var total int32
  623. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 or `task_type_dict` = 12").Scan(&total)
  624. if tx.Error != nil {
  625. logx.Errorf(tx.Error.Error())
  626. return 0, tx.Error
  627. }
  628. return total, nil
  629. }
  630. func (s *AiStorage) GetInferenceTaskRunningNum() (int32, error) {
  631. var total int32
  632. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 11 and `status` = 'Running'").Scan(&total)
  633. if tx.Error != nil {
  634. logx.Errorf(tx.Error.Error())
  635. return 0, tx.Error
  636. }
  637. return total, nil
  638. }
  639. func (s *AiStorage) GetTrainingTaskTotalNum() (int32, error) {
  640. var total int32
  641. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10").Scan(&total)
  642. if tx.Error != nil {
  643. logx.Errorf(tx.Error.Error())
  644. return 0, tx.Error
  645. }
  646. return total, nil
  647. }
  648. func (s *AiStorage) GetTrainingTaskRunningNum() (int32, error) {
  649. var total int32
  650. tx := s.DbEngin.Raw("select count(*) from task where `task_type_dict` = 10 and `status` = 'Running'").Scan(&total)
  651. if tx.Error != nil {
  652. logx.Errorf(tx.Error.Error())
  653. return 0, tx.Error
  654. }
  655. return total, nil
  656. }
  657. func (s *AiStorage) SaveInferDeployTask(taskName string, userId int64, modelName string, modelType string, desc string) (int64, error) {
  658. startTime := time.Now().Format(time.RFC3339)
  659. // 构建主任务结构体
  660. taskModel := models.AiDeployInstanceTask{
  661. Id: utils.GenSnowflakeID(),
  662. Name: taskName,
  663. UserId: userId,
  664. ModelName: modelName,
  665. ModelType: modelType,
  666. Desc: desc,
  667. CreateTime: startTime,
  668. UpdateTime: startTime,
  669. }
  670. // 保存任务数据到数据库
  671. tx := s.DbEngin.Table("ai_deploy_instance_task").Create(&taskModel)
  672. if tx.Error != nil {
  673. return 0, tx.Error
  674. }
  675. return taskModel.Id, nil
  676. }
  677. func (s *AiStorage) GetRunningDeployInstanceById(id int64, adapterId string) ([]*models.AiInferDeployInstance, error) {
  678. var list []*models.AiInferDeployInstance
  679. tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `deploy_instance_task_id` = ? and `adapter_id` = ? and `status` = 'Running'", id, adapterId).Scan(&list)
  680. if tx.Error != nil {
  681. logx.Errorf(tx.Error.Error())
  682. return nil, tx.Error
  683. }
  684. return list, nil
  685. }
  686. func (s *AiStorage) IsDeployTaskNameDuplicated(name string) (bool, error) {
  687. var total int32
  688. tx := s.DbEngin.Raw("select count(*) from ai_deploy_instance_task where `name` = ?", name).Scan(&total)
  689. if tx.Error != nil {
  690. return false, tx.Error
  691. }
  692. if total == 0 {
  693. return false, nil
  694. }
  695. return true, nil
  696. }
  697. func (s *AiStorage) GetClustersById(id string) (*types.ClusterInfo, error) {
  698. var resp types.ClusterInfo
  699. tx := s.DbEngin.Raw("select * from t_cluster where `id` = ? ", id).Scan(&resp)
  700. if tx.Error != nil {
  701. logx.Errorf(tx.Error.Error())
  702. return nil, tx.Error
  703. }
  704. return &resp, nil
  705. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.