You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

task_resource_usage.go 2.6 kB

3 months ago
3 months ago
3 months ago
3 months ago
3 months ago
3 months ago
3 months ago
3 months ago
3 months ago
3 months ago
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. package xjlab
  2. import (
  3. "context"
  4. "fmt"
  5. "github.com/pkg/errors"
  6. "github.com/zeromicro/go-zero/core/logx"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  11. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  12. "gorm.io/gorm"
  13. )
  14. type TaskResourceUsageLogic struct {
  15. logx.Logger
  16. ctx context.Context
  17. svcCtx *svc.ServiceContext
  18. hpcService *service.HpcService
  19. }
  20. func NewTaskResourceUsageLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskResourceUsageLogic {
  21. cache := make(map[string]interface{}, 10)
  22. hpcService, err := service.NewHpcService(&svcCtx.Config, svcCtx.Scheduler.HpcStorages, cache)
  23. if err != nil {
  24. return nil
  25. }
  26. return &TaskResourceUsageLogic{
  27. Logger: logx.WithContext(ctx),
  28. ctx: ctx,
  29. svcCtx: svcCtx,
  30. hpcService: hpcService,
  31. }
  32. }
  33. func (l *TaskResourceUsageLogic) TaskResourceUsage(req *types.FId) (interface{}, error) {
  34. task := &models.Task{}
  35. var resp interface{}
  36. if errors.Is(l.svcCtx.DbEngin.Where("id", req.Id).First(&task).Error, gorm.ErrRecordNotFound) {
  37. return nil, errors.New("记录不存在")
  38. }
  39. switch task.AdapterTypeDict {
  40. case constants.AdapterTypeCloud:
  41. return nil, nil
  42. case constants.AdapterTypeAI:
  43. return nil, nil
  44. case constants.AdapterTypeHPC:
  45. // 获取HPC任务的资源使用情况
  46. usage, err := l.GetHpcTaskResourceUsage(req)
  47. if err != nil {
  48. return nil, err
  49. }
  50. resp = usage
  51. }
  52. return resp, nil
  53. }
  // TaskHPCResult receives one row of the task / task_hpc join: the task id
  // together with the HPC job, adapter and cluster identifiers.
  type TaskHPCResult struct {
  	// ID maps to t.id.
  	ID uint `gorm:"column:id"`
  	// JobID maps to hpc.job_id.
  	JobID string `gorm:"column:job_id"`
  	// AdapterId maps to hpc.adapter_id.
  	AdapterId string `gorm:"column:adapter_id"`
  	// ClusterId maps to hpc.cluster_id.
  	ClusterId string `gorm:"column:cluster_id"`
  }
  60. func (l *TaskResourceUsageLogic) GetHpcTaskResourceUsage(req *types.FId) (resp interface{}, err error) {
  61. var hpcR TaskHPCResult
  62. tx := l.svcCtx.DbEngin.Raw(
  63. "SELECT t.id, hpc.job_id ,hpc.adapter_id ,hpc.cluster_id FROM task t "+
  64. "INNER JOIN task_hpc hpc ON t.id = hpc.task_id "+
  65. "WHERE adapter_type_dict = 2 AND t.id = ?",
  66. req.Id,
  67. ).Scan(&hpcR).Error
  68. if tx != nil {
  69. return nil, fmt.Errorf("数据库查询失败: %v", tx.Error)
  70. }
  71. if hpcR.ID == 0 {
  72. return nil, fmt.Errorf("任务不存在")
  73. }
  74. // 获取资源使用情况
  75. resp, err = l.hpcService.HpcExecutorAdapterMap[hpcR.AdapterId].GetTaskResourceUsage(l.ctx, hpcR.JobID, hpcR.ClusterId)
  76. if err != nil {
  77. return nil, err
  78. }
  79. return resp, nil
  80. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive and autonomous peer-to-peer manner.