
canceljoblogic.go

package hpc

import (
	"context"
	"fmt"

	"github.com/zeromicro/go-zero/core/logx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
)
type CancelJobLogic struct {
	logx.Logger
	ctx        context.Context
	svcCtx     *svc.ServiceContext
	hpcService *service.HpcService
}

// TaskHPCResult holds the joined task/task_hpc columns needed to cancel a job.
type TaskHPCResult struct {
	ID        uint   `gorm:"column:id"`         // maps to t.id
	JobID     string `gorm:"column:job_id"`     // maps to hpc.job_id
	AdapterId string `gorm:"column:adapter_id"` // maps to hpc.adapter_id
	ClusterId string `gorm:"column:cluster_id"` // maps to hpc.cluster_id
}
// NewCancelJobLogic builds the logic object. It returns nil when the HPC
// service fails to initialize, so callers must check for nil.
func NewCancelJobLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CancelJobLogic {
	cache := make(map[string]interface{}, 10)
	hpcService, err := service.NewHpcService(&svcCtx.Config, svcCtx.Scheduler.HpcStorages, cache)
	if err != nil {
		return nil
	}
	return &CancelJobLogic{
		Logger:     logx.WithContext(ctx),
		ctx:        ctx,
		svcCtx:     svcCtx,
		hpcService: hpcService,
	}
}
func (l *CancelJobLogic) CancelJob(req *types.CancelJobReq) error {
	// Legacy implementation, kept for reference; replaced by the adapter-based flow below.
	//var clusterInfo *types.ClusterInfo
	//tx := l.svcCtx.DbEngin.Raw("select * from t_cluster where id = ?", req.ClusterId).Scan(&clusterInfo)
	//if tx.Error != nil {
	//	return tx.Error
	//}
	//// Look up the P-side endpoint address
	//var adapterAddress string
	//l.svcCtx.DbEngin.Raw("SELECT server FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterAddress)
	//var jobResp slurm.GetJobResp
	//httpClient := resty.New().R()
	//_, err := httpClient.SetHeader("Content-Type", "application/json").
	//	SetQueryParams(map[string]string{
	//		"jobId":    req.JobId,
	//		"server":   clusterInfo.Server,
	//		"version":  clusterInfo.Version,
	//		"token":    clusterInfo.Token,
	//		"username": clusterInfo.Username,
	//	}).
	//	SetResult(&jobResp).
	//	Delete(adapterAddress + "/api/v1/job/cancel")
	//if err != nil {
	//	return err
	//}
	//if len(jobResp.Errors) != 0 {
	//	return errors.Errorf(jobResp.Errors[0].Description)
	//}
	//return nil
	// Join task and task_hpc to resolve the HPC job bound to this task.
	var hpcR TaskHPCResult
	err := l.svcCtx.DbEngin.Raw(
		"SELECT t.id, hpc.job_id, hpc.adapter_id, hpc.cluster_id FROM task t "+
			"INNER JOIN task_hpc hpc ON t.id = hpc.task_id "+
			"WHERE adapter_type_dict = 2 AND t.id = ?",
		req.TaskId,
	).Scan(&hpcR).Error
	if err != nil {
		return fmt.Errorf("database query failed: %v", err)
	}
	if hpcR.ID == 0 || hpcR.JobID == "" {
		return fmt.Errorf("job not found")
	}
	// Resolve the adapter that owns this job.
	var adapterInfo types.AdapterInfo
	l.svcCtx.DbEngin.Raw("SELECT * FROM `t_adapter` where id = ?", hpcR.AdapterId).Scan(&adapterInfo)
	if adapterInfo.Id == "" {
		return fmt.Errorf("adapter not found")
	}

	// Cancel the job through the adapter's executor; guard the map lookup so a
	// missing executor returns an error instead of panicking on a nil entry.
	executor, ok := l.hpcService.HpcExecutorAdapterMap[adapterInfo.Id]
	if !ok {
		return fmt.Errorf("no executor registered for adapter %s", adapterInfo.Id)
	}
	if err := executor.CancelTask(l.ctx, hpcR.JobID, hpcR.ClusterId); err != nil {
		return err
	}
	// Mark the task itself as canceled.
	err = l.svcCtx.DbEngin.Model(&types.Task{}).Where("id = ?", hpcR.ID).Update("status", "Canceled").Error
	if err != nil {
		return fmt.Errorf("database update failed: %v", err)
	}
	// Mark the HPC sub-task record as canceled.
	err = l.svcCtx.DbEngin.Model(&models.TaskHpc{}).Where("task_id = ?", hpcR.ID).Update("status", "Canceled").Error
	if err != nil {
		return fmt.Errorf("database update failed: %v", err)
	}
	return nil
}
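
For context, here is a minimal sketch of how a go-zero HTTP handler might invoke this logic. The handler name and route wiring are assumptions following common go-zero conventions, not code taken from this repository; only NewCancelJobLogic, CancelJob, and types.CancelJobReq come from the file above.

package hpc

import (
	"fmt"
	"net/http"

	"github.com/zeromicro/go-zero/rest/httpx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
)

// CancelJobHandler is a hypothetical handler: it parses the request, builds
// the logic object, and maps its error (or nil logic) to the HTTP response.
func CancelJobHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		var req types.CancelJobReq
		if err := httpx.Parse(r, &req); err != nil {
			httpx.ErrorCtx(r.Context(), w, err)
			return
		}
		l := NewCancelJobLogic(r.Context(), svcCtx)
		if l == nil { // NewCancelJobLogic returns nil when the HPC service fails to initialize
			httpx.ErrorCtx(r.Context(), w, fmt.Errorf("hpc service unavailable"))
			return
		}
		if err := l.CancelJob(&req); err != nil {
			httpx.ErrorCtx(r.Context(), w, err)
			return
		}
		httpx.Ok(w)
	}
}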

PCM is positioned as a software stack over cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.