You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

commithpctasklogic.go 5.1 kB

11 months ago
11 months ago
1 year ago
1 year ago
1 year ago
11 months ago
1 year ago
1 year ago
1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. package hpc
  2. import (
  3. "context"
  4. "errors"
  5. "github.com/go-resty/resty/v2"
  6. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/remoteUtil"
  9. v1 "gitlink.org.cn/JointCloud/pcm-hpc/routers/v1"
  10. "k8s.io/apimachinery/pkg/util/json"
  11. "strconv"
  12. "time"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  15. "github.com/zeromicro/go-zero/core/logx"
  16. )
  17. type CommitHpcTaskLogic struct {
  18. logx.Logger
  19. ctx context.Context
  20. svcCtx *svc.ServiceContext
  21. }
  22. func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
  23. return &CommitHpcTaskLogic{
  24. Logger: logx.WithContext(ctx),
  25. ctx: ctx,
  26. svcCtx: svcCtx,
  27. }
  28. }
  29. func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
  30. var clusterInfo types.ClusterInfo
  31. l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ?", req.ClusterId).First(&clusterInfo)
  32. if len(clusterInfo.Id) == 0 {
  33. return resp, errors.New("cluster not found")
  34. }
  35. // 构建主任务结构体
  36. taskModel := models.Task{
  37. Name: req.Name,
  38. Description: req.Description,
  39. CommitTime: time.Now(),
  40. Status: "Running",
  41. AdapterTypeDict: "2",
  42. }
  43. // 保存任务数据到数据库
  44. tx := l.svcCtx.DbEngin.Create(&taskModel)
  45. if tx.Error != nil {
  46. return nil, tx.Error
  47. }
  48. var adapterName string
  49. l.svcCtx.DbEngin.Raw("SELECT name FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterName)
  50. var server string
  51. l.svcCtx.DbEngin.Raw("SELECT server FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&server)
  52. if len(adapterName) == 0 || adapterName == "" {
  53. return nil, errors.New("no corresponding adapter found")
  54. }
  55. clusterId, err := strconv.ParseInt(req.ClusterId, 10, 64)
  56. hpcInfo := models.TaskHpc{
  57. TaskId: taskModel.Id,
  58. AdapterId: clusterInfo.AdapterId,
  59. AdapterName: adapterName,
  60. ClusterId: clusterId,
  61. ClusterName: clusterInfo.Name,
  62. Name: taskModel.Name,
  63. CmdScript: req.CmdScript,
  64. StartTime: time.Now().String(),
  65. CardCount: req.CardCount,
  66. WorkDir: req.WorkDir,
  67. WallTime: req.WallTime,
  68. AppType: req.AppType,
  69. AppName: req.AppName,
  70. Queue: req.Queue,
  71. SubmitType: req.SubmitType,
  72. NNode: req.NNode,
  73. Account: clusterInfo.Username,
  74. StdInput: req.StdInput,
  75. Partition: req.Partition,
  76. CreatedTime: time.Now(),
  77. UpdatedTime: time.Now(),
  78. Status: "Running",
  79. }
  80. hpcInfo.WorkDir = clusterInfo.WorkDir + req.WorkDir
  81. tx = l.svcCtx.DbEngin.Create(&hpcInfo)
  82. if tx.Error != nil {
  83. return nil, tx.Error
  84. }
  85. // 提交job到指定集群
  86. logx.Info("提交job到指定集群")
  87. jobId, err := submitJob(&hpcInfo, &clusterInfo, server)
  88. logx.Info("提交job到指定集群完成")
  89. if err != nil {
  90. return nil, err
  91. }
  92. // 保存操作记录
  93. noticeInfo := clientCore.NoticeInfo{
  94. AdapterId: clusterInfo.AdapterId,
  95. AdapterName: adapterName,
  96. ClusterId: clusterId,
  97. ClusterName: clusterInfo.Name,
  98. NoticeType: "create",
  99. TaskName: req.Name,
  100. Incident: "任务创建中",
  101. CreatedTime: time.Now(),
  102. }
  103. result := l.svcCtx.DbEngin.Table("t_notice").Create(&noticeInfo)
  104. if result.Error != nil {
  105. logx.Errorf("Task creation failure, err: %v", result.Error)
  106. }
  107. resp = &types.CommitHpcTaskResp{
  108. JobId: string(jobId),
  109. }
  110. // 数据上链
  111. bytes, _ := json.Marshal(taskModel)
  112. remoteUtil.Evidence(remoteUtil.EvidenceParam{
  113. Url: l.svcCtx.Config.BlockChain.Url,
  114. ContractAddress: l.svcCtx.Config.BlockChain.ContractAddress,
  115. FunctionName: l.svcCtx.Config.BlockChain.FunctionName,
  116. MemberName: l.svcCtx.Config.BlockChain.MemberName,
  117. Type: l.svcCtx.Config.BlockChain.Type,
  118. Args: []string{strconv.FormatInt(taskModel.Id, 10), string(bytes)},
  119. })
  120. return resp, nil
  121. }
  122. func submitJob(hpcInfo *models.TaskHpc, clusterInfo *types.ClusterInfo, adapterAddress string) (int, error) {
  123. SubmitJobReq := v1.SubmitJobReq{
  124. Server: clusterInfo.Server,
  125. Version: clusterInfo.Version,
  126. Username: clusterInfo.Username,
  127. Token: clusterInfo.Token,
  128. JobOptions: v1.JobOptions{
  129. Script: hpcInfo.CmdScript,
  130. Job: &v1.JobProperties{
  131. Account: hpcInfo.Account,
  132. Name: hpcInfo.Name,
  133. NTasks: 1,
  134. CurrentWorkingDirectory: hpcInfo.WorkDir,
  135. Partition: hpcInfo.Partition,
  136. Environment: map[string]string{"PATH": clusterInfo.EnvPath,
  137. "LD_LIBRARY_PATH": clusterInfo.EnvLdPath},
  138. StandardOutput: hpcInfo.WorkDir + "/job.out",
  139. StandardError: hpcInfo.WorkDir + "/job.err",
  140. },
  141. },
  142. }
  143. var resp v1.SubmitJobResp
  144. httpClient := resty.New().R()
  145. logx.Info("远程调用p端接口开始")
  146. _, err := httpClient.SetHeader("Content-Type", "application/json").
  147. SetBody(SubmitJobReq).
  148. SetResult(&resp).
  149. Post(adapterAddress + "/api/v1/job/submit")
  150. logx.Info("远程调用p端接口完成")
  151. if err != nil {
  152. return 0, err
  153. }
  154. return resp.JobId, nil
  155. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.