You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

commithpctasklogic.go 6.2 kB

11 months ago
1 year ago
1 year ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. package hpc
  2. import (
  3. "context"
  4. "errors"
  5. "github.com/go-resty/resty/v2"
  6. jsoniter "github.com/json-iterator/go"
  7. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  10. "strconv"
  11. "time"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  14. "github.com/zeromicro/go-zero/core/logx"
  15. )
  16. type CommitHpcTaskLogic struct {
  17. logx.Logger
  18. ctx context.Context
  19. svcCtx *svc.ServiceContext
  20. }
  21. func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
  22. return &CommitHpcTaskLogic{
  23. Logger: logx.WithContext(ctx),
  24. ctx: ctx,
  25. svcCtx: svcCtx,
  26. }
  27. }
  // JobSpec is the request body posted to an adapter's job endpoint when a job
  // is submitted.
  type JobSpec struct {
  	// Name is the application name, e.g. BWA or lammps.
  	Name string
  	// Backend is the backend type, e.g. slurm or sugonac.
  	Backend string
  	// App is copied from the incoming request's App field.
  	App string
  	// OperateType is the operation within the application, e.g. for bwa:
  	// build an index or align sequences.
  	OperateType string
  	// Parameters holds the general-purpose parameters.
  	Parameters map[string]string
  	// CustomParams holds the platform-specific parameters.
  	CustomParams map[string]string
  }
  // ResultParticipant describes a JSON reply envelope with a status code, a
  // data payload, a message and a trace identifier.
  // NOTE(review): presumably this mirrors the adapter's job-submission reply;
  // nothing in this file references it, so confirm against callers.
  type ResultParticipant struct {
  	// Code is decoded from the "code" key.
  	Code int `json:"code"`

  	// Data is decoded from the "data" object.
  	Data struct {
  		// Backend is decoded from "backend".
  		Backend string `json:"backend"`

  		// JobInfo is decoded from the "jobInfo" object.
  		JobInfo struct {
  			// JobDir is decoded from "jobDir".
  			JobDir string `json:"jobDir"`
  			// JobId is decoded from "jobId".
  			JobId string `json:"jobId"`
  		} `json:"jobInfo"`
  	} `json:"data"`

  	// Msg is decoded from the "msg" key.
  	Msg string `json:"msg"`

  	// TraceId is decoded from the "trace_id" key.
  	TraceId string `json:"trace_id"`
  }
  48. func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
  49. reqStr, _ := jsoniter.MarshalToString(req)
  50. yaml := utils.StringToYaml(reqStr)
  51. var clusterInfo types.ClusterInfo
  52. l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ?", req.ClusterId).First(&clusterInfo)
  53. if len(clusterInfo.Id) == 0 {
  54. return resp, errors.New("cluster not found")
  55. }
  56. // 构建主任务结构体
  57. userId, _ := strconv.ParseInt(req.Parameters["UserId"], 10, 64)
  58. taskModel := models.Task{
  59. Name: req.Name,
  60. Description: req.Description,
  61. CommitTime: time.Now(),
  62. Status: "Running",
  63. AdapterTypeDict: "2",
  64. UserId: userId,
  65. YamlString: *yaml,
  66. }
  67. // 保存任务数据到数据库
  68. tx := l.svcCtx.DbEngin.Create(&taskModel)
  69. if tx.Error != nil {
  70. return nil, tx.Error
  71. }
  72. var adapterName string
  73. l.svcCtx.DbEngin.Raw("SELECT name FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterName)
  74. var server string
  75. l.svcCtx.DbEngin.Raw("SELECT server FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&server)
  76. if len(adapterName) == 0 || adapterName == "" {
  77. return nil, errors.New("no corresponding adapter found")
  78. }
  79. clusterId, err := strconv.ParseInt(req.ClusterId, 10, 64)
  80. cardCount, _ := strconv.ParseInt(req.Parameters["cardCount"], 10, 64)
  81. timelimit, _ := strconv.ParseInt(req.Parameters["timeLimit"], 10, 64)
  82. hpcInfo := models.TaskHpc{
  83. TaskId: taskModel.Id,
  84. AdapterId: clusterInfo.AdapterId,
  85. AdapterName: adapterName,
  86. ClusterId: clusterId,
  87. ClusterName: clusterInfo.Name,
  88. Name: taskModel.Name,
  89. Backend: req.Backend,
  90. OperateType: req.OperateType,
  91. CmdScript: req.Parameters["cmdScript"],
  92. StartTime: time.Now().String(),
  93. CardCount: cardCount,
  94. WorkDir: req.Parameters["workDir"],
  95. WallTime: req.Parameters["wallTime"],
  96. AppType: req.Parameters["appType"],
  97. AppName: req.Parameters["appName"],
  98. Queue: req.Parameters["queue"],
  99. SubmitType: req.Parameters["submitType"],
  100. NNode: req.Parameters["nNode"],
  101. Account: clusterInfo.Username,
  102. StdInput: req.Parameters["stdInput"],
  103. Partition: req.Parameters["partition"],
  104. CreatedTime: time.Now(),
  105. UpdatedTime: time.Now(),
  106. Status: "Deploying",
  107. TimeLimit: timelimit,
  108. UserId: userId,
  109. YamlString: *yaml,
  110. }
  111. hpcInfo.WorkDir = clusterInfo.WorkDir + req.Parameters["WorkDir"]
  112. tx = l.svcCtx.DbEngin.Create(&hpcInfo)
  113. if tx.Error != nil {
  114. return nil, tx.Error
  115. }
  116. // 保存操作记录
  117. noticeInfo := clientCore.NoticeInfo{
  118. AdapterId: clusterInfo.AdapterId,
  119. AdapterName: adapterName,
  120. ClusterId: clusterId,
  121. ClusterName: clusterInfo.Name,
  122. NoticeType: "create",
  123. TaskName: req.Name,
  124. Incident: "任务创建中",
  125. CreatedTime: time.Now(),
  126. }
  127. result := l.svcCtx.DbEngin.Table("t_notice").Create(&noticeInfo)
  128. if result.Error != nil {
  129. logx.Errorf("Task creation failure, err: %v", result.Error)
  130. }
  131. // 数据上链
  132. // 查询资源价格
  133. var price int64
  134. l.svcCtx.DbEngin.Raw("select price from `resource_cost` where resource_id = ?", clusterId).Scan(&price)
  135. //bytes, _ := json.Marshal(taskModel)
  136. //remoteUtil.Evidence(remoteUtil.EvidenceParam{
  137. // UserIp: req.Parameters["UserIp"],
  138. // Url: l.svcCtx.Config.BlockChain.Url,
  139. // ContractAddress: l.svcCtx.Config.BlockChain.ContractAddress,
  140. // FunctionName: l.svcCtx.Config.BlockChain.FunctionName,
  141. // Type: l.svcCtx.Config.BlockChain.Type,
  142. // Token: req.Parameters["Token"],
  143. // Amount: price,
  144. // Args: []string{strconv.FormatInt(taskModel.Id, 10), string(bytes)},
  145. //})
  146. // 提交job到指定集群
  147. logx.Info("提交job到指定集群")
  148. resp, err = submitJob(req, server)
  149. if err != nil {
  150. return nil, err
  151. }
  152. // 更新任务状态
  153. updates := l.svcCtx.DbEngin.Model(&hpcInfo).Updates(models.TaskHpc{
  154. Id: hpcInfo.Id,
  155. JobId: resp.Data.JobInfo["jobId"],
  156. WorkDir: resp.Data.JobInfo["jobDir"],
  157. })
  158. if updates.Error != nil {
  159. return nil, updates.Error
  160. }
  161. return resp, nil
  162. }
  163. func submitJob(req *types.CommitHpcTaskReq, adapterAddress string) (resp *types.CommitHpcTaskResp, err error) {
  164. req.Parameters["jobName"] = req.Name + "_" + req.OperateType
  165. reqParticipant := JobSpec{
  166. Name: req.Name,
  167. Backend: req.Backend,
  168. App: req.App,
  169. OperateType: req.OperateType,
  170. Parameters: req.Parameters,
  171. CustomParams: req.CustomParams,
  172. }
  173. httpClient := resty.New().R()
  174. logx.Info("远程调用p端接口开始")
  175. _, err = httpClient.SetHeader("Content-Type", "application/json").
  176. SetBody(reqParticipant).
  177. SetResult(&resp).
  178. Post(adapterAddress + "/api/v1/jobs")
  179. if err != nil {
  180. return nil, err
  181. }
  182. logx.Info("远程调用p端接口完成")
  183. return resp, nil
  184. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.