You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; they can include dashes ('-') and can be up to 35 characters long.

commithpctasklogic.go 5.1 kB

11 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. package hpc
  2. import (
  3. "context"
  4. "errors"
  5. jsoniter "github.com/json-iterator/go"
  6. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  10. "strconv"
  11. "time"
  12. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  14. "github.com/zeromicro/go-zero/core/logx"
  15. )
// CommitHpcTaskLogic handles the "commit HPC task" API: it persists task
// records and submits the job to the target cluster's HPC executor.
type CommitHpcTaskLogic struct {
	logx.Logger
	ctx        context.Context        // request-scoped context from the handler
	svcCtx     *svc.ServiceContext    // shared service context (DB engine, config, scheduler)
	hpcService *service.HpcService    // per-logic HPC service; maps adapter ids to executors
}
  22. func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
  23. cache := make(map[string]interface{}, 10)
  24. hpcService, err := service.NewHpcService(&svcCtx.Config, svcCtx.Scheduler.HpcStorages, cache)
  25. if err != nil {
  26. return nil
  27. }
  28. return &CommitHpcTaskLogic{
  29. Logger: logx.WithContext(ctx),
  30. ctx: ctx,
  31. svcCtx: svcCtx,
  32. hpcService: hpcService,
  33. }
  34. }
  35. func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
  36. reqStr, _ := jsoniter.MarshalToString(req)
  37. yaml := utils.StringToYaml(reqStr)
  38. var clusterInfo types.ClusterInfo
  39. l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ?", req.ClusterId).First(&clusterInfo)
  40. if len(clusterInfo.Id) == 0 {
  41. return resp, errors.New("cluster not found")
  42. }
  43. // 构建主任务结构体
  44. userId, _ := strconv.ParseInt(req.Parameters["UserId"], 10, 64)
  45. taskModel := models.Task{
  46. Name: req.Name,
  47. Description: req.Description,
  48. CommitTime: time.Now(),
  49. Status: "Saved",
  50. AdapterTypeDict: "2",
  51. UserId: userId,
  52. YamlString: *yaml,
  53. }
  54. // 保存任务数据到数据库
  55. tx := l.svcCtx.DbEngin.Create(&taskModel)
  56. if tx.Error != nil {
  57. return nil, tx.Error
  58. }
  59. var adapterInfo types.AdapterInfo
  60. l.svcCtx.DbEngin.Raw("SELECT * FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterInfo)
  61. if adapterInfo.Id == "" {
  62. return resp, errors.New("adapter not found")
  63. }
  64. clusterId, err := strconv.ParseInt(req.ClusterId, 10, 64)
  65. cardCount, _ := strconv.ParseInt(req.Parameters["cardCount"], 10, 64)
  66. timelimit, _ := strconv.ParseInt(req.Parameters["timeLimit"], 10, 64)
  67. hpcInfo := models.TaskHpc{
  68. TaskId: taskModel.Id,
  69. AdapterId: clusterInfo.AdapterId,
  70. AdapterName: adapterInfo.Name,
  71. ClusterId: clusterId,
  72. ClusterName: clusterInfo.Name,
  73. Name: taskModel.Name,
  74. Backend: req.Backend,
  75. OperateType: req.OperateType,
  76. CmdScript: req.Parameters["cmdScript"],
  77. CardCount: cardCount,
  78. WorkDir: req.Parameters["workDir"],
  79. WallTime: req.Parameters["wallTime"],
  80. AppType: req.Parameters["appType"],
  81. AppName: req.Parameters["appName"],
  82. Queue: req.Parameters["queue"],
  83. SubmitType: req.Parameters["submitType"],
  84. NNode: req.Parameters["nNode"],
  85. Account: clusterInfo.Username,
  86. StdInput: req.Parameters["stdInput"],
  87. Partition: req.Parameters["partition"],
  88. CreatedTime: time.Now(),
  89. UpdatedTime: time.Now(),
  90. Status: "Deploying",
  91. TimeLimit: timelimit,
  92. UserId: userId,
  93. YamlString: *yaml,
  94. }
  95. hpcInfo.WorkDir = clusterInfo.WorkDir + req.Parameters["WorkDir"]
  96. tx = l.svcCtx.DbEngin.Create(&hpcInfo)
  97. if tx.Error != nil {
  98. return nil, tx.Error
  99. }
  100. // 保存操作记录
  101. noticeInfo := clientCore.NoticeInfo{
  102. AdapterId: clusterInfo.AdapterId,
  103. AdapterName: adapterInfo.Name,
  104. ClusterId: clusterId,
  105. ClusterName: clusterInfo.Name,
  106. NoticeType: "create",
  107. TaskName: req.Name,
  108. Incident: "任务创建中",
  109. CreatedTime: time.Now(),
  110. }
  111. result := l.svcCtx.DbEngin.Table("t_notice").Create(&noticeInfo)
  112. if result.Error != nil {
  113. logx.Errorf("Task creation failure, err: %v", result.Error)
  114. }
  115. // 数据上链
  116. // 查询资源价格
  117. //var price int64
  118. //l.svcCtx.DbEngin.Raw("select price from `resource_cost` where resource_id = ?", clusterId).Scan(&price)
  119. //bytes, _ := json.Marshal(taskModel)
  120. //remoteUtil.Evidence(remoteUtil.EvidenceParam{
  121. // UserIp: req.Parameters["UserIp"],
  122. // Url: l.svcCtx.Config.BlockChain.Url,
  123. // ContractAddress: l.svcCtx.Config.BlockChain.ContractAddress,
  124. // FunctionName: l.svcCtx.Config.BlockChain.FunctionName,
  125. // Type: l.svcCtx.Config.BlockChain.Type,
  126. // Token: req.Parameters["Token"],
  127. // Amount: price,
  128. // Args: []string{strconv.FormatInt(taskModel.Id, 10), string(bytes)},
  129. //})
  130. // 提交job到指定集群
  131. logx.Info("提交job到指定集群")
  132. resp, err = l.hpcService.HpcExecutorAdapterMap[adapterInfo.Id].SubmitTask(context.Background(), *req)
  133. if err != nil {
  134. return nil, err
  135. }
  136. // 更新任务状态
  137. updates := l.svcCtx.DbEngin.Model(&hpcInfo).Updates(models.TaskHpc{
  138. Id: hpcInfo.Id,
  139. JobId: resp.Data.JobInfo["jobId"],
  140. WorkDir: resp.Data.JobInfo["jobDir"],
  141. })
  142. if updates.Error != nil {
  143. return nil, updates.Error
  144. }
  145. resp.Data.JobInfo["taskId"] = strconv.FormatInt(taskModel.Id, 10)
  146. return resp, nil
  147. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.