You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number; can include dashes ('-'); and can be up to 35 characters long.

commithpctasklogic.go 5.2 kB

11 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. package hpc
  2. import (
  3. "context"
  4. "errors"
  5. jsoniter "github.com/json-iterator/go"
  6. clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client"
  7. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service"
  8. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
  9. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
  10. "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
  11. "strconv"
  12. "time"
  13. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
  14. "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
  15. "github.com/zeromicro/go-zero/core/logx"
  16. )
// CommitHpcTaskLogic handles submission of HPC (high-performance computing)
// tasks: it persists the task records and dispatches the job to the target
// cluster through the matching adapter executor.
type CommitHpcTaskLogic struct {
	logx.Logger
	ctx        context.Context     // request-scoped context
	svcCtx     *svc.ServiceContext // shared service dependencies (DB engine, config, scheduler)
	hpcService *service.HpcService // holds the per-adapter HPC executors used by CommitHpcTask
}
  23. func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
  24. cache := make(map[string]interface{}, 10)
  25. hpcService, err := service.NewHpcService(&svcCtx.Config, svcCtx.Scheduler.HpcStorages, cache)
  26. if err != nil {
  27. return nil
  28. }
  29. return &CommitHpcTaskLogic{
  30. Logger: logx.WithContext(ctx),
  31. ctx: ctx,
  32. svcCtx: svcCtx,
  33. hpcService: hpcService,
  34. }
  35. }
  36. func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
  37. reqStr, _ := jsoniter.MarshalToString(req)
  38. yaml := utils.StringToYaml(reqStr)
  39. var clusterInfo types.ClusterInfo
  40. l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where id = ?", req.ClusterId).First(&clusterInfo)
  41. if len(clusterInfo.Id) == 0 {
  42. return resp, errors.New("cluster not found")
  43. }
  44. // 构建主任务结构体
  45. userId, _ := strconv.ParseInt(req.Parameters["UserId"], 10, 64)
  46. taskModel := models.Task{
  47. Name: req.Name,
  48. Description: req.Description,
  49. CommitTime: time.Now(),
  50. Status: "Saved",
  51. AdapterTypeDict: "2",
  52. UserId: userId,
  53. YamlString: *yaml,
  54. }
  55. // 保存任务数据到数据库
  56. tx := l.svcCtx.DbEngin.Create(&taskModel)
  57. if tx.Error != nil {
  58. return nil, tx.Error
  59. }
  60. var adapterInfo types.AdapterInfo
  61. l.svcCtx.DbEngin.Raw("SELECT * FROM `t_adapter` where id = ?", clusterInfo.AdapterId).Scan(&adapterInfo)
  62. if adapterInfo.Id == "" {
  63. return resp, errors.New("adapter not found")
  64. }
  65. clusterId, err := strconv.ParseInt(req.ClusterId, 10, 64)
  66. cardCount, _ := strconv.ParseInt(req.Parameters["cardCount"], 10, 64)
  67. timelimit, _ := strconv.ParseInt(req.Parameters["timeLimit"], 10, 64)
  68. hpcInfo := models.TaskHpc{
  69. TaskId: taskModel.Id,
  70. AdapterId: clusterInfo.AdapterId,
  71. AdapterName: adapterInfo.Name,
  72. ClusterId: clusterId,
  73. ClusterName: clusterInfo.Name,
  74. Name: taskModel.Name,
  75. Backend: req.Backend,
  76. OperateType: req.OperateType,
  77. CmdScript: req.Parameters["cmdScript"],
  78. StartTime: time.Now().Format(constants.Layout),
  79. CardCount: cardCount,
  80. WorkDir: req.Parameters["workDir"],
  81. WallTime: req.Parameters["wallTime"],
  82. AppType: req.Parameters["appType"],
  83. AppName: req.Parameters["appName"],
  84. Queue: req.Parameters["queue"],
  85. SubmitType: req.Parameters["submitType"],
  86. NNode: req.Parameters["nNode"],
  87. Account: clusterInfo.Username,
  88. StdInput: req.Parameters["stdInput"],
  89. Partition: req.Parameters["partition"],
  90. CreatedTime: time.Now(),
  91. UpdatedTime: time.Now(),
  92. Status: "Deploying",
  93. TimeLimit: timelimit,
  94. UserId: userId,
  95. YamlString: *yaml,
  96. }
  97. hpcInfo.WorkDir = clusterInfo.WorkDir + req.Parameters["WorkDir"]
  98. tx = l.svcCtx.DbEngin.Create(&hpcInfo)
  99. if tx.Error != nil {
  100. return nil, tx.Error
  101. }
  102. // 保存操作记录
  103. noticeInfo := clientCore.NoticeInfo{
  104. AdapterId: clusterInfo.AdapterId,
  105. AdapterName: adapterInfo.Name,
  106. ClusterId: clusterId,
  107. ClusterName: clusterInfo.Name,
  108. NoticeType: "create",
  109. TaskName: req.Name,
  110. Incident: "任务创建中",
  111. CreatedTime: time.Now(),
  112. }
  113. result := l.svcCtx.DbEngin.Table("t_notice").Create(&noticeInfo)
  114. if result.Error != nil {
  115. logx.Errorf("Task creation failure, err: %v", result.Error)
  116. }
  117. // 数据上链
  118. // 查询资源价格
  119. //var price int64
  120. //l.svcCtx.DbEngin.Raw("select price from `resource_cost` where resource_id = ?", clusterId).Scan(&price)
  121. //bytes, _ := json.Marshal(taskModel)
  122. //remoteUtil.Evidence(remoteUtil.EvidenceParam{
  123. // UserIp: req.Parameters["UserIp"],
  124. // Url: l.svcCtx.Config.BlockChain.Url,
  125. // ContractAddress: l.svcCtx.Config.BlockChain.ContractAddress,
  126. // FunctionName: l.svcCtx.Config.BlockChain.FunctionName,
  127. // Type: l.svcCtx.Config.BlockChain.Type,
  128. // Token: req.Parameters["Token"],
  129. // Amount: price,
  130. // Args: []string{strconv.FormatInt(taskModel.Id, 10), string(bytes)},
  131. //})
  132. // 提交job到指定集群
  133. logx.Info("提交job到指定集群")
  134. resp, err = l.hpcService.HpcExecutorAdapterMap[adapterInfo.Id].SubmitTask(context.Background(), *req)
  135. if err != nil {
  136. return nil, err
  137. }
  138. // 更新任务状态
  139. updates := l.svcCtx.DbEngin.Model(&hpcInfo).Updates(models.TaskHpc{
  140. Id: hpcInfo.Id,
  141. JobId: resp.Data.JobInfo["jobId"],
  142. WorkDir: resp.Data.JobInfo["jobDir"],
  143. })
  144. if updates.Error != nil {
  145. return nil, updates.Error
  146. }
  147. return resp, nil
  148. }

PCM is positioned as a software stack over the cloud, aiming to build the standards and ecosystem of heterogeneous cloud collaboration for JCC in a non-intrusive, autonomous, peer-to-peer manner.