|
- package hpcservice
-
import (
	"context"
	"fmt"

	"github.com/go-resty/resty/v2"
	"github.com/zeromicro/go-zero/core/logx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
)
-
// ParticipantHpc carries the settings used to talk to one participant HPC
// cluster over its HTTP job API.
type ParticipantHpc struct {
	// participantId is the id passed to NewHpc.
	// NOTE(review): presumably the participant's record id — confirm.
	participantId int64

	// platform and host are supplied through NewHpc; host is prepended to
	// each endpoint path when a request URL is built.
	platform, host string

	// userName and accessToken are not populated by NewHpc.
	// NOTE(review): presumably reserved for authenticated calls — confirm.
	userName, accessToken string
}
-
// BackendSlurm is the value substituted for the {backend} path parameter of
// the job endpoints below.
const BackendSlurm = "slurm"

// Endpoint paths of the HPC job API, relative to the cluster host. The
// {backend} and {jobId} placeholders are filled in as path parameters when a
// request is built.
const (
	SubmitTaskUrl = "/api/v1/jobs"
	JobDetailUrl  = "/api/v1/jobs/detail/{backend}/{jobId}"
	CancelTaskUrl = "/api/v1/jobs/cancel/{backend}/{jobId}"
	JobLogUrl     = "/api/v1/jobs/logs/{backend}/{jobId}"
)
-
- func NewHpc(host string, id int64, platform string) *ParticipantHpc {
- return &ParticipantHpc{
- host: host,
- participantId: id,
- platform: platform,
- }
- }
-
- func (c *ParticipantHpc) GetTask(ctx context.Context, taskId string) (*collector.Task, error) {
- reqUrl := c.host + JobDetailUrl
- hpcResp := &collector.HpcJobDetailResp{}
- httpClient := resty.New().R()
- _, err := httpClient.SetHeader("Content-Type", "application/json").
- SetPathParam("jobId", taskId).
- SetPathParam("backend", "slurm").
- SetResult(&hpcResp).
- Get(reqUrl)
- if err != nil {
- return nil, err
- }
- var resp collector.Task
- resp.Id = hpcResp.Data.ID
- if !hpcResp.Data.StartTime.IsZero() {
- resp.Start = hpcResp.Data.StartTime.Format(constants.Layout)
- }
- if !hpcResp.Data.EndTime.IsZero() {
- resp.End = hpcResp.Data.EndTime.Format(constants.Layout)
- }
- switch hpcResp.Data.StatusText {
- case "COMPLETED":
- resp.Status = constants.Completed
- case "FAILED":
- resp.Status = constants.Failed
- case "CREATED_FAILED":
- resp.Status = constants.Failed
- case "RUNNING":
- resp.Status = constants.Running
- case "STOPPED":
- resp.Status = constants.Stopped
- case "PENDING":
- resp.Status = constants.Pending
- case "WAITING":
- resp.Status = constants.Waiting
- case "CANCELLED":
- resp.Status = constants.Cancelled
- default:
- resp.Status = "undefined"
- }
-
- return &resp, nil
- }
-
- func (c *ParticipantHpc) SubmitTask(ctx context.Context, req types.CommitHpcTaskReq) (*types.CommitHpcTaskResp, error) {
- reqUrl := c.host + SubmitTaskUrl
- resp := types.CommitHpcTaskResp{}
- logx.WithContext(ctx).Infof("提交任务到超算集群, url: %s, req: %+v", reqUrl, req)
- httpClient := resty.New().R()
- _, err := httpClient.SetHeaders(
- map[string]string{
- "Content-Type": "application/json",
- "traceId": result.TraceIDFromContext(ctx),
- }).SetBody(req).
- SetResult(&resp).
- Post(reqUrl)
- if err != nil {
- return nil, err
- }
-
- return &resp, nil
- }
-
- func (c *ParticipantHpc) CancelTask(ctx context.Context, jobId string) error {
- reqUrl := c.host + CancelTaskUrl
- resp := types.CommonResp{}
- logx.WithContext(ctx).Infof("取消超算集群任务, url: %s, jobId: %s", reqUrl, jobId)
- httpClient := resty.New().R()
- _, err := httpClient.SetHeaders(
- map[string]string{
- "Content-Type": "application/json",
- "traceId": result.TraceIDFromContext(ctx),
- }).SetPathParams(map[string]string{
- "backend": BackendSlurm,
- "jobId": jobId,
- }).SetResult(&resp).Delete(reqUrl)
- if err != nil {
- return err
- }
- return nil
- }
-
- func (c *ParticipantHpc) GetTaskLogs(ctx context.Context, jobId string) (interface{}, error) {
- reqUrl := c.host + JobLogUrl
- resp := types.CommonResp{}
- logx.WithContext(ctx).Infof("获取超算集群任务日志, url: %s, jobId: %s", reqUrl, jobId)
- httpClient := resty.New().R()
- _, err := httpClient.SetHeaders(
- map[string]string{
- "Content-Type": "application/json",
- "traceId": result.TraceIDFromContext(ctx),
- }).SetPathParams(map[string]string{
- "backend": BackendSlurm,
- "jobId": jobId,
- }).SetResult(&resp).Get(reqUrl)
- if err != nil {
- return nil, err
- }
- return resp, nil
- }
|