// Package hpcservice contains an HTTP client for the job endpoints exposed by
// an HPC participant (submit, cancel, detail, status, logs, resource usage and
// failure analysis).
package hpcservice

import (
	"context"
	"fmt"
	"net/http"

	"github.com/go-resty/resty/v2"
	"github.com/zeromicro/go-zero/core/logx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/restyclient"
)

// ParticipantHpc is a client bound to a single HPC participant.
type ParticipantHpc struct {
	participantId int64
	platform      string
	// host is prepended to the endpoint path by the methods that build their
	// own request (GetTaskDetail, SubmitTask, CancelTask).
	host string
	// userName and accessToken are not populated by NewHpc.
	userName    string
	accessToken string
	*restyclient.RestyClient
}

// Endpoint paths of the HPC job API. The {clusterId} and {jobId} segments are
// placeholders that are filled in through resty path parameters.
const (
	SubmitTaskUrl       = "/api/v1/hpc/jobs"
	JobStatus           = "/api/v1/hpc/jobs/status/{clusterId}/{jobId}"
	JobDetailUrl        = "/api/v1/hpc/jobs/detail/{clusterId}/{jobId}"
	JobLogUrl           = "/api/v1/hpc/jobs/logs/{clusterId}/{jobId}"
	CancelTaskUrl       = "/api/v1/hpc/jobs/cancel/{clusterId}/{jobId}"
	JobResourceUsageUrl = "/api/v1/hpc/jobs/resource/usage/{clusterId}/{jobId}"
	JobFailureAnalyze   = "/api/v1/hpc/task/analyze"
)

// NewHpc builds a ParticipantHpc for the given host, participant id and
// platform name. The embedded RestyClient is initialised with the same host
// and an empty second argument.
func NewHpc(host string, id int64, platform string) *ParticipantHpc {
	return &ParticipantHpc{
		host:          host,
		participantId: id,
		platform:      platform,
		RestyClient:   restyclient.InitClient(host, ""),
	}
}

// hpcJSONHeaders returns the headers sent with every request: a JSON content
// type and the trace id extracted from ctx.
func hpcJSONHeaders(ctx context.Context) map[string]string {
	return map[string]string{
		"Content-Type": "application/json",
		"traceId":      result.TraceIDFromContext(ctx),
	}
}

// hpcJobPathParams returns the path parameters for the {clusterId}/{jobId}
// endpoint templates.
func hpcJobPathParams(clusterId, jobId string) map[string]string {
	return map[string]string{
		"clusterId": clusterId,
		"jobId":     jobId,
	}
}

// fetchJobInfo issues a GET against one of the {clusterId}/{jobId} endpoints
// through the embedded RestyClient and returns the decoded types.CommonResp
// (by value). It fails when jobId is empty, when the request fails, or when
// the response code is not http.StatusOK.
func (c *ParticipantHpc) fetchJobInfo(ctx context.Context, endpoint, jobId, clusterId string) (interface{}, error) {
	if jobId == "" {
		return nil, fmt.Errorf("jobId is empty")
	}

	resp := types.CommonResp{}
	_, err := c.Request(endpoint, http.MethodGet, func(req *resty.Request) {
		req.SetHeaders(hpcJSONHeaders(ctx)).
			SetPathParams(hpcJobPathParams(clusterId, jobId)).
			SetResult(&resp)
	})
	if err != nil {
		return nil, err
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return nil, fmt.Errorf("%s", resp.Msg)
	}
	return resp, nil
}

// GetTaskDetail fetches the detail of job taskId on cluster clusterId and
// converts it into a collector.Task.
//
// Start and End are only set when the corresponding timestamp is non-zero and
// are formatted with constants.Layout. The remote status text is mapped onto
// the constants.* status values; any unrecognised text yields "undefined".
//
// NOTE(review): ctx is only used to derive the trace id header; it is not
// attached to the outgoing request. The response's business code, if any, is
// not checked here.
func (c *ParticipantHpc) GetTaskDetail(ctx context.Context, taskId string, clusterId string) (*collector.Task, error) {
	reqUrl := c.host + JobDetailUrl
	hpcResp := &collector.HpcJobDetailResp{}

	// hpcResp is already a pointer. Passing its address would let a JSON
	// "null" body reset it to nil and make the field accesses below panic.
	_, err := resty.New().R().
		SetHeaders(hpcJSONHeaders(ctx)).
		SetPathParams(hpcJobPathParams(clusterId, taskId)).
		SetResult(hpcResp).
		Get(reqUrl)
	if err != nil {
		return nil, err
	}

	var resp collector.Task
	resp.Id = hpcResp.Data.ID
	if !hpcResp.Data.StartTime.IsZero() {
		resp.Start = hpcResp.Data.StartTime.Format(constants.Layout)
	}
	if !hpcResp.Data.EndTime.IsZero() {
		resp.End = hpcResp.Data.EndTime.Format(constants.Layout)
	}

	switch hpcResp.Data.StatusText {
	case "COMPLETED":
		resp.Status = constants.Completed
	case "FAILED", "CREATED_FAILED":
		resp.Status = constants.Failed
	case "RUNNING":
		resp.Status = constants.Running
	case "STOPPED":
		resp.Status = constants.Stopped
	case "PENDING":
		resp.Status = constants.Pending
	case "WAITING":
		resp.Status = constants.Waiting
	case "CANCELLED":
		resp.Status = constants.Cancelled
	default:
		resp.Status = "undefined"
	}
	return &resp, nil
}

// SubmitTask posts req to the submit endpoint and returns the decoded
// response. It returns an error when the request fails or when the response
// code is not http.StatusOK, in which case the error text is the response Msg.
func (c *ParticipantHpc) SubmitTask(ctx context.Context, req types.SubmitHpcTaskReq) (*types.CommitHpcTaskResp, error) {
	reqUrl := c.host + SubmitTaskUrl
	resp := types.CommitHpcTaskResp{}
	logx.WithContext(ctx).Infof("提交任务到超算集群, url: %s, req: %+v", reqUrl, req)

	_, err := resty.New().R().
		SetHeaders(hpcJSONHeaders(ctx)).
		SetBody(req).
		SetResult(&resp).
		Post(reqUrl)
	if err != nil {
		return nil, err
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return nil, fmt.Errorf("%s", resp.Msg)
	}
	return &resp, nil
}

// CancelTask sends a DELETE to the cancel endpoint for job jobId on cluster
// clusterId. It returns an error when the request fails or when the response
// code is not http.StatusOK.
func (c *ParticipantHpc) CancelTask(ctx context.Context, jobId string, clusterId string) error {
	reqUrl := c.host + CancelTaskUrl
	resp := types.CommonResp{}
	logx.WithContext(ctx).Infof("取消超算集群任务, url: %s, jobId: %s", reqUrl, jobId)

	_, err := resty.New().R().
		SetHeaders(hpcJSONHeaders(ctx)).
		SetPathParams(hpcJobPathParams(clusterId, jobId)).
		SetResult(&resp).
		Delete(reqUrl)
	if err != nil {
		return err
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return fmt.Errorf("%s", resp.Msg)
	}
	return nil
}

// GetTaskLogs returns the log response (a types.CommonResp value) for job
// jobId on cluster clusterId. An empty jobId is rejected.
func (c *ParticipantHpc) GetTaskLogs(ctx context.Context, jobId string, clusterId string) (interface{}, error) {
	logx.WithContext(ctx).Infof("获取超算集群任务日志, url: %s, jobId: %s", JobLogUrl, jobId)
	return c.fetchJobInfo(ctx, JobLogUrl, jobId, clusterId)
}

// GetTaskStatus returns the status response (a types.CommonResp value) for job
// jobId on cluster clusterId. An empty jobId is rejected.
func (c *ParticipantHpc) GetTaskStatus(ctx context.Context, jobId string, clusterId string) (interface{}, error) {
	logx.WithContext(ctx).Infof("获取超算集群任务状态, url: %s, jobId: %s", JobStatus, jobId)
	return c.fetchJobInfo(ctx, JobStatus, jobId, clusterId)
}

// GetTaskResourceUsage returns the resource-usage response (a
// types.CommonResp value) for job jobId on cluster clusterId. An empty jobId
// is rejected.
func (c *ParticipantHpc) GetTaskResourceUsage(ctx context.Context, jobId string, clusterId string) (interface{}, error) {
	logx.WithContext(ctx).Infof("获取超算集群任务资源使用情况, url: %s, jobId: %s", JobResourceUsageUrl, jobId)
	return c.fetchJobInfo(ctx, JobResourceUsageUrl, jobId, clusterId)
}

// GetHpcTaskFailureAnalyze posts the job and cluster identifiers to the
// failure-analysis endpoint and returns the decoded types.CommonResp (by
// value). An empty jobId is rejected; a response code other than
// http.StatusOK is reported as an error carrying the response Msg.
func (c *ParticipantHpc) GetHpcTaskFailureAnalyze(ctx context.Context, jobId string, clusterId string) (interface{}, error) {
	logx.WithContext(ctx).Infof("获取超算集群任务失败分析, url: %s, jobId: %s", JobFailureAnalyze, jobId)
	if jobId == "" {
		return nil, fmt.Errorf("jobId is empty")
	}

	resp := types.CommonResp{}
	_, err := c.Request(JobFailureAnalyze, http.MethodPost, func(req *resty.Request) {
		req.SetHeaders(hpcJSONHeaders(ctx)).
			SetBody(map[string]string{
				"JobId":       jobId,
				"clusterId":   clusterId,
				"clusterType": "hpc",
			}).
			SetResult(&resp)
	})
	if err != nil {
		return nil, err
	}
	if resp.Code != http.StatusOK {
		// Msg comes from the remote service; never use it as a format string.
		return nil, fmt.Errorf("%s", resp.Msg)
	}
	return resp, nil
}