Browse Source

add hpcAppCluster

Signed-off-by: jagger <cossjie@foxmail.com>
pull/489/head
jagger 5 months ago
parent
commit
583e07bc08
7 changed files with 170 additions and 39 deletions
  1. +7
    -12
      desc/hpc/pcm-hpc.api
  2. +5
    -1
      desc/pcm.api
  3. +24
    -0
      internal/handler/hpc/gethpcappclusterhandler.go
  4. +6
    -0
      internal/handler/routes.go
  5. +85
    -26
      internal/logic/hpc/commithpctasklogic.go
  6. +39
    -0
      internal/logic/hpc/gethpcappclusterlogic.go
  7. +4
    -0
      internal/types/types.go

+ 7
- 12
desc/hpc/pcm-hpc.api View File

@@ -205,22 +205,17 @@ type (
CreateTime string `json:"create_time"` CreateTime string `json:"create_time"`
UpdateTime string `json:"update_time"` UpdateTime string `json:"update_time"`
} }
// App string `json:"app"`
//Backend string `json:"backend" binding:"required,oneof=slurm sugonac"` // 后端类型:slurm/sugonac
//Partition string // 分区/队列名称
//TaskId string `json:"taskId"`
//ClusterId string `json:"clusterId"`
//JobName string `json:"jobName"`
//ScriptContent string `json:"scriptContent"`
//ScriptDir string `json:"scriptDir"`
//Parameters map[string]string `json:"parameters"`
//TimeLimit time.Duration // 作业时间限制

SubmitHpcTaskReq { SubmitHpcTaskReq {
App string `json:"app"` App string `json:"app"`
ClusterId string `json:"clusterId"` ClusterId string `json:"clusterId"`
JobName string `json:"jobName"` JobName string `json:"jobName"`
ScriptContent string `json:"scriptContent"` ScriptContent string `json:"scriptContent"`
Parameters map[string]string `json:"parameters"` Parameters map[string]string `json:"parameters"`
Backend string `json:"backend"`
Backend string `json:"backend"`
} }
)
)

type HpcAppClusterReq {
App string `form:"app"`
}

+ 5
- 1
desc/pcm.api View File

@@ -186,7 +186,7 @@ service pcm {


@doc "同步指定资源规格" @doc "同步指定资源规格"
@handler syncResourceSpecHandler @handler syncResourceSpecHandler
put /core/ai/resourceSpec/sync (SyncResourceReq) returns (CommonResp)
put /core/ai/resourceSpec/sync (SyncResourceReq) returns (ListResult)


@doc "获取指定资源规格详情" @doc "获取指定资源规格详情"
@handler detailResourceSpecHandler @handler detailResourceSpecHandler
@@ -251,6 +251,10 @@ service pcm {
@doc "超算任务日志" @doc "超算任务日志"
@handler getHpcTaskLogHandler @handler getHpcTaskLogHandler
get /hpc/jobLogs/:taskId (HpcTaskLogReq) returns (HpcTaskLogResp) get /hpc/jobLogs/:taskId (HpcTaskLogReq) returns (HpcTaskLogResp)

@doc "查询超算应用支持的集群"
@handler getHpcAppClusterHandler
get /hpc/getHpcAppCluster (HpcAppClusterReq) returns (CommonResp)
} }


//cloud二级接口 //cloud二级接口


+ 24
- 0
internal/handler/hpc/gethpcappclusterhandler.go View File

@@ -0,0 +1,24 @@
package hpc

import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/hpc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)

func GetHpcAppClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.HpcAppClusterReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}

l := hpc.NewGetHpcAppClusterLogic(r.Context(), svcCtx)
resp, err := l.GetHpcAppCluster(&req)
result.HttpResult(r, w, resp, err)
}
}

+ 6
- 0
internal/handler/routes.go View File

@@ -724,6 +724,12 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/hpc/commitHpcTask", Path: "/hpc/commitHpcTask",
Handler: hpc.CommitHpcTaskHandler(serverCtx), Handler: hpc.CommitHpcTaskHandler(serverCtx),
}, },
{
// 查询超算应用支持的集群
Method: http.MethodGet,
Path: "/hpc/getHpcAppCluster",
Handler: hpc.GetHpcAppClusterHandler(serverCtx),
},
{ {
// 超算查询任务列表 // 超算查询任务列表
Method: http.MethodGet, Method: http.MethodGet,


+ 85
- 26
internal/logic/hpc/commithpctasklogic.go View File

@@ -13,6 +13,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"regexp"
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
@@ -85,13 +86,67 @@ func (l *CommitHpcTaskLogic) getClusterInfo(clusterID string) (*types.ClusterInf
return &clusterInfo, &adapterInfo, nil return &clusterInfo, &adapterInfo, nil
} }


// 自定义函数映射
func createFuncMap() template.FuncMap {
return template.FuncMap{
"regexMatch": regexMatch,
"required": required,
"error": errorHandler,
"default": defaultHandler,
}
}
func extractUserError(originalErr error) error {
// 尝试匹配模板引擎返回的错误格式
re := regexp.MustCompile(`error calling \w+: (.*)$`)
matches := re.FindStringSubmatch(originalErr.Error())
if len(matches) > 1 {
return errors.New(matches[1])
}
return originalErr
}

// 正则匹配函数
func regexMatch(pattern string) *regexp.Regexp {
return regexp.MustCompile(pattern)
}

// 必填字段检查
func required(msg string, val interface{}) (interface{}, error) {
if val == nil || val == "" {
return nil, errors.New(msg)
}
return val, nil
}

// 错误处理函数
func errorHandler(msg string) (string, error) {
return "", errors.New(msg)
}

// 默认值处理函数
func defaultHandler(defaultVal interface{}, val interface{}) interface{} {
switch v := val.(type) {
case nil:
return defaultVal
case string:
if v == "" {
return defaultVal
}
case int:
if v == 0 {
return defaultVal
}
// 可根据需要添加其他类型判断
}
return val
}
func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobRequest) (string, error) { func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobRequest) (string, error) {
// 使用缓存模板 // 使用缓存模板
tmpl, ok := templateCache.Load(templateContent) tmpl, ok := templateCache.Load(templateContent)
if !ok { if !ok {
parsedTmpl, err := template.New("jobScript").Parse(templateContent)
parsedTmpl, err := template.New("slurmTemplate").Funcs(createFuncMap()).Parse(templateContent)
if err != nil { if err != nil {
return "", fmt.Errorf("template parse failed: %w", err)
return "", err
} }
templateCache.Store(templateContent, parsedTmpl) templateCache.Store(templateContent, parsedTmpl)
tmpl = parsedTmpl tmpl = parsedTmpl
@@ -104,7 +159,8 @@ func (l *CommitHpcTaskLogic) RenderJobScript(templateContent string, req *JobReq


var buf strings.Builder var buf strings.Builder
if err := tmpl.(*template.Template).Execute(&buf, params); err != nil { if err := tmpl.(*template.Template).Execute(&buf, params); err != nil {
return "", fmt.Errorf("template render failed: %w", err)
log.Error().Err(err).Msg("模板渲染失败")
return "", extractUserError(err)
} }
return buf.String(), nil return buf.String(), nil
} }
@@ -235,36 +291,39 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
if err != nil { if err != nil {
return nil, err return nil, err
} }
scriptContent := req.ScriptContent
if scriptContent == "" {
// 获取模板
var templateInfo types.HpcAppTemplateInfo
tx := l.svcCtx.DbEngin.Table("hpc_app_template").
Where("cluster_id = ? and app = ? ", req.ClusterId, req.App)
if req.OperateType != "" {
tx.Where("app_type = ?", req.OperateType)
}
if err := tx.First(&templateInfo).Error; err != nil {
return nil, fmt.Errorf("failed to get template: %w", err)
}


// 获取模板
var templateInfo types.HpcAppTemplateInfo
tx := l.svcCtx.DbEngin.Table("hpc_app_template").
Where("cluster_id = ? and app = ? ", req.ClusterId, req.App)
if req.OperateType != "" {
tx.Where("app_type = ?", req.OperateType)
}
if err := tx.First(&templateInfo).Error; err != nil {
return nil, fmt.Errorf("failed to get template: %w", err)
}

// 转换请求参数
jobRequest, err := ConvertToJobRequest(req)
if err != nil {
return nil, fmt.Errorf("invalid job request: %w", err)
}
// 转换请求参数
jobRequest, err := ConvertToJobRequest(req)
if err != nil {
return nil, err
}


// 渲染脚本
script, err := l.RenderJobScript(templateInfo.Content, &jobRequest)
if err != nil {
return nil, fmt.Errorf("script rendering failed: %w", err)
// 渲染脚本
script, err := l.RenderJobScript(templateInfo.Content, &jobRequest)
if err != nil {
return nil, err
}
scriptContent = script
} }


q, _ := jsoniter.MarshalToString(script)
q, _ := jsoniter.MarshalToString(scriptContent)
submitQ := types.SubmitHpcTaskReq{ submitQ := types.SubmitHpcTaskReq{
App: req.App, App: req.App,
ClusterId: req.ClusterId, ClusterId: req.ClusterId,
JobName: jobName, JobName: jobName,
ScriptContent: script,
ScriptContent: scriptContent,
Parameters: req.Parameters, Parameters: req.Parameters,
Backend: req.Backend, Backend: req.Backend,
} }
@@ -276,7 +335,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t


jobID := resp.Data.JobInfo["jobId"] jobID := resp.Data.JobInfo["jobId"]
workDir := resp.Data.JobInfo["jobDir"] workDir := resp.Data.JobInfo["jobDir"]
taskID, err := l.SaveHpcTaskToDB(req, script, jobID, workDir)
taskID, err := l.SaveHpcTaskToDB(req, scriptContent, jobID, workDir)
if err != nil { if err != nil {
log.Error().Msgf("Failed to save task to DB: %v", err) log.Error().Msgf("Failed to save task to DB: %v", err)
return nil, fmt.Errorf("db save failed: %w", err) return nil, fmt.Errorf("db save failed: %w", err)


+ 39
- 0
internal/logic/hpc/gethpcappclusterlogic.go View File

@@ -0,0 +1,39 @@
package hpc

import (
"context"
"github.com/rs/zerolog/log"

"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"

"github.com/zeromicro/go-zero/core/logx"
)

type GetHpcAppClusterLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}

func NewGetHpcAppClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetHpcAppClusterLogic {
return &GetHpcAppClusterLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}

func (l *GetHpcAppClusterLogic) GetHpcAppCluster(req *types.HpcAppClusterReq) (resp *types.ListResult, err error) {
resp = &types.ListResult{}
var clusterIds []string
err = l.svcCtx.DbEngin.Table("hpc_app_template").Distinct("cluster_id").
Where(" app = ? and status = 1 and deleted_at is null", req.App).
Find(&clusterIds).Error
if err != nil {
log.Error().Msgf("GetHpcAppCluster err:%v", err)
return nil, err
}
resp.List = clusterIds
return
}

+ 4
- 0
internal/types/types.go View File

@@ -2786,6 +2786,10 @@ type Hooks struct {
ContainerHooksResp ContainerHooksResp `json:"containerHooks,omitempty" copier:"ContainerHooksResp"` // * ContainerHooksResp ContainerHooksResp `json:"containerHooks,omitempty" copier:"ContainerHooksResp"` // *
} }


type HpcAppClusterReq struct {
App string `form:"app"`
}

type HpcAppTemplateInfo struct { type HpcAppTemplateInfo struct {
Id int64 `json:"id"` Id int64 `json:"id"`
Name string `json:"name"` Name string `json:"name"`


Loading…
Cancel
Save