| @@ -94,4 +94,7 @@ JcsMiddleware: | |||||
| Participant: | Participant: | ||||
| AdapterId: "1777144940456666666" | AdapterId: "1777144940456666666" | ||||
| CloudAdapterId: "1770658294298316800" | |||||
| CloudAdapterId: "1770658294298316800" | |||||
| JccUserService: | |||||
| Url: http://jcce-admin:8082/jcc-admin/admin/user/{id} | |||||
| @@ -1,11 +1,12 @@ | |||||
| package cloud | package cloud | ||||
| import ( | import ( | ||||
| "net/http" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "k8s.io/apimachinery/pkg/util/json" | "k8s.io/apimachinery/pkg/util/json" | ||||
| "net/http" | |||||
| "github.com/zeromicro/go-zero/rest/httpx" | "github.com/zeromicro/go-zero/rest/httpx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/cloud" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/cloud" | ||||
| @@ -32,6 +33,7 @@ func CommitGeneralTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| user := &models.JccUserInfo{} | user := &models.JccUserInfo{} | ||||
| json.Unmarshal([]byte(userStr), user) | json.Unmarshal([]byte(userStr), user) | ||||
| req.UserId = user.Id | req.UserId = user.Id | ||||
| req.UserName = user.UserName | |||||
| l := cloud.NewCommitGeneralTaskLogic(r.Context(), svcCtx) | l := cloud.NewCommitGeneralTaskLogic(r.Context(), svcCtx) | ||||
| resp, err := l.CommitGeneralTask(&req) | resp, err := l.CommitGeneralTask(&req) | ||||
| result.HttpResult(r, w, resp, err) | result.HttpResult(r, w, resp, err) | ||||
| @@ -1,12 +1,13 @@ | |||||
| package cloud | package cloud | ||||
| import ( | import ( | ||||
| "io" | |||||
| "net/http" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/cloud" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/cloud" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | ||||
| "io" | |||||
| "k8s.io/apimachinery/pkg/util/json" | "k8s.io/apimachinery/pkg/util/json" | ||||
| "net/http" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| container "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types/cloud" | container "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types/cloud" | ||||
| @@ -30,6 +31,7 @@ func ContainerCreateHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| user := &models.JccUserInfo{} | user := &models.JccUserInfo{} | ||||
| json.Unmarshal([]byte(userStr), user) | json.Unmarshal([]byte(userStr), user) | ||||
| req.UserId = user.Id | req.UserId = user.Id | ||||
| req.UserName = user.UserName | |||||
| l := cloud.NewContainerCreateLogic(r.Context(), svcCtx) | l := cloud.NewContainerCreateLogic(r.Context(), svcCtx) | ||||
| resp, err := l.ContainerCreate(&req) | resp, err := l.ContainerCreate(&req) | ||||
| result.HttpResult(r, w, resp, err) | result.HttpResult(r, w, resp, err) | ||||
| @@ -1,14 +1,16 @@ | |||||
| package hpc | package hpc | ||||
| import ( | import ( | ||||
| "net/http" | |||||
| "strconv" | |||||
| "github.com/zeromicro/go-zero/rest/httpx" | "github.com/zeromicro/go-zero/rest/httpx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/hpc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/hpc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "net/http" | |||||
| "strconv" | |||||
| ) | ) | ||||
| func CommitHpcTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | func CommitHpcTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | ||||
| @@ -30,7 +32,8 @@ func CommitHpcTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| result.ParamErrorResult(r, w, err) | result.ParamErrorResult(r, w, err) | ||||
| return | return | ||||
| } | } | ||||
| req.Parameters["UserId"] = strconv.FormatInt(jccUserInfo.Id, 10) | |||||
| req.Parameters[constants.UserId] = strconv.FormatInt(jccUserInfo.Id, 10) | |||||
| req.Parameters[constants.UserName] = jccUserInfo.UserName | |||||
| l := hpc.NewCommitHpcTaskLogic(r.Context(), svcCtx) | l := hpc.NewCommitHpcTaskLogic(r.Context(), svcCtx) | ||||
| resp, err := l.CommitHpcTask(&req) | resp, err := l.CommitHpcTask(&req) | ||||
| @@ -16,6 +16,7 @@ import ( | |||||
| storage "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/storage" | storage "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/storage" | ||||
| storelink "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/storelink" | storelink "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/storelink" | ||||
| vm "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/vm" | vm "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/vm" | ||||
| xjlab "gitlink.org.cn/JointCloud/pcm-coordinator/internal/handler/xjlab" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "github.com/zeromicro/go-zero/rest" | "github.com/zeromicro/go-zero/rest" | ||||
| @@ -1735,4 +1736,28 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { | |||||
| }, | }, | ||||
| rest.WithPrefix("/pcm/v1"), | rest.WithPrefix("/pcm/v1"), | ||||
| ) | ) | ||||
| server.AddRoutes( | |||||
| []rest.Route{ | |||||
| { | |||||
| // 查询任务列表 | |||||
| Method: http.MethodGet, | |||||
| Path: "/xjlab/taskList", | |||||
| Handler: xjlab.TaskListHandler(serverCtx), | |||||
| }, | |||||
| { | |||||
| // 查询指定任务资源使用情况 | |||||
| Method: http.MethodGet, | |||||
| Path: "/xjlab/taskResourceUsage", | |||||
| Handler: xjlab.TaskResourceUsageHandler(serverCtx), | |||||
| }, | |||||
| { | |||||
| //任务状态监控 | |||||
| Method: http.MethodGet, | |||||
| Path: "/xjlab/taskStatusStatistics", | |||||
| Handler: xjlab.TaskStatusStatisticsHandler(serverCtx), | |||||
| }, | |||||
| }, | |||||
| rest.WithPrefix("/pcm/v1"), | |||||
| ) | |||||
| } | } | ||||
| @@ -1,13 +1,14 @@ | |||||
| package schedule | package schedule | ||||
| import ( | import ( | ||||
| "net/http" | |||||
| "github.com/zeromicro/go-zero/rest/httpx" | "github.com/zeromicro/go-zero/rest/httpx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/schedule" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/schedule" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "net/http" | |||||
| ) | ) | ||||
| func ScheduleCreateTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | func ScheduleCreateTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | ||||
| @@ -29,6 +30,7 @@ func ScheduleCreateTaskHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| return | return | ||||
| } | } | ||||
| req.UserId = jccUserInfo.Id | req.UserId = jccUserInfo.Id | ||||
| req.UserName = jccUserInfo.UserName | |||||
| l := schedule.NewScheduleCreateTaskLogic(r.Context(), svcCtx) | l := schedule.NewScheduleCreateTaskLogic(r.Context(), svcCtx) | ||||
| resp, err := l.ScheduleCreateTask(&req) | resp, err := l.ScheduleCreateTask(&req) | ||||
| result.HttpResult(r, w, resp, err) | result.HttpResult(r, w, resp, err) | ||||
| @@ -0,0 +1,64 @@ | |||||
| package xjlab | |||||
| import ( | |||||
| "net/http" | |||||
| "github.com/zeromicro/go-zero/rest/httpx" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/xjlab" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | |||||
| ) | |||||
| func TaskListHandler(ctx *svc.ServiceContext) http.HandlerFunc { | |||||
| return func(w http.ResponseWriter, r *http.Request) { | |||||
| var req types.XJLABTaskReq | |||||
| if err := httpx.Parse(r, &req); err != nil { | |||||
| result.ParamErrorResult(r, w, err) | |||||
| return | |||||
| } | |||||
| token := r.Header.Get("Authorization") | |||||
| // 获取用户信息 | |||||
| jccUserInfo, err := utils.ParseTokenWithoutVerify(token) | |||||
| if err != nil { | |||||
| result.ParamErrorResult(r, w, err) | |||||
| return | |||||
| } | |||||
| req.UserId = jccUserInfo.Id | |||||
| if req.UserName == "" { | |||||
| req.UserName = jccUserInfo.UserName | |||||
| } | |||||
| l := xjlab.NewPageListTaskLogic(r.Context(), ctx) | |||||
| resp, err := l.PageListTask(&req) | |||||
| result.HttpResult(r, w, resp, err) | |||||
| } | |||||
| } | |||||
| func TaskResourceUsageHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| return func(w http.ResponseWriter, r *http.Request) { | |||||
| var req types.FId | |||||
| if err := httpx.Parse(r, &req); err != nil { | |||||
| result.ParamErrorResult(r, w, err) | |||||
| return | |||||
| } | |||||
| l := xjlab.NewTaskResourceUsageLogic(r.Context(), svcCtx) | |||||
| resp, err := l.TaskResourceUsage(&req) | |||||
| result.HttpResult(r, w, resp, err) | |||||
| } | |||||
| } | |||||
| func TaskStatusStatisticsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||||
| return func(w http.ResponseWriter, r *http.Request) { | |||||
| var req types.XJLABCommonReq | |||||
| if err := httpx.Parse(r, &req); err != nil { | |||||
| result.ParamErrorResult(r, w, err) | |||||
| return | |||||
| } | |||||
| l := xjlab.NewTaskStatusStatisticsLogic(r.Context(), svcCtx) | |||||
| resp, err := l.GetSimpleTaskStatistics(&req) | |||||
| result.HttpResult(r, w, resp, err) | |||||
| } | |||||
| } | |||||
| @@ -3,6 +3,11 @@ package cloud | |||||
| import ( | import ( | ||||
| "bytes" | "bytes" | ||||
| "context" | "context" | ||||
| "io" | |||||
| "strconv" | |||||
| "strings" | |||||
| "time" | |||||
| "github.com/pkg/errors" | "github.com/pkg/errors" | ||||
| clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client" | clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers" | ||||
| @@ -13,15 +18,11 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/remoteUtil" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/remoteUtil" | ||||
| "io" | |||||
| "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||||
| "k8s.io/apimachinery/pkg/runtime" | "k8s.io/apimachinery/pkg/runtime" | ||||
| syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml" | syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml" | ||||
| "k8s.io/apimachinery/pkg/util/json" | "k8s.io/apimachinery/pkg/util/json" | ||||
| kyaml "k8s.io/apimachinery/pkg/util/yaml" | kyaml "k8s.io/apimachinery/pkg/util/yaml" | ||||
| "strconv" | |||||
| "strings" | |||||
| "time" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| @@ -102,6 +103,7 @@ func (l *CommitGeneralTaskLogic) CommitGeneralTask(req *types.GeneralTaskReq) (r | |||||
| SynergyStatus: synergyStatus, | SynergyStatus: synergyStatus, | ||||
| Strategy: strategy, | Strategy: strategy, | ||||
| UserId: req.UserId, | UserId: req.UserId, | ||||
| UserName: req.UserName, | |||||
| } | } | ||||
| resp.TaskId = taskModel.Id | resp.TaskId = taskModel.Id | ||||
| var taskClouds []cloud.TaskCloudModel | var taskClouds []cloud.TaskCloudModel | ||||
| @@ -18,6 +18,10 @@ import ( | |||||
| "context" | "context" | ||||
| "errors" | "errors" | ||||
| "fmt" | "fmt" | ||||
| "net/http" | |||||
| "strconv" | |||||
| "time" | |||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/participant/cloud" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/participant/cloud" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| @@ -26,9 +30,6 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| cloud2 "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud" | cloud2 "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "net/http" | |||||
| "strconv" | |||||
| "time" | |||||
| ) | ) | ||||
| type ContainerCreateLogic struct { | type ContainerCreateLogic struct { | ||||
| @@ -79,6 +80,7 @@ func (l *ContainerCreateLogic) ContainerCreate(req *container.CreateParam) (resp | |||||
| Description: req.Description, | Description: req.Description, | ||||
| Name: req.Name, | Name: req.Name, | ||||
| UserId: req.UserId, | UserId: req.UserId, | ||||
| UserName: req.UserName, | |||||
| AdapterTypeDict: "0", | AdapterTypeDict: "0", | ||||
| CommitTime: time.Now(), | CommitTime: time.Now(), | ||||
| } | } | ||||
| @@ -3,6 +3,13 @@ package hpc | |||||
| import ( | import ( | ||||
| "context" | "context" | ||||
| "fmt" | "fmt" | ||||
| "regexp" | |||||
| "strconv" | |||||
| "strings" | |||||
| "sync" | |||||
| "text/template" | |||||
| "time" | |||||
| jsoniter "github.com/json-iterator/go" | jsoniter "github.com/json-iterator/go" | ||||
| "github.com/pkg/errors" | "github.com/pkg/errors" | ||||
| "github.com/rs/zerolog/log" | "github.com/rs/zerolog/log" | ||||
| @@ -12,14 +19,9 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "regexp" | |||||
| "strconv" | |||||
| "strings" | |||||
| "sync" | |||||
| "text/template" | |||||
| "time" | |||||
| ) | ) | ||||
| type CommitHpcTaskLogic struct { | type CommitHpcTaskLogic struct { | ||||
| @@ -29,12 +31,6 @@ type CommitHpcTaskLogic struct { | |||||
| hpcService *service.HpcService | hpcService *service.HpcService | ||||
| } | } | ||||
| const ( | |||||
| statusSaved = "Saved" | |||||
| statusDeploying = "Deploying" | |||||
| adapterTypeHPC = "2" | |||||
| ) | |||||
| type JobRequest struct { | type JobRequest struct { | ||||
| App string `json:"app"` | App string `json:"app"` | ||||
| Common CommonParams `json:"common"` | Common CommonParams `json:"common"` | ||||
| @@ -201,16 +197,18 @@ func (l *CommitHpcTaskLogic) SaveHpcTaskToDB(req *types.CommitHpcTaskReq, jobScr | |||||
| } | } | ||||
| }() | }() | ||||
| userID, _ := strconv.ParseInt(req.Parameters["UserId"], 10, 64) | |||||
| userID, _ := strconv.ParseInt(req.Parameters[constants.UserId], 10, 64) | |||||
| taskID := utils.GenSnowflakeID() | taskID := utils.GenSnowflakeID() | ||||
| taskModel := models.Task{ | taskModel := models.Task{ | ||||
| Id: taskID, | Id: taskID, | ||||
| Name: req.Name, | Name: req.Name, | ||||
| Description: req.Description, | Description: req.Description, | ||||
| CommitTime: time.Now(), | CommitTime: time.Now(), | ||||
| Status: statusSaved, | |||||
| AdapterTypeDict: adapterTypeHPC, | |||||
| Status: constants.StatusSaved, | |||||
| AdapterTypeDict: constants.AdapterTypeHPC, | |||||
| UserId: userID, | UserId: userID, | ||||
| UserName: req.Parameters[constants.UserName], | |||||
| } | } | ||||
| if err = tx.Table("task").Create(&taskModel).Error; err != nil { | if err = tx.Table("task").Create(&taskModel).Error; err != nil { | ||||
| @@ -226,36 +224,48 @@ func (l *CommitHpcTaskLogic) SaveHpcTaskToDB(req *types.CommitHpcTaskReq, jobScr | |||||
| if err != nil { | if err != nil { | ||||
| return "", fmt.Errorf("failed to marshal parameters: %w", err) | return "", fmt.Errorf("failed to marshal parameters: %w", err) | ||||
| } | } | ||||
| //解析slurm脚本内容 | |||||
| var resource models.ResourceSpec | |||||
| if req.Backend == string(constants.HPC_SYSTEM_SLURM) { | |||||
| parser := utils.NewSlurmParser() | |||||
| slurmResource := parser.ParseScript(jobScript) | |||||
| resource = models.ResourceSpec{ | |||||
| //资源规格名称,采用拼接的方式 集群名+队列名 | |||||
| ResourceName: fmt.Sprintf("%s_%s", clusterInfo.Name, slurmResource.Partition), | |||||
| Partition: slurmResource.Partition, | |||||
| Specifications: slurmResource, | |||||
| } | |||||
| } | |||||
| clusterID := utils.StringToInt64(clusterInfo.Id) | clusterID := utils.StringToInt64(clusterInfo.Id) | ||||
| hpcTask := models.TaskHpc{ | hpcTask := models.TaskHpc{ | ||||
| Id: utils.GenSnowflakeID(), | |||||
| TaskId: taskID, | |||||
| AdapterId: clusterInfo.AdapterId, | |||||
| AdapterName: adapterInfo.Name, | |||||
| ClusterId: clusterID, | |||||
| ClusterName: clusterInfo.Name, | |||||
| Name: taskModel.Name, | |||||
| Backend: req.Backend, | |||||
| OperateType: req.OperateType, | |||||
| CmdScript: req.Parameters["cmdScript"], | |||||
| WallTime: req.Parameters["wallTime"], | |||||
| AppType: req.Parameters["appType"], | |||||
| AppName: req.App, | |||||
| Queue: req.Parameters["queue"], | |||||
| SubmitType: req.Parameters["submitType"], | |||||
| NNode: req.Parameters["nNode"], | |||||
| Account: clusterInfo.Username, | |||||
| StdInput: req.Parameters["stdInput"], | |||||
| Partition: req.Parameters["partition"], | |||||
| CreatedTime: time.Now(), | |||||
| UpdatedTime: time.Now(), | |||||
| Status: statusDeploying, | |||||
| UserId: userID, | |||||
| Params: paramsJSON, | |||||
| Script: jobScript, | |||||
| JobId: jobId, | |||||
| WorkDir: workDir, | |||||
| Id: utils.GenSnowflakeID(), | |||||
| TaskId: taskID, | |||||
| AdapterId: clusterInfo.AdapterId, | |||||
| AdapterName: adapterInfo.Name, | |||||
| ClusterId: clusterID, | |||||
| ClusterName: clusterInfo.Name, | |||||
| Name: taskModel.Name, | |||||
| Backend: req.Backend, | |||||
| OperateType: req.OperateType, | |||||
| CmdScript: req.Parameters["cmdScript"], | |||||
| WallTime: req.Parameters["wallTime"], | |||||
| AppType: req.Parameters["appType"], | |||||
| AppName: req.App, | |||||
| Queue: req.Parameters["queue"], | |||||
| SubmitType: req.Parameters["submitType"], | |||||
| NNode: req.Parameters["nNode"], | |||||
| Account: clusterInfo.Username, | |||||
| StdInput: req.Parameters["stdInput"], | |||||
| Partition: req.Parameters["partition"], | |||||
| CreatedTime: time.Now(), | |||||
| UpdatedTime: time.Now(), | |||||
| Status: constants.StatusDeploying, | |||||
| UserId: userID, | |||||
| Params: paramsJSON, | |||||
| Script: jobScript, | |||||
| JobId: jobId, | |||||
| WorkDir: workDir, | |||||
| ResourceSpec: resource, | |||||
| } | } | ||||
| if err = tx.Table("task_hpc").Create(&hpcTask).Error; err != nil { | if err = tx.Table("task_hpc").Create(&hpcTask).Error; err != nil { | ||||
| @@ -3,6 +3,10 @@ package schedule | |||||
| import ( | import ( | ||||
| "context" | "context" | ||||
| "fmt" | "fmt" | ||||
| "slices" | |||||
| "strings" | |||||
| "time" | |||||
| "github.com/pkg/errors" | "github.com/pkg/errors" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector" | ||||
| @@ -12,9 +16,6 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gopkg.in/yaml.v3" | "gopkg.in/yaml.v3" | ||||
| "slices" | |||||
| "strings" | |||||
| "time" | |||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| ) | ) | ||||
| @@ -167,7 +168,7 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) ( | |||||
| // filter data distribution | // filter data distribution | ||||
| clustersWithDataDistributes := generateFilteredDataDistributes(assignedClusters, req.DataDistributes) | clustersWithDataDistributes := generateFilteredDataDistributes(assignedClusters, req.DataDistributes) | ||||
| taskId, err := l.createTask(taskName, req.Description, req.UserId, req.JobResources.ScheduleStrategy, clustersWithDataDistributes, req.Token, req.UserIp) | |||||
| taskId, err := l.createTask(taskName, req.Description, req.UserId, req.JobResources.ScheduleStrategy, clustersWithDataDistributes, req.Token, req.UserIp, req.UserName) | |||||
| if err != nil { | if err != nil { | ||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| @@ -198,7 +199,7 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) ( | |||||
| // filter data distribution | // filter data distribution | ||||
| clustersWithDataDistributes := generateFilteredDataDistributes(assignedClusters, req.DataDistributes) | clustersWithDataDistributes := generateFilteredDataDistributes(assignedClusters, req.DataDistributes) | ||||
| taskId, err := l.createTask(taskName, req.Description, req.UserId, req.JobResources.ScheduleStrategy, clustersWithDataDistributes, req.Token, req.UserIp) | |||||
| taskId, err := l.createTask(taskName, req.Description, req.UserId, req.JobResources.ScheduleStrategy, clustersWithDataDistributes, req.Token, req.UserIp, req.UserName) | |||||
| if err != nil { | if err != nil { | ||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| @@ -262,7 +263,7 @@ func (l *ScheduleCreateTaskLogic) getAssignedClustersByStrategy(resources *types | |||||
| return assignedClusters, nil | return assignedClusters, nil | ||||
| } | } | ||||
| func (l *ScheduleCreateTaskLogic) createTask(taskName string, desc string, userId int64, strategyName string, clustersWithDataDistributes *ClustersWithDataDistributes, token string, userIp string) (int64, error) { | |||||
| func (l *ScheduleCreateTaskLogic) createTask(taskName string, desc string, userId int64, strategyName string, clustersWithDataDistributes *ClustersWithDataDistributes, token string, userIp string, userName string) (int64, error) { | |||||
| var synergyStatus int64 | var synergyStatus int64 | ||||
| if len(clustersWithDataDistributes.Clusters) > 1 { | if len(clustersWithDataDistributes.Clusters) > 1 { | ||||
| synergyStatus = 1 | synergyStatus = 1 | ||||
| @@ -273,7 +274,7 @@ func (l *ScheduleCreateTaskLogic) createTask(taskName string, desc string, userI | |||||
| fmt.Printf("Error while Marshaling. %v", err) | fmt.Printf("Error while Marshaling. %v", err) | ||||
| } | } | ||||
| taskId, err := l.svcCtx.Scheduler.CreateTask(taskName, desc, userId, synergyStatus, strategyName, string(y), token, userIp, &l.svcCtx.Config) | |||||
| taskId, err := l.svcCtx.Scheduler.CreateTask(taskName, desc, userId, synergyStatus, strategyName, string(y), token, userIp, &l.svcCtx.Config, userName) | |||||
| if err != nil { | if err != nil { | ||||
| return 0, err | return 0, err | ||||
| } | } | ||||
| @@ -2,14 +2,15 @@ package schedule | |||||
| import ( | import ( | ||||
| "context" | "context" | ||||
| "strconv" | |||||
| "strings" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | ||||
| "strconv" | |||||
| "strings" | |||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| ) | ) | ||||
| @@ -65,7 +66,7 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type | |||||
| synergystatus = 1 | synergystatus = 1 | ||||
| } | } | ||||
| taskId, err := l.svcCtx.Scheduler.CreateTask(req.AiOption.TaskName, "", 0, synergystatus, req.AiOption.Strategy, "", req.Token, "", &l.svcCtx.Config) | |||||
| taskId, err := l.svcCtx.Scheduler.CreateTask(req.AiOption.TaskName, "", 0, synergystatus, req.AiOption.Strategy, "", req.Token, "", &l.svcCtx.Config, "") | |||||
| if err != nil { | if err != nil { | ||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| @@ -0,0 +1,275 @@ | |||||
| package xjlab | |||||
| import ( | |||||
| "context" | |||||
| "time" | |||||
| jsoniter "github.com/json-iterator/go" | |||||
| "github.com/zeromicro/go-zero/core/logx" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/timeutils" | |||||
| "gorm.io/gorm" | |||||
| ) | |||||
| type PageListTaskLogic struct { | |||||
| logx.Logger | |||||
| ctx context.Context | |||||
| svcCtx *svc.ServiceContext | |||||
| } | |||||
| func NewPageListTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PageListTaskLogic { | |||||
| return &PageListTaskLogic{ | |||||
| Logger: logx.WithContext(ctx), | |||||
| ctx: ctx, | |||||
| svcCtx: svcCtx, | |||||
| } | |||||
| } | |||||
// TaskResp is the JSON shape of one task in the task list response.
type TaskResp struct {
	// Task id; serialized as a JSON string.
	Id int64 `json:"id,omitempty,string" db:"id"`
	// Job name.
	Name string `json:"name,omitempty" db:"name"`
	// Job description.
	Description string `json:"description,omitempty" db:"description"`
	// Job status.
	Status string `json:"status,omitempty" db:"status"`
	// Scheduling strategy.
	Strategy int64 `json:"strategy" db:"strategy"`
	// Synergy status (0 = not coordinated, 1 = coordinated).
	SynergyStatus int64 `json:"synergyStatus" db:"synergy_status"`
	// Commit time.
	CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
	// Start time.
	StartTime string `json:"startTime,omitempty" db:"start_time"`
	// Time the run ended.
	EndTime string `json:"endTime,omitempty" db:"end_time"`
	// Elapsed running time, in seconds.
	RunningTime int64  `json:"runningTime" db:"running_time"`
	YamlString  string `json:"yamlString,omitempty" db:"yaml_string"`
	// Job result.
	Result      string `json:"result,omitempty" db:"result"`
	DeletedAt   string `json:"deletedAt,omitempty" gorm:"index" db:"deleted_at"`
	NsID        string `json:"nsId,omitempty" db:"ns_id"`
	TenantId    string `json:"tenantId,omitempty" db:"tenant_id"`
	CreatedTime string `json:"createdTime,omitempty" db:"created_time" gorm:"autoCreateTime"`
	UpdatedTime string `json:"updatedTime,omitempty" db:"updated_time"`
	// Adapter type (a value from the dictionary table).
	AdapterTypeDict string `json:"adapterTypeDict" db:"adapter_type_dict" gorm:"adapter_type_dict"`
	// Task type (a value from the dictionary table).
	TaskTypeDict string `json:"taskTypeDict" db:"task_type_dict" gorm:"task_type_dict"`
	UserId       int64  `json:"userId,omitempty" db:"user_id"`
	UserName     string `json:"userName,omitempty" db:"user_name"`
	ClusterId    string `json:"clusterId,omitempty" db:"cluster_id"`
	ClusterName  string `json:"clusterName,omitempty" db:"cluster_name"`
	ResourceSpec string `json:"resourceSpec,omitempty" db:"resource_spec"`
	Card         string `json:"card,omitempty" db:"card"`
}
// clusterInfo holds the cluster-related fields of a task: cluster id, cluster
// name, resource specification and card. JSON keys are snake_case.
type clusterInfo struct {
	ClusterId    string `json:"cluster_id"`
	ClusterName  string `json:"cluster_name"`
	ResourceSpec string `json:"resource_spec"`
	Card         string `json:"card"`
}
| func (l *PageListTaskLogic) PageListTask(req *types.XJLABTaskReq) (*types.PageResult, error) { | |||||
| // 验证请求参数 | |||||
| if err := l.validateRequest(req); err != nil { | |||||
| return nil, err | |||||
| } | |||||
| // 查询任务总数 | |||||
| total, err := l.getTaskCount(req) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | |||||
| // 查询任务列表 | |||||
| tasks, err := l.getTaskList(req) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | |||||
| // 异步更新任务状态 | |||||
| l.updateTaskStatusAsync(tasks) | |||||
| // 处理任务响应数据 | |||||
| taskResps := l.processTaskResponses(tasks) | |||||
| // 构建分页结果 | |||||
| return &types.PageResult{ | |||||
| List: &taskResps, | |||||
| PageSize: req.PageSize, | |||||
| PageNum: req.PageNum, | |||||
| Total: total, | |||||
| }, nil | |||||
| } | |||||
| // validateRequest 验证请求参数 | |||||
| func (l *PageListTaskLogic) validateRequest(req *types.XJLABTaskReq) error { | |||||
| if req.PageSize <= 0 || req.PageNum <= 0 { | |||||
| return result.NewDefaultError("Invalid page size or page number") | |||||
| } | |||||
| return nil | |||||
| } | |||||
| // buildBaseQuery 构建基础查询条件 | |||||
| func (l *PageListTaskLogic) buildBaseQuery(req *types.XJLABTaskReq) *gorm.DB { | |||||
| db := l.svcCtx.DbEngin.Model(&types.TaskModel{}).Table("task").Where("deleted_at is null") | |||||
| // 用户权限过滤 | |||||
| if req.UserName != "" && req.UserName != "admin" { | |||||
| db = db.Where("user_name = ?", req.UserName) | |||||
| } | |||||
| // 任务名称模糊查询 | |||||
| if req.Name != "" { | |||||
| db = db.Where("name LIKE ?", "%"+req.Name+"%") | |||||
| } | |||||
| // 计算类型筛选 | |||||
| if req.AdapterTypeDict != "" { | |||||
| db = db.Where("adapter_type_dict = ?", req.AdapterTypeDict) | |||||
| } | |||||
| // 任务状态筛选 | |||||
| if req.Status != "" { | |||||
| db = db.Where("status = ?", req.Status) | |||||
| } | |||||
| // 时间范围筛选 | |||||
| if req.StartTime != "" && req.EndTime != "" { | |||||
| db = db.Where("created_time BETWEEN ? AND ?", req.StartTime, req.EndTime) | |||||
| } | |||||
| return db | |||||
| } | |||||
| // getTaskCount 获取任务总数 | |||||
| func (l *PageListTaskLogic) getTaskCount(req *types.XJLABTaskReq) (int64, error) { | |||||
| var total int64 | |||||
| db := l.buildBaseQuery(req) | |||||
| if err := db.Count(&total).Error; err != nil { | |||||
| return 0, result.NewDefaultError(err.Error()) | |||||
| } | |||||
| return total, nil | |||||
| } | |||||
| // getTaskList 获取任务列表 | |||||
| func (l *PageListTaskLogic) getTaskList(req *types.XJLABTaskReq) ([]*types.TaskModel, error) { | |||||
| var list []*types.TaskModel | |||||
| limit := req.PageSize | |||||
| offset := req.PageSize * (req.PageNum - 1) | |||||
| db := l.buildBaseQuery(req) | |||||
| if err := db.Limit(limit).Offset(offset).Order("created_time desc").Find(&list).Error; err != nil { | |||||
| return nil, result.NewDefaultError(err.Error()) | |||||
| } | |||||
| return list, nil | |||||
| } | |||||
| // updateTaskStatusAsync starts two goroutines that call UpdateTaskStatus and | |||||
| // UpdateAiTaskStatus on l.svcCtx.Scheduler.AiService.St with the given tasks. | |||||
| // It returns immediately and neither waits for nor observes the outcome of | |||||
| // either call. | |||||
| func (l *PageListTaskLogic) updateTaskStatusAsync(tasks []*types.TaskModel) { | |||||
| go l.svcCtx.Scheduler.AiService.St.UpdateTaskStatus(tasks) | |||||
| go l.svcCtx.Scheduler.AiService.St.UpdateAiTaskStatus(tasks) | |||||
| } | |||||
| // processTaskResponses 处理任务响应数据 | |||||
| func (l *PageListTaskLogic) processTaskResponses(tasks []*types.TaskModel) []*TaskResp { | |||||
| taskResps := make([]*TaskResp, 0, len(tasks)) | |||||
| for _, model := range tasks { | |||||
| // 计算运行时间 | |||||
| model.RunningTime = l.calculateRunningTime(model.StartTime, model.EndTime) | |||||
| // 转换为响应结构体 | |||||
| taskResp, err := l.convertToTaskResp(model) | |||||
| if err != nil { | |||||
| l.Errorf("Failed to convert task model: %v", err) | |||||
| continue | |||||
| } | |||||
| // 丰富任务详情 | |||||
| enrichedTask := l.enrichTaskDetails(taskResp) | |||||
| taskResps = append(taskResps, &enrichedTask) | |||||
| } | |||||
| return taskResps | |||||
| } | |||||
| // convertToTaskResp 将TaskModel转换为TaskResp | |||||
| func (l *PageListTaskLogic) convertToTaskResp(model *types.TaskModel) (*TaskResp, error) { | |||||
| jsonData, err := jsoniter.Marshal(model) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | |||||
| var taskResp TaskResp | |||||
| if err := jsoniter.Unmarshal(jsonData, &taskResp); err != nil { | |||||
| return nil, err | |||||
| } | |||||
| return &taskResp, nil | |||||
| } | |||||
| // calculateRunningTime 计算任务的运行时间 | |||||
| func (l *PageListTaskLogic) calculateRunningTime(startTimeStr, endTimeStr string) int64 { | |||||
| if startTimeStr == "" { | |||||
| return 0 | |||||
| } | |||||
| startTime := timeutils.TimeStringToGoTime(startTimeStr) | |||||
| // 如果没有结束时间,计算到当前时间 | |||||
| if endTimeStr == "" { | |||||
| return int64(time.Since(startTime).Seconds()) | |||||
| } | |||||
| endTime := timeutils.TimeStringToGoTime(endTimeStr) | |||||
| return int64(endTime.Sub(startTime).Seconds()) | |||||
| } | |||||
| // enrichTaskDetails 丰富任务详情 | |||||
| func (l *PageListTaskLogic) enrichTaskDetails(task *TaskResp) TaskResp { | |||||
| cluster := l.getClusterInfo(task.Id, task.AdapterTypeDict) | |||||
| task.ClusterId = cluster.ClusterId | |||||
| task.ClusterName = cluster.ClusterName | |||||
| task.ResourceSpec = cluster.ResourceSpec | |||||
| task.Card = cluster.Card | |||||
| return *task | |||||
| } | |||||
| // getClusterInfo 根据适配器类型获取集群信息 | |||||
| func (l *PageListTaskLogic) getClusterInfo(taskId int64, adapterType string) clusterInfo { | |||||
| var cluster clusterInfo | |||||
| switch adapterType { | |||||
| case constants.AdapterTypeCloud: | |||||
| // 云计算任务 | |||||
| l.svcCtx.DbEngin.Table("task_cloud"). | |||||
| Where("task_id = ?", taskId). | |||||
| Select("cluster_id,cluster_name,resource_spec"). | |||||
| Find(&cluster) | |||||
| case constants.AdapterTypeAI: | |||||
| // AI计算任务 | |||||
| l.svcCtx.DbEngin.Table("task_ai"). | |||||
| Where("task_id = ?", taskId). | |||||
| Select("cluster_id,cluster_name,resource_spec,card"). | |||||
| Find(&cluster) | |||||
| // AI任务特殊处理:如果没有resource_spec,使用card字段 | |||||
| if cluster.ResourceSpec == "" { | |||||
| cluster.ResourceSpec = cluster.Card | |||||
| } | |||||
| case constants.AdapterTypeHPC: | |||||
| // 高性能计算任务 | |||||
| l.svcCtx.DbEngin.Table("task_hpc"). | |||||
| Where("task_id = ?", taskId). | |||||
| Select("cluster_id,cluster_name,resource_spec"). | |||||
| Find(&cluster) | |||||
| } | |||||
| return cluster | |||||
| } | |||||
| @@ -0,0 +1,27 @@ | |||||
| package xjlab | |||||
| import ( | |||||
| "context" | |||||
| "github.com/zeromicro/go-zero/core/logx" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | |||||
| ) | |||||
| // TaskResourceUsageLogic bundles the request context, the service context and | |||||
| // a logger for the task resource usage endpoint. | |||||
| type TaskResourceUsageLogic struct { | |||||
| logx.Logger | |||||
| ctx context.Context | |||||
| svcCtx *svc.ServiceContext | |||||
| } | |||||
| func NewTaskResourceUsageLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskResourceUsageLogic { | |||||
| return &TaskResourceUsageLogic{ | |||||
| Logger: logx.WithContext(ctx), | |||||
| ctx: ctx, | |||||
| svcCtx: svcCtx, | |||||
| } | |||||
| } | |||||
| // TaskResourceUsage currently performs no work: it ignores req and returns a | |||||
| // nil response together with a nil error. | |||||
| // NOTE(review): looks like a placeholder awaiting an implementation — confirm. | |||||
| func (l *TaskResourceUsageLogic) TaskResourceUsage(req *types.FId) (*types.CommonResp, error) { | |||||
| return nil, nil | |||||
| } | |||||
| @@ -0,0 +1,155 @@ | |||||
| package xjlab | |||||
| import ( | |||||
| "context" | |||||
| "github.com/zeromicro/go-zero/core/logx" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | |||||
| ) | |||||
| // Task status values recognised by categorizeTaskStatus when it sorts the | |||||
| // per-status counts into the TaskStatus fields. | |||||
| const ( | |||||
| StatusCompleted = "Completed" | |||||
| StatusFailed = "Failed" | |||||
| StatusRunning = "Running" | |||||
| StatusSaved = "Saved" | |||||
| StatusStopped = "Stopped" | |||||
| StatusSucceeded = "Succeeded" | |||||
| StatusUndefined = "undefined" | |||||
| StatusWaiting = "Waiting" | |||||
| StatusWaitRestart = "WaitRestart" | |||||
| ) | |||||
| // TaskStatusStatistics bundles the request context, the service context and a | |||||
| // logger for the task status statistics queries. | |||||
| type TaskStatusStatistics struct { | |||||
| logx.Logger | |||||
| ctx context.Context | |||||
| svcCtx *svc.ServiceContext | |||||
| } | |||||
| func NewTaskStatusStatisticsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskStatusStatistics { | |||||
| return &TaskStatusStatistics{ | |||||
| Logger: logx.WithContext(ctx), | |||||
| ctx: ctx, | |||||
| svcCtx: svcCtx, | |||||
| } | |||||
| } | |||||
| // TaskStatus is the task status statistics returned by GetTaskStatusStatistics. | |||||
| type TaskStatus struct { | |||||
| // Total number of tasks across all statuses. | |||||
| Total int64 `json:"total"` | |||||
| // Counts per individual status. | |||||
| Completed int64 `json:"completed"` // completed | |||||
| Failed int64 `json:"failed"` // failed | |||||
| Running int64 `json:"running"` // running | |||||
| Saved int64 `json:"saved"` // saved | |||||
| Stopped int64 `json:"stopped"` // stopped | |||||
| Succeeded int64 `json:"succeeded"` // succeeded | |||||
| Undefined int64 `json:"undefined"` // undefined | |||||
| Waiting int64 `json:"waiting"` // waiting | |||||
| WaitRestart int64 `json:"waitRestart"` // waiting for restart | |||||
| // Counts per business category. | |||||
| NormalCount int64 `json:"normalCount"` // normal tasks (Completed + Succeeded + Saved) | |||||
| RunningCount int64 `json:"runningCount"` // in-progress tasks (Running + Waiting + WaitRestart) | |||||
| ErrorCount int64 `json:"errorCount"` // abnormal tasks (Failed + Stopped + Undefined, plus any unrecognised status) | |||||
| } | |||||
| // StatusResult receives one row of the grouped status query issued by | |||||
| // GetTaskStatusStatistics: a status value and the number of tasks having it. | |||||
| type StatusResult struct { | |||||
| Status string `gorm:"column:status" json:"status"` | |||||
| Count int64 `gorm:"column:count" json:"count"` | |||||
| } | |||||
| // GetTaskStatusStatistics 获取任务状态统计 | |||||
| func (l *TaskStatusStatistics) GetTaskStatusStatistics(req *types.XJLABCommonReq) (*TaskStatus, error) { | |||||
| // 构建数据库查询 | |||||
| db := l.svcCtx.DbEngin.Model(&types.TaskModel{}). | |||||
| Table("task"). | |||||
| Where("deleted_at IS NULL") | |||||
| // 查询状态统计 | |||||
| var results []StatusResult | |||||
| err := db. | |||||
| Select("status, COUNT(*) as count"). | |||||
| Group("status"). | |||||
| Find(&results).Error | |||||
| if err != nil { | |||||
| l.Errorf("Failed to query task status statistics: %v", err) | |||||
| return nil, result.NewDefaultError("Failed to get task status statistics") | |||||
| } | |||||
| // 初始化统计结构 | |||||
| stats := &TaskStatus{} | |||||
| // 填充统计数据 | |||||
| for _, r := range results { | |||||
| stats.Total += r.Count | |||||
| l.categorizeTaskStatus(stats, r.Status, r.Count) | |||||
| } | |||||
| l.Infof("Task status statistics retrieved successfully. Total: %d, Normal: %d, Running: %d, Error: %d", | |||||
| stats.Total, stats.NormalCount, stats.RunningCount, stats.ErrorCount) | |||||
| return stats, nil | |||||
| } | |||||
| // categorizeTaskStatus 根据状态分类统计数据 | |||||
| func (l *TaskStatusStatistics) categorizeTaskStatus(stats *TaskStatus, status string, count int64) { | |||||
| switch status { | |||||
| case StatusCompleted: | |||||
| stats.Completed = count | |||||
| stats.NormalCount += count | |||||
| case StatusFailed: | |||||
| stats.Failed = count | |||||
| stats.ErrorCount += count | |||||
| case StatusRunning: | |||||
| stats.Running = count | |||||
| stats.RunningCount += count | |||||
| case StatusSaved: | |||||
| stats.Saved = count | |||||
| stats.NormalCount += count | |||||
| case StatusStopped: | |||||
| stats.Stopped = count | |||||
| stats.ErrorCount += count | |||||
| case StatusSucceeded: | |||||
| stats.Succeeded = count | |||||
| stats.NormalCount += count | |||||
| case StatusUndefined: | |||||
| stats.Undefined = count | |||||
| stats.ErrorCount += count | |||||
| case StatusWaiting: | |||||
| stats.Waiting = count | |||||
| stats.RunningCount += count | |||||
| case StatusWaitRestart: | |||||
| stats.WaitRestart = count | |||||
| stats.RunningCount += count | |||||
| default: | |||||
| // 记录未知状态 | |||||
| l.Logger.Errorf("Unknown task status encountered: %s with count: %d", status, count) | |||||
| stats.ErrorCount += count | |||||
| } | |||||
| } | |||||
| // GetSimpleTaskStatistics 获取简化的任务统计数据 | |||||
| func (l *TaskStatusStatistics) GetSimpleTaskStatistics(req *types.XJLABCommonReq) (*map[string]interface{}, error) { | |||||
| // 获取完整统计 | |||||
| resp, err := l.GetTaskStatusStatistics(req) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } | |||||
| // 返回简化的统计数据,对应前端UI的三个卡片 | |||||
| simpleStats := map[string]interface{}{ | |||||
| "totalCount": resp.Total, // 任务总数 | |||||
| "normalCount": resp.NormalCount, // 正常任务数 | |||||
| "errorCount": resp.ErrorCount, // 任务告警数 | |||||
| "runningCount": resp.RunningCount, // 运行中任务数 | |||||
| } | |||||
| return &simpleStats, nil | |||||
| } | |||||
| @@ -1,6 +1,10 @@ | |||||
| package database | package database | ||||
| import ( | import ( | ||||
| "strconv" | |||||
| "time" | |||||
| jsoniter "github.com/json-iterator/go" | |||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client" | clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/client" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" | ||||
| @@ -11,8 +15,6 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "gorm.io/gorm" | "gorm.io/gorm" | ||||
| "strconv" | |||||
| "time" | |||||
| ) | ) | ||||
| type AiStorage struct { | type AiStorage struct { | ||||
| @@ -125,7 +127,7 @@ func (s *AiStorage) DoesTaskNameExist(name string, taskType string) (bool, error | |||||
| return total > 0, nil | return total > 0, nil | ||||
| } | } | ||||
| func (s *AiStorage) SaveTask(name string, desc string, userId int64, strategyCode int64, synergyStatus int64, aiType string, yaml string, saveToChain func(task models.Task, id int64) error) (int64, error) { | |||||
| func (s *AiStorage) SaveTask(name string, desc string, userId int64, strategyCode int64, synergyStatus int64, aiType string, yaml string, saveToChain func(task models.Task, id int64) error, userName string) (int64, error) { | |||||
| startTime := time.Now() | startTime := time.Now() | ||||
| // 构建主任务结构体 | // 构建主任务结构体 | ||||
| @@ -135,6 +137,7 @@ func (s *AiStorage) SaveTask(name string, desc string, userId int64, strategyCod | |||||
| Description: desc, | Description: desc, | ||||
| Name: name, | Name: name, | ||||
| UserId: userId, | UserId: userId, | ||||
| UserName: userName, | |||||
| SynergyStatus: synergyStatus, | SynergyStatus: synergyStatus, | ||||
| Strategy: strategyCode, | Strategy: strategyCode, | ||||
| AdapterTypeDict: "1", | AdapterTypeDict: "1", | ||||
| @@ -214,25 +217,26 @@ func (s *AiStorage) SaveAiTask(taskId int64, opt option.Option, adapterName stri | |||||
| if err != nil { | if err != nil { | ||||
| return err | return err | ||||
| } | } | ||||
| resourceSpec, _ := jsoniter.MarshalToString(aiOpt.ResourcesRequired) | |||||
| aiTaskModel := models.TaskAi{ | aiTaskModel := models.TaskAi{ | ||||
| TaskId: taskId, | |||||
| AdapterId: aId, | |||||
| AdapterName: adapterName, | |||||
| ClusterId: cId, | |||||
| ClusterName: clusterName, | |||||
| Name: aiOpt.TaskName, | |||||
| Replica: int64(aiOpt.Replica), | |||||
| JobId: jobId, | |||||
| TaskType: aiOpt.TaskType, | |||||
| ModelName: aiOpt.ModelName, | |||||
| Strategy: aiOpt.StrategyName, | |||||
| Status: status, | |||||
| Msg: msg, | |||||
| Output: aiOpt.Output, | |||||
| Card: aiOpt.ComputeCard, | |||||
| StartTime: time.Now().Format(time.RFC3339), | |||||
| CommitTime: time.Now(), | |||||
| TaskId: taskId, | |||||
| AdapterId: aId, | |||||
| AdapterName: adapterName, | |||||
| ClusterId: cId, | |||||
| ClusterName: clusterName, | |||||
| Name: aiOpt.TaskName, | |||||
| Replica: int64(aiOpt.Replica), | |||||
| JobId: jobId, | |||||
| TaskType: aiOpt.TaskType, | |||||
| ModelName: aiOpt.ModelName, | |||||
| Strategy: aiOpt.StrategyName, | |||||
| Status: status, | |||||
| Msg: msg, | |||||
| Output: aiOpt.Output, | |||||
| Card: aiOpt.ComputeCard, | |||||
| StartTime: time.Now().Format(time.RFC3339), | |||||
| CommitTime: time.Now(), | |||||
| ResourceSpec: resourceSpec, | |||||
| } | } | ||||
| // 保存任务数据到数据库 | // 保存任务数据到数据库 | ||||
| tx := s.DbEngin.Create(&aiTaskModel) | tx := s.DbEngin.Create(&aiTaskModel) | ||||
| @@ -16,6 +16,8 @@ package scheduler | |||||
| import ( | import ( | ||||
| "encoding/json" | "encoding/json" | ||||
| "strings" | |||||
| "github.com/pkg/errors" | "github.com/pkg/errors" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/config" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/config" | ||||
| @@ -27,7 +29,6 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/response" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/response" | ||||
| "gorm.io/gorm" | "gorm.io/gorm" | ||||
| "sigs.k8s.io/yaml" | "sigs.k8s.io/yaml" | ||||
| "strings" | |||||
| ) | ) | ||||
| type Scheduler struct { | type Scheduler struct { | ||||
| @@ -189,13 +190,13 @@ func (s *Scheduler) SaveToDb() error { | |||||
| return nil | return nil | ||||
| } | } | ||||
| func (s *Scheduler) CreateTask(taskName string, desc string, userId int64, synergyCode int64, strategyName string, yaml string, token string, userIp string, config *config.Config) (int64, error) { | |||||
| func (s *Scheduler) CreateTask(taskName string, desc string, userId int64, synergyCode int64, strategyName string, yaml string, token string, userIp string, config *config.Config, userName string) (int64, error) { | |||||
| strategyCode, err := s.AiStorages.GetStrategyCode(strategyName) | strategyCode, err := s.AiStorages.GetStrategyCode(strategyName) | ||||
| if err != nil { | if err != nil { | ||||
| return 0, err | return 0, err | ||||
| } | } | ||||
| id, err := s.AiStorages.SaveTask(taskName, desc, userId, strategyCode, synergyCode, "10", yaml, nil) | |||||
| id, err := s.AiStorages.SaveTask(taskName, desc, userId, strategyCode, synergyCode, "10", yaml, nil, userName) | |||||
| if err != nil { | if err != nil { | ||||
| return 0, err | return 0, err | ||||
| } | } | ||||
| @@ -19,6 +19,10 @@ import ( | |||||
| "encoding/json" | "encoding/json" | ||||
| "errors" | "errors" | ||||
| "fmt" | "fmt" | ||||
| "strconv" | |||||
| "strings" | |||||
| "sync" | |||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-ac/hpcAC" | "gitlink.org.cn/JointCloud/pcm-ac/hpcAC" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler" | ||||
| @@ -39,9 +43,6 @@ import ( | |||||
| omodel "gitlink.org.cn/JointCloud/pcm-octopus/http/model" | omodel "gitlink.org.cn/JointCloud/pcm-octopus/http/model" | ||||
| "gitlink.org.cn/JointCloud/pcm-octopus/octopus" | "gitlink.org.cn/JointCloud/pcm-octopus/octopus" | ||||
| "gitlink.org.cn/JointCloud/pcm-openi/model" | "gitlink.org.cn/JointCloud/pcm-openi/model" | ||||
| "strconv" | |||||
| "strings" | |||||
| "sync" | |||||
| ) | ) | ||||
| type AiScheduler struct { | type AiScheduler struct { | ||||
| @@ -248,7 +249,7 @@ func (as *AiScheduler) handleErrors(errs []interface{}, clusters []*strategy.Ass | |||||
| var taskId int64 | var taskId int64 | ||||
| switch mode { | switch mode { | ||||
| case executor.SUBMIT_MODE_JOINT_CLOUD: | case executor.SUBMIT_MODE_JOINT_CLOUD: | ||||
| tid, err := as.CreateTask(as.option.TaskName, "", 0, synergystatus, as.option.StrategyName, "", "", "", nil) | |||||
| tid, err := as.CreateTask(as.option.TaskName, "", 0, synergystatus, as.option.StrategyName, "", "", "", nil, "0") | |||||
| if err != nil { | if err != nil { | ||||
| return err | return err | ||||
| } | } | ||||
| @@ -3,6 +3,15 @@ package imageInference | |||||
| import ( | import ( | ||||
| "encoding/json" | "encoding/json" | ||||
| "errors" | "errors" | ||||
| "log" | |||||
| "math/rand" | |||||
| "mime/multipart" | |||||
| "net/http" | |||||
| "sort" | |||||
| "strconv" | |||||
| "sync" | |||||
| "time" | |||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/database" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/database" | ||||
| @@ -14,14 +23,6 @@ import ( | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" | ||||
| "log" | |||||
| "math/rand" | |||||
| "mime/multipart" | |||||
| "net/http" | |||||
| "sort" | |||||
| "strconv" | |||||
| "sync" | |||||
| "time" | |||||
| ) | ) | ||||
| type IImageInference interface { | type IImageInference interface { | ||||
| @@ -126,7 +127,7 @@ func (i *ImageInference) saveTask() (int64, error) { | |||||
| return 0, err | return 0, err | ||||
| } | } | ||||
| id, err := i.storage.SaveTask(i.opt.TaskName, "", 0, strategyCode, synergystatus, i.inference.GetAiType(), "", nil) | |||||
| id, err := i.storage.SaveTask(i.opt.TaskName, "", 0, strategyCode, synergystatus, i.inference.GetAiType(), "", nil, "0") | |||||
| if err != nil { | if err != nil { | ||||
| return 0, err | return 0, err | ||||
| } | } | ||||
| @@ -70,7 +70,7 @@ func (ti *TextInference) saveTask() (int64, error) { | |||||
| var synergystatus int64 | var synergystatus int64 | ||||
| var strategyCode int64 | var strategyCode int64 | ||||
| id, err := ti.storage.SaveTask(ti.opt.TaskName, "", 0, strategyCode, synergystatus, ti.inference.GetAiType(), "", nil) | |||||
| id, err := ti.storage.SaveTask(ti.opt.TaskName, "", 0, strategyCode, synergystatus, ti.inference.GetAiType(), "", nil, "0") | |||||
| if err != nil { | if err != nil { | ||||
| return 0, err | return 0, err | ||||
| } | } | ||||
| @@ -14,6 +14,7 @@ type CreateParam struct { | |||||
| ContainerGroupName string `json:"containerGroupName"` | ContainerGroupName string `json:"containerGroupName"` | ||||
| Description string `json:"description,omitempty"` | Description string `json:"description,omitempty"` | ||||
| UserId int64 `json:"userId"` | UserId int64 `json:"userId"` | ||||
| UserName string `json:"userName"` | |||||
| Name string `json:"name"` | Name string `json:"name"` | ||||
| Image string `json:"image"` | Image string `json:"image"` | ||||
| ImageRegistry string `json:"imageRegistry,omitempty"` | ImageRegistry string `json:"imageRegistry,omitempty"` | ||||
| @@ -1465,6 +1465,7 @@ type CreateSubnetResp struct { | |||||
| type CreateTaskReq struct { | type CreateTaskReq struct { | ||||
| Name string `json:"name"` | Name string `json:"name"` | ||||
| UserId int64 `json:"userId,optional"` | UserId int64 `json:"userId,optional"` | ||||
| UserName string `json:"userName,optional"` | |||||
| Description string `json:"description,optional"` | Description string `json:"description,optional"` | ||||
| Token string `json:"token,optional"` | Token string `json:"token,optional"` | ||||
| UserIp string `json:"userIp,optional"` | UserIp string `json:"userIp,optional"` | ||||
| @@ -2500,6 +2501,7 @@ type Floatingips struct { | |||||
| type GeneralTaskReq struct { | type GeneralTaskReq struct { | ||||
| Token string `json:"token,optional"` | Token string `json:"token,optional"` | ||||
| UserId int64 `json:"userId,optional"` | UserId int64 `json:"userId,optional"` | ||||
| UserName string `json:"userName,optional"` | |||||
| Name string `json:"name"` | Name string `json:"name"` | ||||
| AdapterIds []string `json:"adapterIds"` | AdapterIds []string `json:"adapterIds"` | ||||
| ClusterIds []string `json:"clusterIds"` | ClusterIds []string `json:"clusterIds"` | ||||
| @@ -5647,6 +5649,7 @@ type TaskModel struct { | |||||
| AdapterTypeDict string `json:"adapterTypeDict" db:"adapter_type_dict" gorm:"adapter_type_dict"` //适配器类型(对应字典表的值 | AdapterTypeDict string `json:"adapterTypeDict" db:"adapter_type_dict" gorm:"adapter_type_dict"` //适配器类型(对应字典表的值 | ||||
| TaskTypeDict string `json:"taskTypeDict" db:"task_type_dict" gorm:"task_type_dict"` //任务类型(对应字典表的值 | TaskTypeDict string `json:"taskTypeDict" db:"task_type_dict" gorm:"task_type_dict"` //任务类型(对应字典表的值 | ||||
| UserId int64 `json:"userId,omitempty" db:"user_id"` | UserId int64 `json:"userId,omitempty" db:"user_id"` | ||||
| UserName string `json:"userName,omitempty" db:"user_name"` | |||||
| } | } | ||||
| type TaskSl struct { | type TaskSl struct { | ||||
| @@ -6537,3 +6540,52 @@ type TaskNumResp struct { | |||||
| History int `json:"history"` | History int `json:"history"` | ||||
| Failed int `json:"failed"` | Failed int `json:"failed"` | ||||
| } | } | ||||
| // JccUserInfo carries the name and id of a JCC user. | |||||
| type JccUserInfo struct { | |||||
| UserName string `json:"userName,optional"` | |||||
| UserId int64 `json:"userId,optional"` | |||||
| } | |||||
| // XJLABTaskReq is the form-bound request for listing tasks. All filter fields | |||||
| // are optional; paging comes from the embedded PageInfo. | |||||
| // | |||||
| // Both this struct and the embedded JccUserInfo declare a UserName field; the | |||||
| // selector req.UserName resolves to the field declared directly here. | |||||
| type XJLABTaskReq struct { | |||||
| Id string `form:"id,optional"` | |||||
| Name string `form:"name,optional"` | |||||
| Status string `form:"status,optional"` | |||||
| UserName string `form:"userName,optional"` | |||||
| AdapterTypeDict string `form:"adapterTypeDict,optional"` | |||||
| StartTime string `form:"startTime,optional"` | |||||
| EndTime string `form:"endTime,optional"` | |||||
| PageInfo | |||||
| JccUserInfo | |||||
| } | |||||
| // XJLABTaskResp describes a single task as returned to the client, including | |||||
| // the user, resource and cluster it is associated with. | |||||
| type XJLABTaskResp struct { | |||||
| Id int64 `json:"id,omitempty,string" db:"id"` // id | |||||
| Name string `json:"name,omitempty" db:"name"` // job name | |||||
| Description string `json:"description,omitempty" db:"description"` // job description | |||||
| Status string `json:"status,omitempty" db:"status"` // job status | |||||
| Strategy int64 `json:"strategy" db:"strategy"` // strategy | |||||
| SynergyStatus int64 `json:"synergyStatus" db:"synergy_status"` // synergy status (0 - not coordinated, 1 - coordinated) | |||||
| CommitTime string `json:"commitTime,omitempty" db:"commit_time"` // commit time | |||||
| StartTime string `json:"startTime,omitempty" db:"start_time"` // start time | |||||
| EndTime string `json:"endTime,omitempty" db:"end_time"` // time the run ended | |||||
| RunningTime int64 `json:"runningTime" db:"running_time"` // elapsed running time (in seconds) | |||||
| YamlString string `json:"yamlString,omitempty" db:"yaml_string"` | |||||
| Result string `json:"result,omitempty" db:"result"` // job result | |||||
| DeletedAt string `json:"deletedAt,omitempty" gorm:"index" db:"deleted_at"` | |||||
| NsID string `json:"nsId,omitempty" db:"ns_id"` | |||||
| TenantId string `json:"tenantId,omitempty" db:"tenant_id"` | |||||
| CreatedTime string `json:"createdTime,omitempty" db:"created_time" gorm:"autoCreateTime"` | |||||
| UpdatedTime string `json:"updatedTime,omitempty" db:"updated_time"` | |||||
| AdapterTypeDict string `json:"adapterTypeDict" db:"adapter_type_dict" gorm:"adapter_type_dict"` // adapter type (value from the dictionary table) | |||||
| TaskTypeDict string `json:"taskTypeDict" db:"task_type_dict" gorm:"task_type_dict"` // task type (value from the dictionary table) | |||||
| UserId int64 `json:"userId,omitempty" db:"user_id"` | |||||
| UserName string `json:"userName,omitempty" db:"-"` // user name | |||||
| ResourceId string `json:"resourceId,omitempty" db:"resource_id"` // resource id | |||||
| ResourceName string `json:"resourceName,omitempty" db:"resource_name"` // resource name | |||||
| ClusterId string `json:"clusterId,omitempty" db:"cluster_id"` // cluster id | |||||
| ClusterName string `json:"clusterName,omitempty" db:"cluster_name"` // cluster name | |||||
| } | |||||
| // XJLABCommonReq is an empty request type for endpoints that take no | |||||
| // parameters. | |||||
| type XJLABCommonReq struct { | |||||
| } | |||||
| @@ -0,0 +1,24 @@ | |||||
| package constants | |||||
| // Names for the user id and user name values. | |||||
| // NOTE(review): presumably used as lookup keys for user information — confirm | |||||
| // against the callers. | |||||
| const ( | |||||
| UserId = "UserId" | |||||
| UserName = "UserName" | |||||
| ) | |||||
| // Task status values. | |||||
| const ( | |||||
| StatusSaved = "Saved" | |||||
| StatusDeploying = "Deploying" | |||||
| ) | |||||
| // Adapter type codes as stored in a task's adapter_type_dict column. | |||||
| const ( | |||||
| AdapterTypeCloud = "0" // cloud computing | |||||
| AdapterTypeAI = "1" // AI computing | |||||
| AdapterTypeHPC = "2" // high-performance computing | |||||
| ) | |||||
| // HPCSystemType identifies the kind of HPC (supercomputing) system. | |||||
| type HPCSystemType string | |||||
| // Known HPC system types. | |||||
| const ( | |||||
| HPC_SYSTEM_SLURM HPCSystemType = "slurm" | |||||
| HPC_SYSTEM_AC HPCSystemType = "ac" | |||||
| ) | |||||
| @@ -35,27 +35,28 @@ type ( | |||||
| } | } | ||||
| TaskAi struct { | TaskAi struct { | ||||
| Id int64 `db:"id"` // id | |||||
| TaskId int64 `db:"task_id"` // 任务id | |||||
| AdapterId int64 `db:"adapter_id"` // 适配器id | |||||
| AdapterName string `db:"adapter_name"` // 适配器名称 | |||||
| ClusterId int64 `db:"cluster_id"` // 集群id | |||||
| ClusterName string `db:"cluster_name"` // 集群名称 | |||||
| Name string `db:"name"` // 任务名 | |||||
| Replica int64 `db:"replica"` // 执行数 | |||||
| JobId string `db:"job_id"` // 集群返回任务id | |||||
| Strategy string `db:"strategy"` // 主任务使用策略 | |||||
| Status string `db:"status"` // 任务状态 | |||||
| Msg string `db:"msg"` // 集群返回任务信息 | |||||
| CommitTime time.Time `db:"commit_time"` // 提交时间 | |||||
| StartTime string `db:"start_time"` // 开始时间 | |||||
| EndTime string `db:"end_time"` // 结束时间 | |||||
| TaskType string `db:"task_type"` | |||||
| DeletedAt *time.Time `db:"deleted_at"` | |||||
| Card string `db:"card"` | |||||
| InferUrl string `db:"infer_url"` | |||||
| ModelName string `db:"model_name"` | |||||
| Output string `db:"output"` | |||||
| Id int64 `db:"id"` // id | |||||
| TaskId int64 `db:"task_id"` // 任务id | |||||
| AdapterId int64 `db:"adapter_id"` // 适配器id | |||||
| AdapterName string `db:"adapter_name"` // 适配器名称 | |||||
| ClusterId int64 `db:"cluster_id"` // 集群id | |||||
| ClusterName string `db:"cluster_name"` // 集群名称 | |||||
| Name string `db:"name"` // 任务名 | |||||
| Replica int64 `db:"replica"` // 执行数 | |||||
| JobId string `db:"job_id"` // 集群返回任务id | |||||
| Strategy string `db:"strategy"` // 主任务使用策略 | |||||
| Status string `db:"status"` // 任务状态 | |||||
| Msg string `db:"msg"` // 集群返回任务信息 | |||||
| CommitTime time.Time `db:"commit_time"` // 提交时间 | |||||
| StartTime string `db:"start_time"` // 开始时间 | |||||
| EndTime string `db:"end_time"` // 结束时间 | |||||
| TaskType string `db:"task_type"` | |||||
| DeletedAt *time.Time `db:"deleted_at"` | |||||
| Card string `db:"card"` | |||||
| InferUrl string `db:"infer_url"` | |||||
| ModelName string `db:"model_name"` | |||||
| Output string `db:"output"` | |||||
| ResourceSpec string `db:"resource_spec"` | |||||
| } | } | ||||
| ) | ) | ||||
| @@ -36,55 +36,64 @@ type ( | |||||
| } | } | ||||
| TaskHpc struct { | TaskHpc struct { | ||||
| Id int64 `db:"id"` // id | |||||
| TaskId int64 `db:"task_id"` // 任务id | |||||
| JobId string `db:"job_id"` // 作业id(在第三方系统中的作业id) | |||||
| AdapterId int64 `db:"adapter_d"` // 适配器id | |||||
| AdapterName string `db:"adapter_name"` //适配器名称 | |||||
| ClusterId int64 `db:"cluster_id"` //集群id | |||||
| ClusterName string `db:"cluster_name"` //集群名称 | |||||
| Name string `db:"name"` // 名称 | |||||
| Backend string `db:"backend"` // 平台类型 | |||||
| OperateType string `db:"operate_type"` // 操作类型 | |||||
| Status string `db:"status"` // 状态 | |||||
| CmdScript string `db:"cmd_script"` | |||||
| StartTime string `db:"start_time"` // 开始时间 | |||||
| EndTime string `db:"end_time"` // 结束时间 | |||||
| RunningTime int64 `db:"running_time"` // 运行时间 | |||||
| DerivedEs string `db:"derived_es"` | |||||
| Cluster string `db:"cluster"` | |||||
| BlockId int64 `db:"block_id"` | |||||
| AllocNodes int64 `db:"alloc_nodes"` | |||||
| AllocCpu int64 `db:"alloc_cpu"` | |||||
| CardCount int64 `db:"card_count"` // 卡数 | |||||
| Version string `db:"version"` | |||||
| Account string `db:"account"` | |||||
| WorkDir string `db:"work_dir"` // 工作路径 | |||||
| AssocId int64 `db:"assoc_id"` | |||||
| ExitCode int64 `db:"exit_code"` | |||||
| WallTime string `db:"wall_time"` // 最大运行时间 | |||||
| Result string `db:"result"` // 运行结果 | |||||
| DeletedAt sql.NullTime `db:"deleted_at"` // 删除时间 | |||||
| YamlString string `db:"yaml_string"` | |||||
| AppType string `db:"app_type"` // 应用类型 | |||||
| AppName string `db:"app_name"` // 应用名称 | |||||
| Queue string `db:"queue"` // 队列名称 | |||||
| SubmitType string `db:"submit_type"` // cmd(命令行模式) | |||||
| NNode string `db:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"") | |||||
| StdOutFile string `db:"std_out_file"` // 工作路径/std.err.%j | |||||
| StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j | |||||
| StdInput string `db:"std_input"` | |||||
| Partition string `db:"partition"` | |||||
| DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是) | |||||
| CreatedBy int64 `db:"created_by"` // 创建人 | |||||
| CreatedTime time.Time `db:"created_time"` // 创建时间 | |||||
| UpdatedBy int64 `db:"updated_by"` // 更新人 | |||||
| UpdatedTime time.Time `db:"updated_time"` // 更新时间 | |||||
| UserId int64 `db:"user_id"` | |||||
| TimeLimit int64 `db:"time_limit"` | |||||
| Params string `db:"params"` // 渲染参数 | |||||
| Script string `db:"script"` // 生成的脚本 | |||||
| TemplateId int64 `db:"template_id"` // 模板ID | |||||
| Id int64 `db:"id"` // id | |||||
| TaskId int64 `db:"task_id"` // 任务id | |||||
| JobId string `db:"job_id"` // 作业id(在第三方系统中的作业id) | |||||
| AdapterId int64 `db:"adapter_d"` // 适配器id | |||||
| AdapterName string `db:"adapter_name"` //适配器名称 | |||||
| ClusterId int64 `db:"cluster_id"` //集群id | |||||
| ClusterName string `db:"cluster_name"` //集群名称 | |||||
| Name string `db:"name"` // 名称 | |||||
| Backend string `db:"backend"` // 平台类型 | |||||
| OperateType string `db:"operate_type"` // 操作类型 | |||||
| Status string `db:"status"` // 状态 | |||||
| CmdScript string `db:"cmd_script"` | |||||
| StartTime string `db:"start_time"` // 开始时间 | |||||
| EndTime string `db:"end_time"` // 结束时间 | |||||
| RunningTime int64 `db:"running_time"` // 运行时间 | |||||
| DerivedEs string `db:"derived_es"` | |||||
| Cluster string `db:"cluster"` | |||||
| BlockId int64 `db:"block_id"` | |||||
| AllocNodes int64 `db:"alloc_nodes"` | |||||
| AllocCpu int64 `db:"alloc_cpu"` | |||||
| CardCount int64 `db:"card_count"` // 卡数 | |||||
| Version string `db:"version"` | |||||
| Account string `db:"account"` | |||||
| WorkDir string `db:"work_dir"` // 工作路径 | |||||
| AssocId int64 `db:"assoc_id"` | |||||
| ExitCode int64 `db:"exit_code"` | |||||
| WallTime string `db:"wall_time"` // 最大运行时间 | |||||
| Result string `db:"result"` // 运行结果 | |||||
| DeletedAt sql.NullTime `db:"deleted_at"` // 删除时间 | |||||
| YamlString string `db:"yaml_string"` | |||||
| AppType string `db:"app_type"` // 应用类型 | |||||
| AppName string `db:"app_name"` // 应用名称 | |||||
| Queue string `db:"queue"` // 队列名称 | |||||
| SubmitType string `db:"submit_type"` // cmd(命令行模式) | |||||
| NNode string `db:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"") | |||||
| StdOutFile string `db:"std_out_file"` // 工作路径/std.err.%j | |||||
| StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j | |||||
| StdInput string `db:"std_input"` | |||||
| Partition string `db:"partition"` | |||||
| DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是) | |||||
| CreatedBy int64 `db:"created_by"` // 创建人 | |||||
| CreatedTime time.Time `db:"created_time"` // 创建时间 | |||||
| UpdatedBy int64 `db:"updated_by"` // 更新人 | |||||
| UpdatedTime time.Time `db:"updated_time"` // 更新时间 | |||||
| UserId int64 `db:"user_id"` | |||||
| TimeLimit int64 `db:"time_limit"` | |||||
| Params string `db:"params"` // 渲染参数 | |||||
| Script string `db:"script"` // 生成的脚本 | |||||
| TemplateId int64 `db:"template_id"` // 模板ID | |||||
| ResourceSpec ResourceSpec `json:"resourceSpec" gorm:"serializer:json"` | |||||
| } | |||||
| ResourceSpec struct { | |||||
| ResourceId string `json:"resourceId"` // 资源ID | |||||
| ResourceName string `json:"resourceName"` // 资源名称 | |||||
| ResourceType string `json:"resourceType"` // 资源类型 | |||||
| Partition string `json:"partition"` // 分区 | |||||
| Specifications interface{} `json:"specifications"` | |||||
| } | } | ||||
| ) | ) | ||||
| @@ -52,6 +52,7 @@ type ( | |||||
| AdapterTypeDict string `db:"adapter_type_dict" json:"adapterTypeDict"` //任务类型(对应字典表的值) | AdapterTypeDict string `db:"adapter_type_dict" json:"adapterTypeDict"` //任务类型(对应字典表的值) | ||||
| TaskTypeDict string `db:"task_type_dict" json:"taskTypeDict"` | TaskTypeDict string `db:"task_type_dict" json:"taskTypeDict"` | ||||
| UserId int64 `db:"user_id" json:"userId"` | UserId int64 `db:"user_id" json:"userId"` | ||||
| UserName string `db:"user_name" json:"userName"` // 提交人 | |||||
| } | } | ||||
| ) | ) | ||||
| @@ -0,0 +1,394 @@ | |||||
| package utils | |||||
| import ( | |||||
| "bufio" | |||||
| "fmt" | |||||
| "os" | |||||
| "regexp" | |||||
| "strconv" | |||||
| "strings" | |||||
| ) | |||||
// SlurmResource holds the resource specification extracted from the #SBATCH
// directives of a SLURM batch script. Values are stored as the raw strings
// found in the script; an empty string means the option was not present.
type SlurmResource struct {
	JobName      string `json:"job_name"`       // job name (--job-name / -J)
	CPUs         string `json:"cpus"`           // total CPU count; not parsed, derived as Tasks * CPUsPerTask
	Memory       string `json:"memory"`         // first of --mem / --mem-per-cpu / --mem-per-node seen
	Nodes        string `json:"nodes"`          // node count, or a "min-max" range
	Tasks        string `json:"tasks"`          // total number of tasks (--ntasks / -n)
	TasksPerNode string `json:"tasks_per_node"` // tasks per node (--ntasks-per-node)
	CPUsPerTask  string `json:"cpus_per_task"`  // CPUs per task (--cpus-per-task / -c)
	Partition    string `json:"partition"`      // queue / partition (--partition / -p)
	Time         string `json:"time"`           // time limit (--time / -t)
	Output       string `json:"output"`         // stdout file (--output / -o)
	Error        string `json:"error"`          // stderr file (--error / -e)
	QOS          string `json:"qos"`            // quality of service (--qos)
	Account      string `json:"account"`        // account (--account / -A)
	GPUs         string `json:"gpus"`           // GPU count (--gpus*, --gres=gpu:...)
	GPUType      string `json:"gpu_type"`       // GPU type from --gres=gpu:<type>:<count>
	Constraint   string `json:"constraint"`     // node constraint (--constraint / -C)
	Exclusive    bool   `json:"exclusive"`      // true when --exclusive is present
	ArrayJobID   string `json:"array_job_id"`   // value of --array / -a
	WorkingDir   string `json:"working_dir"`    // working directory (--chdir / --workdir / -D)
	MailType     string `json:"mail_type"`      // mail notification type (--mail-type)
	MailUser     string `json:"mail_user"`      // mail recipient (--mail-user)
}

// SlurmParser extracts #SBATCH options from SLURM batch scripts.
type SlurmParser struct {
	// patterns maps a logical field name to the regular expressions that can
	// supply it; the expressions are tried in order and the first match wins.
	patterns map[string][]*regexp.Regexp
}

// NewSlurmParser returns a parser with all option patterns compiled.
func NewSlurmParser() *SlurmParser {
	parser := &SlurmParser{
		patterns: make(map[string][]*regexp.Regexp),
	}
	parser.initPatterns()
	return parser
}

// initPatterns registers the regular expressions for every supported option.
//
// Order matters: within a field the first matching expression wins, so the
// more specific forms (quoted values, node ranges, typed gres) are listed
// before the generic ones that would otherwise swallow part of the value.
func (p *SlurmParser) initPatterns() {
	if p.patterns == nil {
		p.patterns = make(map[string][]*regexp.Regexp)
	}
	compile := func(exprs ...string) []*regexp.Regexp {
		compiled := make([]*regexp.Regexp, 0, len(exprs))
		for _, expr := range exprs {
			compiled = append(compiled, regexp.MustCompile(expr))
		}
		return compiled
	}

	// Job name; quoted forms first so the quotes are stripped and embedded
	// spaces are kept.
	p.patterns["job_name"] = compile(
		`#SBATCH\s+--job-name\s*=\s*"([^"]+)"`,
		`#SBATCH\s+--job-name\s*=\s*'([^']+)'`,
		`#SBATCH\s+--job-name[=\s]+([^\s]+)`,
		`#SBATCH\s+-J\s+([^\s]+)`,
	)

	// CPUs per task.
	p.patterns["cpus_per_task"] = compile(
		`#SBATCH\s+--cpus-per-task[=\s]+(\d+)`,
		`#SBATCH\s+-c\s+(\d+)`,
	)

	// Memory. There is deliberately no "-m" form: in sbatch "-m" is the short
	// option for --distribution, not for memory.
	p.patterns["memory"] = compile(
		`#SBATCH\s+--mem[=\s]+([^\s]+)`,
		`#SBATCH\s+--mem-per-cpu[=\s]+([^\s]+)`,
		`#SBATCH\s+--mem-per-node[=\s]+([^\s]+)`,
	)

	// Node count, either a single number or a "min-max" range.
	p.patterns["nodes"] = compile(
		`#SBATCH\s+--nodes[=\s]+(\d+(?:-\d+)?)`,
		`#SBATCH\s+-N\s+(\d+(?:-\d+)?)`,
	)

	// Total number of tasks.
	p.patterns["tasks"] = compile(
		`#SBATCH\s+--ntasks[=\s]+(\d+)`,
		`#SBATCH\s+-n\s+(\d+)`,
	)

	// Tasks per node.
	p.patterns["tasks_per_node"] = compile(
		`#SBATCH\s+--ntasks-per-node[=\s]+(\d+)`,
	)

	// Partition / queue; quoted form first.
	p.patterns["partition"] = compile(
		`#SBATCH\s+--partition\s*=\s*"([^"]+)"`,
		`#SBATCH\s+--partition[=\s]+([^\s]+)`,
		`#SBATCH\s+-p\s+([^\s]+)`,
	)

	// Time limit.
	p.patterns["time"] = compile(
		`#SBATCH\s+--time[=\s]+([^\s]+)`,
		`#SBATCH\s+-t\s+([^\s]+)`,
	)

	// Standard output file.
	p.patterns["output"] = compile(
		`#SBATCH\s+--output[=\s]+([^\s]+)`,
		`#SBATCH\s+-o\s+([^\s]+)`,
	)

	// Standard error file.
	p.patterns["error"] = compile(
		`#SBATCH\s+--error[=\s]+([^\s]+)`,
		`#SBATCH\s+-e\s+([^\s]+)`,
	)

	// Quality of service.
	p.patterns["qos"] = compile(
		`#SBATCH\s+--qos[=\s]+([^\s]+)`,
	)

	// Account.
	p.patterns["account"] = compile(
		`#SBATCH\s+--account[=\s]+([^\s]+)`,
		`#SBATCH\s+-A\s+([^\s]+)`,
	)

	// GPUs. The typed gres form has two capture groups (type, count) and is
	// listed before the count-only form; the type may not contain whitespace
	// so the match cannot run on into a later option on the same line.
	p.patterns["gpus"] = compile(
		`#SBATCH\s+--gpus[=\s]+([^\s]+)`,
		`#SBATCH\s+--gpus-per-node[=\s]+([^\s]+)`,
		`#SBATCH\s+--gpus-per-task[=\s]+([^\s]+)`,
		`#SBATCH\s+--gres[=\s]+gpu:([^:\s,]+):(\d+)`,
		`#SBATCH\s+--gres[=\s]+gpu:(\d+)`,
	)

	// Node constraint; quoted form first.
	p.patterns["constraint"] = compile(
		`#SBATCH\s+--constraint\s*=\s*"([^"]+)"`,
		`#SBATCH\s+--constraint[=\s]+([^\s]+)`,
		`#SBATCH\s+-C\s+([^\s]+)`,
	)

	// Exclusive node access (flag, no value to capture).
	p.patterns["exclusive"] = compile(
		`#SBATCH\s+--exclusive`,
	)

	// Job array.
	p.patterns["array"] = compile(
		`#SBATCH\s+--array[=\s]+([^\s]+)`,
		`#SBATCH\s+-a\s+([^\s]+)`,
	)

	// Working directory.
	p.patterns["workdir"] = compile(
		`#SBATCH\s+--chdir[=\s]+([^\s]+)`,
		`#SBATCH\s+--workdir[=\s]+([^\s]+)`,
		`#SBATCH\s+-D\s+([^\s]+)`,
	)

	// Mail notification.
	p.patterns["mail_type"] = compile(
		`#SBATCH\s+--mail-type[=\s]+([^\s]+)`,
	)
	p.patterns["mail_user"] = compile(
		`#SBATCH\s+--mail-user[=\s]+([^\s]+)`,
	)
}

// ParseScript scans scriptContent line by line, reads every line that starts
// with "#SBATCH" and returns the collected options. For string fields the
// first occurrence of an option wins. Missing values are then derived by
// postProcess. The returned resource is never nil.
func (p *SlurmParser) ParseScript(scriptContent string) *SlurmResource {
	resource := &SlurmResource{}

	// Plain string-valued options, filled through parseField.
	targets := []struct {
		field string
		dst   *string
	}{
		{"job_name", &resource.JobName},
		{"cpus_per_task", &resource.CPUsPerTask},
		{"memory", &resource.Memory},
		{"nodes", &resource.Nodes},
		{"tasks", &resource.Tasks},
		{"tasks_per_node", &resource.TasksPerNode},
		{"partition", &resource.Partition},
		{"time", &resource.Time},
		{"output", &resource.Output},
		{"error", &resource.Error},
		{"qos", &resource.QOS},
		{"account", &resource.Account},
		{"constraint", &resource.Constraint},
		{"array", &resource.ArrayJobID},
		{"workdir", &resource.WorkingDir},
		{"mail_type", &resource.MailType},
		{"mail_user", &resource.MailUser},
	}

	scanner := bufio.NewScanner(strings.NewReader(scriptContent))
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())

		// Only #SBATCH directive lines carry options.
		if !strings.HasPrefix(line, "#SBATCH") {
			continue
		}

		for _, t := range targets {
			p.parseField(line, t.field, t.dst)
		}

		p.parseGPU(line, resource)

		// --exclusive is a flag without a value, so it is detected by a plain
		// match rather than through matchPattern, which needs a capture group.
		if p.matchesAny(line, "exclusive") {
			resource.Exclusive = true
		}
	}

	// Derive values that were not given explicitly.
	p.postProcess(resource)

	return resource
}

// parseField stores the value captured for field on line into target, but
// only if target is still empty, so the first occurrence wins.
func (p *SlurmParser) parseField(line, field string, target *string) {
	if *target != "" {
		return
	}
	if value := p.matchPattern(line, field); value != "" {
		*target = value
	}
}

// parseGPU fills the GPU fields of resource from line. A pattern with two
// capture groups (gres "gpu:<type>:<count>") sets both GPUType and GPUs; any
// other GPU pattern sets GPUs only. Unlike parseField, a later matching line
// overwrites an earlier value.
func (p *SlurmParser) parseGPU(line string, resource *SlurmResource) {
	for _, pattern := range p.patterns["gpus"] {
		matches := pattern.FindStringSubmatch(line)
		if len(matches) < 2 {
			continue
		}
		if len(matches) > 2 {
			// type:count form
			resource.GPUType = matches[1]
			resource.GPUs = matches[2]
		} else {
			resource.GPUs = matches[1]
		}
		return
	}
}

// matchPattern returns the first capture group of the first pattern
// registered for field that matches line, or "" if nothing matches or the
// field is unknown.
func (p *SlurmParser) matchPattern(line, field string) string {
	if patterns, exists := p.patterns[field]; exists {
		for _, pattern := range patterns {
			if matches := pattern.FindStringSubmatch(line); len(matches) > 1 {
				return matches[1]
			}
		}
	}
	return ""
}

// matchesAny reports whether any pattern registered for field matches line,
// whether or not the pattern has a capture group.
func (p *SlurmParser) matchesAny(line, field string) bool {
	for _, pattern := range p.patterns[field] {
		if pattern.MatchString(line) {
			return true
		}
	}
	return false
}

// postProcess derives values that the script did not state explicitly.
func (p *SlurmParser) postProcess(resource *SlurmResource) {
	// Total CPUs = tasks * cpus-per-task when both are plain integers.
	if resource.CPUs == "" && resource.Tasks != "" && resource.CPUsPerTask != "" {
		if tasks, err1 := strconv.Atoi(resource.Tasks); err1 == nil {
			if cpusPerTask, err2 := strconv.Atoi(resource.CPUsPerTask); err2 == nil {
				resource.CPUs = strconv.Itoa(tasks * cpusPerTask)
			}
		}
	}

	// Tasks given without any node information: assume a single node.
	if resource.Tasks != "" && resource.Nodes == "" && resource.TasksPerNode == "" {
		resource.Nodes = "1"
	}
}

// ParseFile reads filename and parses its content with ParseScript. On a read
// failure it returns a nil resource and an error wrapping the underlying
// cause, so callers can inspect it with errors.Is / errors.As.
func (p *SlurmParser) ParseFile(filename string) (*SlurmResource, error) {
	content, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("读取文件失败: %w", err)
	}

	return p.ParseScript(string(content)), nil
}
| // String 格式化输出 | |||||
| func (r *SlurmResource) String() string { | |||||
| var result strings.Builder | |||||
| result.WriteString("SLURM资源规格:\n") | |||||
| result.WriteString("====================\n") | |||||
| if r.JobName != "" { | |||||
| result.WriteString(fmt.Sprintf("作业名称: %s\n", r.JobName)) | |||||
| } | |||||
| if r.Partition != "" { | |||||
| result.WriteString(fmt.Sprintf("队列/分区: %s\n", r.Partition)) | |||||
| } | |||||
| if r.Nodes != "" { | |||||
| result.WriteString(fmt.Sprintf("节点数: %s\n", r.Nodes)) | |||||
| } | |||||
| if r.Tasks != "" { | |||||
| result.WriteString(fmt.Sprintf("任务数: %s\n", r.Tasks)) | |||||
| } | |||||
| if r.TasksPerNode != "" { | |||||
| result.WriteString(fmt.Sprintf("每节点任务数: %s\n", r.TasksPerNode)) | |||||
| } | |||||
| if r.CPUsPerTask != "" { | |||||
| result.WriteString(fmt.Sprintf("每任务CPU数: %s\n", r.CPUsPerTask)) | |||||
| } | |||||
| if r.CPUs != "" { | |||||
| result.WriteString(fmt.Sprintf("总CPU数: %s\n", r.CPUs)) | |||||
| } | |||||
| if r.Memory != "" { | |||||
| result.WriteString(fmt.Sprintf("内存: %s\n", r.Memory)) | |||||
| } | |||||
| if r.GPUs != "" { | |||||
| result.WriteString(fmt.Sprintf("GPU数量: %s\n", r.GPUs)) | |||||
| if r.GPUType != "" { | |||||
| result.WriteString(fmt.Sprintf("GPU类型: %s\n", r.GPUType)) | |||||
| } | |||||
| } | |||||
| if r.Time != "" { | |||||
| result.WriteString(fmt.Sprintf("运行时间: %s\n", r.Time)) | |||||
| } | |||||
| if r.Account != "" { | |||||
| result.WriteString(fmt.Sprintf("账户: %s\n", r.Account)) | |||||
| } | |||||
| if r.QOS != "" { | |||||
| result.WriteString(fmt.Sprintf("服务质量: %s\n", r.QOS)) | |||||
| } | |||||
| if r.Constraint != "" { | |||||
| result.WriteString(fmt.Sprintf("节点约束: %s\n", r.Constraint)) | |||||
| } | |||||
| if r.Exclusive { | |||||
| result.WriteString("独占节点: 是\n") | |||||
| } | |||||
| if r.ArrayJobID != "" { | |||||
| result.WriteString(fmt.Sprintf("数组作业: %s\n", r.ArrayJobID)) | |||||
| } | |||||
| if r.Output != "" { | |||||
| result.WriteString(fmt.Sprintf("输出文件: %s\n", r.Output)) | |||||
| } | |||||
| if r.Error != "" { | |||||
| result.WriteString(fmt.Sprintf("错误文件: %s\n", r.Error)) | |||||
| } | |||||
| return result.String() | |||||
| } | |||||
| // GetResourceSummary 获取核心资源摘要 | |||||
| func (r *SlurmResource) GetResourceSummary() map[string]string { | |||||
| summary := make(map[string]string) | |||||
| if r.JobName != "" { | |||||
| summary["job_name"] = r.JobName | |||||
| } | |||||
| if r.Partition != "" { | |||||
| summary["queue"] = r.Partition | |||||
| } | |||||
| if r.Nodes != "" { | |||||
| summary["nodes"] = r.Nodes | |||||
| } | |||||
| if r.Tasks != "" { | |||||
| summary["tasks"] = r.Tasks | |||||
| } | |||||
| if r.CPUs != "" { | |||||
| summary["total_cpus"] = r.CPUs | |||||
| } | |||||
| if r.CPUsPerTask != "" { | |||||
| summary["cpus_per_task"] = r.CPUsPerTask | |||||
| } | |||||
| if r.Memory != "" { | |||||
| summary["memory"] = r.Memory | |||||
| } | |||||
| if r.GPUs != "" { | |||||
| summary["gpus"] = r.GPUs | |||||
| } | |||||
| if r.Time != "" { | |||||
| summary["time_limit"] = r.Time | |||||
| } | |||||
| return summary | |||||
| } | |||||