Former-commit-id: 4c9cce1bec
pull/116/head
| @@ -906,13 +906,13 @@ service pcm { | |||
| get /schedule/ai/getTaskTypes returns (AiTaskTypesResp) | |||
| @handler ScheduleGetDatasetsHandler | |||
| get /schedule/ai/getDatasets returns (AiDatasetsResp) | |||
| get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp) | |||
| @handler ScheduleGetStrategyHandler | |||
| get /schedule/ai/getStrategies returns (AiStrategyResp) | |||
| @handler ScheduleGetAlgorithmsHandler | |||
| get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) | |||
| get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) | |||
| @handler ScheduleSubmitHandler | |||
| post /schedule/submit (ScheduleReq) returns (ScheduleResp) | |||
| @@ -26,7 +26,8 @@ type ( | |||
| AiOption { | |||
| TaskName string `json:"taskName"` | |||
| AiClusterId string `json:"aiClusterId,optional"` | |||
| AdapterId string `json:"adapterId"` | |||
| AiClusterIds []string `json:"aiClusterIds"` | |||
| ResourceType string `json:"resourceType"` | |||
| Tops float64 `json:"Tops,optional"` | |||
| TaskType string `json:"taskType"` | |||
| @@ -47,6 +48,10 @@ type ( | |||
| TaskTypes []string `json:"taskTypes"` | |||
| } | |||
// AiDatasetsReq is the request of the getDatasets route; AdapterId is bound
// from the ":adapterId" path segment.
AiDatasetsReq {
	AdapterId string `path:"adapterId"`
}
// AiDatasetsResp is the response of the getDatasets route; the dataset names
// are serialized under the "datasets" JSON key.
AiDatasetsResp {
	Datasets []string `json:"datasets"`
}
| @@ -56,6 +61,7 @@ type ( | |||
| } | |||
| AiAlgorithmsReq { | |||
| AdapterId string `path:"adapterId"` | |||
| ResourceType string `path:"resourceType"` | |||
| TaskType string `path:"taskType"` | |||
| Dataset string `path:"dataset"` | |||
| @@ -1122,7 +1122,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { | |||
| }, | |||
| { | |||
| Method: http.MethodGet, | |||
| Path: "/schedule/ai/getDatasets", | |||
| Path: "/schedule/ai/getDatasets/:adapterId", | |||
| Handler: schedule.ScheduleGetDatasetsHandler(serverCtx), | |||
| }, | |||
| { | |||
| @@ -1132,7 +1132,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { | |||
| }, | |||
| { | |||
| Method: http.MethodGet, | |||
| Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset", | |||
| Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset", | |||
| Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx), | |||
| }, | |||
| { | |||
| @@ -1,16 +1,24 @@ | |||
| package schedule | |||
| import ( | |||
| "github.com/zeromicro/go-zero/rest/httpx" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | |||
| "net/http" | |||
| ) | |||
| func ScheduleGetDatasetsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { | |||
| return func(w http.ResponseWriter, r *http.Request) { | |||
| var req types.AiDatasetsReq | |||
| if err := httpx.Parse(r, &req); err != nil { | |||
| result.ParamErrorResult(r, w, err) | |||
| return | |||
| } | |||
| l := schedule.NewScheduleGetDatasetsLogic(r.Context(), svcCtx) | |||
| resp, err := l.ScheduleGetDatasets() | |||
| resp, err := l.ScheduleGetDatasets(&req) | |||
| result.HttpResult(r, w, resp, err) | |||
| } | |||
| } | |||
| @@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte | |||
| func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) { | |||
| resp = &types.AiAlgorithmsResp{} | |||
| algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset) | |||
| algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| @@ -3,6 +3,7 @@ package schedule | |||
| import ( | |||
| "context" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" | |||
| @@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext | |||
| } | |||
| } | |||
| func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) { | |||
| func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) { | |||
| resp = &types.AiDatasetsResp{} | |||
| names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector) | |||
| names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId]) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| @@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc | |||
| func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { | |||
| resp = &types.ScheduleResp{} | |||
| opt := &option.AiOption{ | |||
| AdapterId: req.AiOption.AdapterId, | |||
| ResourceType: req.AiOption.ResourceType, | |||
| Tops: req.AiOption.Tops, | |||
| TaskType: req.AiOption.TaskType, | |||
| @@ -33,6 +33,21 @@ func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, e | |||
| return &resp, nil | |||
| } | |||
| func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) { | |||
| var list []types.AdapterInfo | |||
| var ids []string | |||
| db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter") | |||
| db = db.Where("type = ?", adapterType) | |||
| err := db.Order("create_time desc").Find(&list).Error | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| for _, info := range list { | |||
| ids = append(ids, info.Id) | |||
| } | |||
| return ids, nil | |||
| } | |||
| func (s *AiStorage) SaveTask(name string) error { | |||
| // 构建主任务结构体 | |||
| taskModel := models.Task{ | |||
| @@ -20,8 +20,7 @@ import ( | |||
| "github.com/zeromicro/go-zero/core/logx" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice" | |||
| @@ -32,16 +31,15 @@ import ( | |||
| ) | |||
| type Scheduler struct { | |||
| task *response.TaskInfo | |||
| participantIds []int64 | |||
| subSchedule SubSchedule | |||
| dbEngin *gorm.DB | |||
| result []string //pID:子任务yamlstring 键值对 | |||
| participantRpc participantservice.ParticipantService | |||
| ResourceCollector *map[string]collector.AiCollector | |||
| AiStorages *database.AiStorage | |||
| AiExecutor *map[string]executor.AiExecutor | |||
| mu sync.RWMutex | |||
| task *response.TaskInfo | |||
| participantIds []int64 | |||
| subSchedule SubSchedule | |||
| dbEngin *gorm.DB | |||
| result []string //pID:子任务yamlstring 键值对 | |||
| participantRpc participantservice.ParticipantService | |||
| AiStorages *database.AiStorage | |||
| AiService *service.AiService | |||
| mu sync.RWMutex | |||
| } | |||
| type SubSchedule interface { | |||
| @@ -59,8 +57,8 @@ func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, partici | |||
| return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil | |||
| } | |||
| func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages *database.AiStorage, aiExecutor *map[string]executor.AiExecutor) *Scheduler { | |||
| return &Scheduler{ResourceCollector: resourceCollector, AiStorages: storages, AiExecutor: aiExecutor} | |||
| func NewSchdlr(aiService *service.AiService, storages *database.AiStorage) *Scheduler { | |||
| return &Scheduler{AiService: aiService, AiStorages: storages} | |||
| } | |||
| func (s *Scheduler) SpecifyClusters() { | |||
| @@ -64,9 +64,8 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin | |||
| } | |||
| func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { | |||
| if as.option.AiClusterId != "" { | |||
| // TODO database operation Find | |||
| return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: "", Replicas: 1}}, nil | |||
| if len(as.option.ClusterIds) == 1 { | |||
| return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil | |||
| } | |||
| resources, err := as.findClustersWithResources() | |||
| @@ -131,7 +130,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa | |||
| var ch = make(chan *AiResult, len(clusters)) | |||
| var errCh = make(chan interface{}, len(clusters)) | |||
| executorMap := *as.AiExecutor | |||
| executorMap := as.AiService.AiExecutorAdapterMap[as.option.AdapterId] | |||
| for _, cluster := range clusters { | |||
| c := cluster | |||
| wg.Add(1) | |||
| @@ -202,13 +201,14 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa | |||
| func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) { | |||
| var wg sync.WaitGroup | |||
| var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector)) | |||
| var errCh = make(chan interface{}, len(*as.ResourceCollector)) | |||
| var clustersNum = len(as.AiService.AiCollectorAdapterMap[as.option.AdapterId]) | |||
| var ch = make(chan *collector.ResourceStats, clustersNum) | |||
| var errCh = make(chan interface{}, clustersNum) | |||
| var resourceSpecs []*collector.ResourceStats | |||
| var errs []interface{} | |||
| for s, resourceCollector := range *as.ResourceCollector { | |||
| for s, resourceCollector := range as.AiService.AiCollectorAdapterMap[as.option.AdapterId] { | |||
| wg.Add(1) | |||
| rc := resourceCollector | |||
| id := s | |||
| @@ -242,7 +242,7 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, | |||
| errs = append(errs, e) | |||
| } | |||
| if len(errs) == len(*as.ResourceCollector) { | |||
| if len(errs) == clustersNum { | |||
| return nil, errors.New("get resources failed") | |||
| } | |||
| @@ -1,7 +1,8 @@ | |||
| package option | |||
| type AiOption struct { | |||
| AiClusterId string // shuguangAi /octopus ClusterId | |||
| AdapterId string | |||
| ClusterIds []string | |||
| TaskName string | |||
| ResourceType string // cpu/gpu/compute card | |||
| CpuCoreNum int64 | |||
| @@ -1,11 +1,14 @@ | |||
| package service | |||
| import ( | |||
| "github.com/zeromicro/go-zero/zrpc" | |||
| "gitlink.org.cn/JointCloud/pcm-ac/hpcacclient" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" | |||
| "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" | |||
| "gitlink.org.cn/JointCloud/pcm-octopus/octopusclient" | |||
| "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" | |||
| "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" | |||
| @@ -18,30 +21,60 @@ const ( | |||
| SHUGUANGAI = "shuguangAi" | |||
| ) | |||
| func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC, storages *database.AiStorage) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) { | |||
| clusters, _ := storages.GetClustersByAdapterId("1777144940459986944") | |||
// AiService holds the AI executors and collectors built by NewAiService.
// Both maps are keyed first by adapter id and then by cluster id.
type AiService struct {
	AiExecutorAdapterMap  map[string]map[string]executor.AiExecutor
	AiCollectorAdapterMap map[string]map[string]collector.AiCollector
}
| func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService, error) { | |||
| var aiType = "1" | |||
| adapterIds, err := storages.GetAdapterIdsByType(aiType) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| aiService := &AiService{ | |||
| AiExecutorAdapterMap: make(map[string]map[string]executor.AiExecutor), | |||
| AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector), | |||
| } | |||
| for _, id := range adapterIds { | |||
| clusters, err := storages.GetClustersByAdapterId(id) | |||
| if err != nil { | |||
| return nil, err | |||
| } | |||
| exeClusterMap, colClusterMap := InitAiClusterMap(conf, clusters.List) | |||
| aiService.AiExecutorAdapterMap[id] = exeClusterMap | |||
| aiService.AiCollectorAdapterMap[id] = colClusterMap | |||
| } | |||
| return aiService, nil | |||
| } | |||
| func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector) { | |||
| executorMap := make(map[string]executor.AiExecutor) | |||
| collectorMap := make(map[string]collector.AiCollector) | |||
| for _, c := range clusters.List { | |||
| for _, c := range clusters { | |||
| switch c.Name { | |||
| case OCTOPUS: | |||
| id, _ := strconv.ParseInt(c.Id, 10, 64) | |||
| octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf)) | |||
| octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id) | |||
| collectorMap[c.Id] = octopus | |||
| executorMap[c.Id] = octopus | |||
| case MODELARTS: | |||
| id, _ := strconv.ParseInt(c.Id, 10, 64) | |||
| modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf)) | |||
| modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf)) | |||
| modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id) | |||
| collectorMap[c.Id] = modelarts | |||
| executorMap[c.Id] = modelarts | |||
| case SHUGUANGAI: | |||
| id, _ := strconv.ParseInt(c.Id, 10, 64) | |||
| aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf)) | |||
| sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id) | |||
| collectorMap[c.Id] = sgai | |||
| executorMap[c.Id] = sgai | |||
| } | |||
| } | |||
| return &executorMap, &collectorMap | |||
| return executorMap, collectorMap | |||
| } | |||
| @@ -128,13 +128,13 @@ func GetResourceTypes() []string { | |||
| return resourceTypes | |||
| } | |||
| func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) { | |||
| func GetDatasetsNames(ctx context.Context, collectorMap map[string]collector.AiCollector) ([]string, error) { | |||
| var wg sync.WaitGroup | |||
| var errCh = make(chan interface{}, len(*collectorMap)) | |||
| var errCh = make(chan interface{}, len(collectorMap)) | |||
| var errs []interface{} | |||
| var names []string | |||
| var mu sync.Mutex | |||
| colMap := *collectorMap | |||
| colMap := collectorMap | |||
| for s, col := range colMap { | |||
| wg.Add(1) | |||
| c := col | |||
| @@ -200,14 +200,14 @@ func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.Ai | |||
| return names, nil | |||
| } | |||
| func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { | |||
| func GetAlgorithms(ctx context.Context, collectorMap map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { | |||
| var names []string | |||
| var wg sync.WaitGroup | |||
| var errCh = make(chan interface{}, len(*collectorMap)) | |||
| var errCh = make(chan interface{}, len(collectorMap)) | |||
| var errs []interface{} | |||
| var mu sync.Mutex | |||
| colMap := *collectorMap | |||
| colMap := collectorMap | |||
| for s, col := range colMap { | |||
| wg.Add(1) | |||
| c := col | |||
| @@ -116,24 +116,28 @@ func NewServiceContext(c config.Config) *ServiceContext { | |||
| }) | |||
| // scheduler | |||
| octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)) | |||
| aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)) | |||
| modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)) | |||
| modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)) | |||
| //octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)) | |||
| //aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)) | |||
| //modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)) | |||
| //modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)) | |||
| storage := &database.AiStorage{DbEngin: dbEngin} | |||
| aiExecutor, resourceCollector := service.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc, storage) | |||
| scheduler := scheduler.NewSchdlr(resourceCollector, storage, aiExecutor) | |||
| aiService, err := service.NewAiService(&c, storage) | |||
| if err != nil { | |||
| logx.Error(err.Error()) | |||
| return nil | |||
| } | |||
| scheduler := scheduler.NewSchdlr(aiService, storage) | |||
| return &ServiceContext{ | |||
| Cron: cron.New(cron.WithSeconds()), | |||
| DbEngin: dbEngin, | |||
| Config: c, | |||
| RedisClient: redisClient, | |||
| ModelArtsRpc: modelArtsRpc, | |||
| ModelArtsImgRpc: modelArtsImgRpc, | |||
| ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)), | |||
| ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)), | |||
| CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)), | |||
| ACRpc: aCRpc, | |||
| OctopusRpc: octopusRpc, | |||
| ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)), | |||
| OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)), | |||
| OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)), | |||
| K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)), | |||
| MonitorClient: make(map[int64]tracker.Prometheus), | |||
| @@ -5278,7 +5278,8 @@ type ScheduleResult struct { | |||
| type AiOption struct { | |||
| TaskName string `json:"taskName"` | |||
| AiClusterId string `json:"aiClusterId,optional"` | |||
| AdapterId string `json:"adapterId"` | |||
| AiClusterIds []string `json:"aiClusterIds"` | |||
| ResourceType string `json:"resourceType"` | |||
| Tops float64 `json:"Tops,optional"` | |||
| TaskType string `json:"taskType"` | |||
| @@ -5299,6 +5300,10 @@ type AiTaskTypesResp struct { | |||
| TaskTypes []string `json:"taskTypes"` | |||
| } | |||
// AiDatasetsReq is the request for listing datasets; AdapterId is bound from
// the "adapterId" path parameter.
type AiDatasetsReq struct {
	AdapterId string `path:"adapterId"`
}
// AiDatasetsResp is the response for listing datasets; the names are
// serialized under the "datasets" JSON key.
type AiDatasetsResp struct {
	Datasets []string `json:"datasets"`
}
| @@ -5308,6 +5313,7 @@ type AiStrategyResp struct { | |||
| } | |||
| type AiAlgorithmsReq struct { | |||
| AdapterId string `path:"adapterId"` | |||
| ResourceType string `path:"resourceType"` | |||
| TaskType string `path:"taskType"` | |||
| Dataset string `path:"dataset"` | |||
| @@ -5317,6 +5323,156 @@ type AiAlgorithmsResp struct { | |||
| Algorithms []string `json:"algorithms"` | |||
| } | |||
// PullTaskInfoReq is the request for pulling task information; AdapterId is
// bound from the "adapterId" form parameter.
type PullTaskInfoReq struct {
	AdapterId int64 `form:"adapterId"`
}
// PullTaskInfoResp carries the pulled task records, one list per task kind
// (HPC, cloud, AI, VM). A list that is empty is omitted from the JSON output.
type PullTaskInfoResp struct {
	HpcInfoList   []*HpcInfo   `json:"HpcInfoList,omitempty"`
	CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"`
	AiInfoList    []*AiInfo    `json:"AiInfoList,omitempty"`
	VmInfoList    []*VmInfo    `json:"VmInfoList,omitempty"`
}
// HpcInfo is the JSON representation of one HPC task record; all keys use
// snake_case.
type HpcInfo struct {
	Id          int64  `json:"id"`           // id
	TaskId      int64  `json:"task_id"`      // task id
	JobId       string `json:"job_id"`       // job id (the job's id in the third-party system)
	AdapterId   int64  `json:"adapter_id"`   // id of the adapter executing the task
	ClusterId   int64  `json:"cluster_id"`   // id of the cluster executing the task
	ClusterType string `json:"cluster_type"` // type of the cluster executing the task
	Name        string `json:"name"`         // name
	Status      string `json:"status"`       // status
	CmdScript   string `json:"cmd_script"`
	StartTime   string `json:"start_time"`   // start time
	RunningTime int64  `json:"running_time"` // running time
	DerivedEs   string `json:"derived_es"`
	Cluster     string `json:"cluster"`
	BlockId     int64  `json:"block_id"`
	AllocNodes  int64  `json:"alloc_nodes"`
	AllocCpu    int64  `json:"alloc_cpu"`
	CardCount   int64  `json:"card_count"` // number of cards
	Version     string `json:"version"`
	Account     string `json:"account"`
	WorkDir     string `json:"work_dir"` // working directory
	AssocId     int64  `json:"assoc_id"`
	ExitCode    int64  `json:"exit_code"`
	WallTime    string `json:"wall_time"`  // maximum running time
	Result      string `json:"result"`     // run result
	DeletedAt   string `json:"deleted_at"` // deletion time
	YamlString  string `json:"yaml_string"`
	AppType     string `json:"app_type"`     // application type
	AppName     string `json:"app_name"`     // application name
	Queue       string `json:"queue"`        // queue name
	SubmitType  string `json:"submit_type"`  // cmd (command-line mode)
	NNode       string `json:"n_node"`       // number of nodes (when this parameter is set, GAP_NODE_STRING must be "")
	StdOutFile  string `json:"std_out_file"` // working directory/std.err.%j — NOTE(review): likely meant std.out.%j, confirm
	StdErrFile  string `json:"std_err_file"` // working directory/std.err.%j
	StdInput    string `json:"std_input"`
	Environment string `json:"environment"`
	DeletedFlag int64  `json:"deleted_flag"` // whether deleted (0 = no, 1 = yes)
	CreatedBy   int64  `json:"created_by"`   // creator
	CreatedTime string `json:"created_time"` // creation time
	UpdatedBy   int64  `json:"updated_by"`   // last updater
	UpdatedTime string `json:"updated_time"` // last update time
}
// CloudInfo is the JSON representation of one cloud task record. Every field
// is tagged omitempty, so zero-valued fields are left out of the output.
type CloudInfo struct {
	Participant int64  `json:"participant,omitempty"`
	Id          int64  `json:"id,omitempty"`
	TaskId      int64  `json:"taskId,omitempty"`
	ApiVersion  string `json:"apiVersion,omitempty"`
	Kind        string `json:"kind,omitempty"`
	Namespace   string `json:"namespace,omitempty"`
	Name        string `json:"name,omitempty"`
	Status      string `json:"status,omitempty"`
	StartTime   string `json:"startTime,omitempty"`
	RunningTime int64  `json:"runningTime,omitempty"`
	Result      string `json:"result,omitempty"`
	YamlString  string `json:"yamlString,omitempty"`
}
// AiInfo is the JSON representation of one AI task record. Every field is
// tagged omitempty, so zero-valued fields are left out of the output.
//
// Note that ProjectId is serialized as "project_id" while the other keys use
// camelCase.
type AiInfo struct {
	ParticipantId  int64  `json:"participantId,omitempty"`
	TaskId         int64  `json:"taskId,omitempty"`
	ProjectId      string `json:"project_id,omitempty"`
	Name           string `json:"name,omitempty"`
	Status         string `json:"status,omitempty"`
	StartTime      string `json:"startTime,omitempty"`
	RunningTime    int64  `json:"runningTime,omitempty"`
	Result         string `json:"result,omitempty"`
	JobId          string `json:"jobId,omitempty"`
	CreateTime     string `json:"createTime,omitempty"`
	ImageUrl       string `json:"imageUrl,omitempty"`
	Command        string `json:"command,omitempty"`
	FlavorId       string `json:"flavorId,omitempty"`
	SubscriptionId string `json:"subscriptionId,omitempty"`
	ItemVersionId  string `json:"itemVersionId,omitempty"`
}
// VmInfo is the JSON representation of one virtual-machine task record. Every
// field is tagged omitempty, so zero-valued fields (including a false
// DeleteOnTermination) are left out of the output.
//
// JSON keys mix camelCase (participantId, taskId) and snake_case (flavor_ref,
// image_ref, ...); they are part of the wire format.
type VmInfo struct {
	ParticipantId       int64  `json:"participantId,omitempty"`
	TaskId              int64  `json:"taskId,omitempty"`
	Name                string `json:"name,omitempty"`
	FlavorRef           string `json:"flavor_ref,omitempty"`
	ImageRef            string `json:"image_ref,omitempty"`
	NetworkUuid         string `json:"network_uuid,omitempty"`
	BlockUuid           string `json:"block_uuid,omitempty"`
	SourceType          string `json:"source_type,omitempty"`
	DeleteOnTermination bool   `json:"delete_on_termination,omitempty"`
	Status              string `json:"status,omitempty"`
	MinCount            string `json:"min_count,omitempty"`
	Platform            string `json:"platform,omitempty"`
	Uuid                string `json:"uuid,omitempty"`
}
// PushTaskInfoReq is the JSON request body for pushing task records of an
// adapter, with one list per task kind (HPC, cloud, AI, VM).
type PushTaskInfoReq struct {
	AdapterId     int64        `json:"adapterId"`
	HpcInfoList   []*HpcInfo   `json:"hpcInfoList"`
	CloudInfoList []*CloudInfo `json:"cloudInfoList"`
	AiInfoList    []*AiInfo    `json:"aiInfoList"`
	VmInfoList    []*VmInfo    `json:"vmInfoList"`
}
// PushTaskInfoResp is the response to PushTaskInfoReq: a numeric code and a
// message.
type PushTaskInfoResp struct {
	Code int64  `json:"code"`
	Msg  string `json:"msg"`
}
// PushResourceInfoReq is the JSON request body for pushing resource statistics
// of an adapter; ResourceStats holds one entry per reported cluster.
type PushResourceInfoReq struct {
	AdapterId     int64           `json:"adapterId"`
	ResourceStats []ResourceStats `json:"resourceStats"`
}
// PushResourceInfoResp is the response to PushResourceInfoReq: a numeric code
// and a message.
type PushResourceInfoResp struct {
	Code int64  `json:"code"`
	Msg  string `json:"msg"`
}
// ResourceStats is the JSON representation of one cluster's resource figures:
// CPU cores, memory, disk, GPUs, accelerator cards, CPU core hours and balance.
// NOTE(review): units of the memory/disk/balance values are not stated here —
// confirm with the producer.
type ResourceStats struct {
	ClusterId    int64   `json:"clusterId"`
	Name         string  `json:"name"`
	CpuCoreAvail int64   `json:"cpuCoreAvail"`
	CpuCoreTotal int64   `json:"cpuCoreTotal"`
	MemAvail     float64 `json:"memAvail"`
	MemTotal     float64 `json:"memTotal"`
	DiskAvail    float64 `json:"diskAvail"`
	DiskTotal    float64 `json:"diskTotal"`
	GpuAvail     int64   `json:"gpuAvail"`
	CardsAvail   []*Card `json:"cardsAvail"`
	CpuCoreHours float64 `json:"cpuCoreHours"`
	Balance      float64 `json:"balance"`
}
// Card is the JSON representation of one accelerator card entry listed in
// ResourceStats.CardsAvail.
// NOTE(review): TOpsAtFp16 presumably means tera-operations per second at
// FP16 precision — confirm.
type Card struct {
	Platform   string  `json:"platform"`
	Type       string  `json:"type"`
	Name       string  `json:"name"`
	TOpsAtFp16 float64 `json:"TOpsAtFp16"`
	CardHours  float64 `json:"cardHours"`
	CardNum    int32   `json:"cardNum"`
}
| type CreateAlertRuleReq struct { | |||
| CLusterId int64 `json:"clusterId"` | |||
| ClusterName string `json:"clusterName"` | |||