Browse Source

Pass the request context explicitly to every call that was missing it

Former-commit-id: 9cfed9e9e3
pull/88/head
tzwang 1 year ago
parent
commit
96f3a16b71
20 changed files with 213 additions and 198 deletions
  1. +8
    -3
      api/internal/logic/schedule/schedulegetalgorithmslogic.go
  2. +1
    -4
      api/internal/logic/schedule/schedulegetdatasetslogic.go
  3. +2
    -2
      api/internal/logic/storelink/deletelinkimagelogic.go
  4. +2
    -2
      api/internal/logic/storelink/deletelinktasklogic.go
  5. +2
    -2
      api/internal/logic/storelink/getaispecslogic.go
  6. +2
    -2
      api/internal/logic/storelink/getlinkimagelistlogic.go
  7. +2
    -2
      api/internal/logic/storelink/getlinktasklogic.go
  8. +2
    -2
      api/internal/logic/storelink/submitlinktasklogic.go
  9. +2
    -2
      api/internal/logic/storelink/uploadlinkimagelogic.go
  10. +6
    -11
      api/internal/mqs/ScheduleAi.go
  11. +6
    -4
      api/internal/scheduler/schedulers/aiScheduler.go
  12. +8
    -6
      api/internal/scheduler/service/aiService.go
  13. +5
    -3
      api/internal/scheduler/service/collector/collector.go
  14. +2
    -1
      api/internal/scheduler/service/executor/aiExecutor.go
  15. +32
    -31
      api/internal/storeLink/modelarts.go
  16. +43
    -44
      api/internal/storeLink/octopus.go
  17. +15
    -16
      api/internal/storeLink/shuguangHpc.go
  18. +40
    -41
      api/internal/storeLink/shuguangai.go
  19. +16
    -16
      api/internal/storeLink/storeLink.go
  20. +17
    -4
      api/internal/svc/servicecontext.go

+ 8
- 3
api/internal/logic/schedule/schedulegetalgorithmslogic.go View File

@@ -2,6 +2,7 @@ package schedule


import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"


"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
@@ -24,7 +25,11 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte
} }


func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) { func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) {
// todo: add your logic here and delete this line

return
resp = &types.AiAlgorithmsResp{}
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset)
if err != nil {
return nil, err
}
resp.Algorithms = algorithms
return resp, nil
} }

+ 1
- 4
api/internal/logic/schedule/schedulegetdatasetslogic.go View File

@@ -2,9 +2,7 @@ package schedule


import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"

"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"


@@ -27,8 +25,7 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext


func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) { func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) {
resp = &types.AiDatasetsResp{} resp = &types.AiDatasetsResp{}
_, colMap := service.InitAiClusterMap(l.ctx, l.svcCtx)
names, err := storeLink.GetDatasetsNames(colMap)
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/deletelinkimagelogic.go View File

@@ -47,12 +47,12 @@ func (l *DeleteLinkImageLogic) DeleteLinkImage(req *types.DeleteLinkImageReq) (r
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }


img, err := storelink.ILinkage.DeleteImage(req.ImageId)
img, err := storelink.ILinkage.DeleteImage(l.ctx, req.ImageId)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/deletelinktasklogic.go View File

@@ -47,12 +47,12 @@ func (l *DeleteLinkTaskLogic) DeleteLinkTask(req *types.DeleteLinkTaskReq) (resp
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }


task, err := storelink.ILinkage.DeleteTask(req.TaskId)
task, err := storelink.ILinkage.DeleteTask(l.ctx, req.TaskId)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/getaispecslogic.go View File

@@ -47,12 +47,12 @@ func (l *GetAISpecsLogic) GetAISpecs(req *types.GetResourceSpecsReq) (resp *type
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }


specs, err := storelink.ILinkage.QuerySpecs()
specs, err := storelink.ILinkage.QuerySpecs(l.ctx)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/getlinkimagelistlogic.go View File

@@ -47,12 +47,12 @@ func (l *GetLinkImageListLogic) GetLinkImageList(req *types.GetLinkImageListReq)
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }


list, err := storelink.ILinkage.QueryImageList()
list, err := storelink.ILinkage.QueryImageList(l.ctx)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/getlinktasklogic.go View File

@@ -48,12 +48,12 @@ func (l *GetLinkTaskLogic) GetLinkTask(req *types.GetLinkTaskReq) (resp *types.G
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }


task, err := storelink.ILinkage.QueryTask(req.TaskId)
task, err := storelink.ILinkage.QueryTask(l.ctx, req.TaskId)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/submitlinktasklogic.go View File

@@ -48,7 +48,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }
@@ -67,7 +67,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp
envs = append(envs, env) envs = append(envs, env)
} }
} }
task, err := storelink.ILinkage.SubmitTask(req.ImageId, req.Cmd, envs, params, req.ResourceId, "", "", "pytorch")
task, err := storelink.ILinkage.SubmitTask(l.ctx, req.ImageId, req.Cmd, envs, params, req.ResourceId, "", "", "pytorch")
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 2
- 2
api/internal/logic/storelink/uploadlinkimagelogic.go View File

@@ -48,12 +48,12 @@ func (l *UploadLinkImageLogic) UploadLinkImage(req *types.UploadLinkImageReq) (r
return resp, nil return resp, nil
} }


storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant)
storelink := storeLink.NewStoreLink(l.svcCtx, participant)
if storelink == nil { if storelink == nil {
return nil, nil return nil, nil
} }


img, err := storelink.ILinkage.UploadImage(req.FilePath)
img, err := storelink.ILinkage.UploadImage(l.ctx, req.FilePath)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 6
- 11
api/internal/mqs/ScheduleAi.go View File

@@ -16,9 +16,7 @@ package mqs


import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
) )


@@ -27,25 +25,22 @@ import (
Listens on the AI task scheduling message queue and schedules each consumed message Listens on the AI task scheduling message queue and schedules each consumed message
*/ */
type AiQueue struct { type AiQueue struct {
ctx context.Context
svcCtx *svc.ServiceContext
scheduler *scheduler.Scheduler
ctx context.Context
svcCtx *svc.ServiceContext
} }


func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue {
aiExecutorMap, aiCollectorMap := service.InitAiClusterMap(ctx, svcCtx)
return &AiQueue{ return &AiQueue{
ctx: ctx,
svcCtx: svcCtx,
scheduler: scheduler.NewSchdlr(aiCollectorMap, nil, aiExecutorMap),
ctx: ctx,
svcCtx: svcCtx,
} }
} }


func (l *AiQueue) Consume(val string) error { func (l *AiQueue) Consume(val string) error {
aiSchdl, _ := schedulers.NewAiScheduler(val, l.scheduler, nil)
aiSchdl, _ := schedulers.NewAiScheduler(l.ctx, val, l.svcCtx.Scheduler, nil)


// 调度算法 // 调度算法
err := l.scheduler.AssignAndSchedule(aiSchdl)
err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl)
if err != nil { if err != nil {
return err return err
} }


+ 6
- 4
api/internal/scheduler/schedulers/aiScheduler.go View File

@@ -15,6 +15,7 @@
package schedulers package schedulers


import ( import (
"context"
"errors" "errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
@@ -32,10 +33,11 @@ type AiScheduler struct {
task *response.TaskInfo task *response.TaskInfo
*scheduler.Scheduler *scheduler.Scheduler
option *option.AiOption option *option.AiOption
ctx context.Context
} }


func NewAiScheduler(val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) {
return &AiScheduler{yamlString: val, Scheduler: scheduler, option: option}, nil
func NewAiScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) {
return &AiScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option}, nil
} }


func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
@@ -104,7 +106,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
continue continue
} }
go func() { go func() {
_, err := executorMap[c.Name].Execute(as.option)
_, err := executorMap[c.Name].Execute(as.ctx, as.option)
if err != nil { if err != nil {
// TODO: database operation // TODO: database operation
} }
@@ -127,7 +129,7 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats,
wg.Add(1) wg.Add(1)
rc := resourceCollector rc := resourceCollector
go func() { go func() {
spec, err := rc.GetResourceStats()
spec, err := rc.GetResourceStats(as.ctx)
if err != nil { if err != nil {
errCh <- err errCh <- err
wg.Done() wg.Done()


+ 8
- 6
api/internal/scheduler/service/aiService.go View File

@@ -1,11 +1,13 @@
package service package service


import ( import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient"
) )


const ( const (
@@ -24,21 +26,21 @@ var (
} }
) )


func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) {
func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) {
executorMap := make(map[string]executor.AiExecutor) executorMap := make(map[string]executor.AiExecutor)
collectorMap := make(map[string]collector.AiCollector) collectorMap := make(map[string]collector.AiCollector)
for k, v := range AiTypeMap { for k, v := range AiTypeMap {
switch v { switch v {
case OCTOPUS: case OCTOPUS:
octopus := storeLink.NewOctopusLink(ctx, svcCtx, k, 0)
octopus := storeLink.NewOctopusLink(octopusRpc, k, 0)
collectorMap[k] = octopus collectorMap[k] = octopus
executorMap[k] = octopus executorMap[k] = octopus
case MODELARTS: case MODELARTS:
modelarts := storeLink.NewModelArtsLink(ctx, svcCtx, k, 0)
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, k, 0)
collectorMap[k] = modelarts collectorMap[k] = modelarts
executorMap[k] = modelarts executorMap[k] = modelarts
case SHUGUANGAI: case SHUGUANGAI:
sgai := storeLink.NewShuguangAi(ctx, svcCtx, k, 0)
sgai := storeLink.NewShuguangAi(aCRpc, k, 0)
collectorMap[k] = sgai collectorMap[k] = sgai
executorMap[k] = sgai executorMap[k] = sgai
} }


+ 5
- 3
api/internal/scheduler/service/collector/collector.go View File

@@ -1,9 +1,11 @@
package collector package collector


import "context"

type AiCollector interface { type AiCollector interface {
GetResourceStats() (*ResourceStats, error)
GetDatasetsSpecs() ([]*DatasetsSpecs, error)
GetAlgorithms() ([]*Algorithm, error)
GetResourceStats(ctx context.Context) (*ResourceStats, error)
GetDatasetsSpecs(ctx context.Context) ([]*DatasetsSpecs, error)
GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
} }


type ResourceStats struct { type ResourceStats struct {


+ 2
- 1
api/internal/scheduler/service/executor/aiExecutor.go View File

@@ -1,9 +1,10 @@
package executor package executor


import ( import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
) )


type AiExecutor interface { type AiExecutor interface {
Execute(option *option.AiOption) (interface{}, error)
Execute(ctx context.Context, option *option.AiOption) (interface{}, error)
} }

+ 32
- 31
api/internal/storeLink/modelarts.go View File

@@ -18,44 +18,45 @@ import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts"
"strconv" "strconv"
"strings" "strings"
) )


type ModelArtsLink struct { type ModelArtsLink struct {
ctx context.Context
svcCtx *svc.ServiceContext
platform string
participantId int64
pageIndex int32
pageSize int32
modelArtsRpc modelartsservice.ModelArtsService
modelArtsImgRpc imagesservice.ImagesService
platform string
participantId int64
pageIndex int32
pageSize int32
} }


func NewModelArtsLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ModelArtsLink {
return &ModelArtsLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64) *ModelArtsLink {
return &ModelArtsLink{modelArtsRpc: modelArtsRpc, modelArtsImgRpc: modelArtsImgRpc, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
} }


func (m *ModelArtsLink) UploadImage(path string) (interface{}, error) {
func (m *ModelArtsLink) UploadImage(ctx context.Context, path string) (interface{}, error) {
//TODO modelArts上传镜像 //TODO modelArts上传镜像
return nil, nil return nil, nil
} }


func (m *ModelArtsLink) DeleteImage(imageId string) (interface{}, error) {
func (m *ModelArtsLink) DeleteImage(ctx context.Context, imageId string) (interface{}, error) {
// TODO modelArts删除镜像 // TODO modelArts删除镜像
return nil, nil return nil, nil
} }


func (m *ModelArtsLink) QueryImageList() (interface{}, error) {
func (m *ModelArtsLink) QueryImageList(ctx context.Context) (interface{}, error) {
// modelArts获取镜像列表 // modelArts获取镜像列表
req := &modelarts.ListRepoReq{ req := &modelarts.ListRepoReq{
Offset: "0", Offset: "0",
Limit: strconv.Itoa(int(m.pageSize)), Limit: strconv.Itoa(int(m.pageSize)),
Platform: m.platform, Platform: m.platform,
} }
resp, err := m.svcCtx.ModelArtsImgRpc.ListReposDetails(m.ctx, req)
resp, err := m.modelArtsImgRpc.ListReposDetails(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -63,7 +64,7 @@ func (m *ModelArtsLink) QueryImageList() (interface{}, error) {
return resp, nil return resp, nil
} }


func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
func (m *ModelArtsLink) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
// modelArts提交任务 // modelArts提交任务
environments := make(map[string]string) environments := make(map[string]string)
parameters := make([]*modelarts.ParametersTrainJob, 0) parameters := make([]*modelarts.ParametersTrainJob, 0)
@@ -100,7 +101,7 @@ func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa
}, },
Platform: m.platform, Platform: m.platform,
} }
resp, err := m.svcCtx.ModelArtsRpc.CreateTrainingJob(m.ctx, req)
resp, err := m.modelArtsRpc.CreateTrainingJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -108,13 +109,13 @@ func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa
return resp, nil return resp, nil
} }


func (m *ModelArtsLink) QueryTask(taskId string) (interface{}, error) {
func (m *ModelArtsLink) QueryTask(ctx context.Context, taskId string) (interface{}, error) {
// 获取任务 // 获取任务
req := &modelarts.DetailTrainingJobsReq{ req := &modelarts.DetailTrainingJobsReq{
TrainingJobId: taskId, TrainingJobId: taskId,
Platform: m.platform, Platform: m.platform,
} }
resp, err := m.svcCtx.ModelArtsRpc.GetTrainingJobs(m.ctx, req)
resp, err := m.modelArtsRpc.GetTrainingJobs(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -122,13 +123,13 @@ func (m *ModelArtsLink) QueryTask(taskId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (m *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) {
func (m *ModelArtsLink) DeleteTask(ctx context.Context, taskId string) (interface{}, error) {
// 删除任务 // 删除任务
req := &modelarts.DeleteTrainingJobReq{ req := &modelarts.DeleteTrainingJobReq{
TrainingJobId: taskId, TrainingJobId: taskId,
Platform: m.platform, Platform: m.platform,
} }
resp, err := m.svcCtx.ModelArtsRpc.DeleteTrainingJob(m.ctx, req)
resp, err := m.modelArtsRpc.DeleteTrainingJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -136,12 +137,12 @@ func (m *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (m *ModelArtsLink) QuerySpecs() (interface{}, error) {
func (m *ModelArtsLink) QuerySpecs(ctx context.Context) (interface{}, error) {
// modelArts查询资源规格 // modelArts查询资源规格
req := &modelarts.TrainingJobFlavorsReq{ req := &modelarts.TrainingJobFlavorsReq{
Platform: m.platform, Platform: m.platform,
} }
resp, err := m.svcCtx.ModelArtsRpc.GetTrainingJobFlavors(m.ctx, req)
resp, err := m.modelArtsRpc.GetTrainingJobFlavors(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -149,32 +150,32 @@ func (m *ModelArtsLink) QuerySpecs() (interface{}, error) {
return resp, nil return resp, nil
} }


func (m *ModelArtsLink) GetResourceStats() (*collector.ResourceStats, error) {
func (m *ModelArtsLink) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
return nil, nil return nil, nil
} }


func (m *ModelArtsLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
func (m *ModelArtsLink) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) {
return nil, nil return nil, nil
} }


func (m *ModelArtsLink) GetAlgorithms() ([]*collector.Algorithm, error) {
func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) {
return nil, nil return nil, nil
} }


func (m *ModelArtsLink) Execute(option *option.AiOption) (interface{}, error) {
err := m.GenerateSubmitParams(option)
func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
err := m.GenerateSubmitParams(ctx, option)
if err != nil { if err != nil {
return nil, err return nil, err
} }
task, err := m.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
task, err := m.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return task, nil return task, nil
} }


func (m *ModelArtsLink) GenerateSubmitParams(option *option.AiOption) error {
err := m.generateResourceId(option)
func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
err := m.generateResourceId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
@@ -197,8 +198,8 @@ func (m *ModelArtsLink) GenerateSubmitParams(option *option.AiOption) error {
return nil return nil
} }


func (m *ModelArtsLink) generateResourceId(option *option.AiOption) error {
_, err := m.QuerySpecs()
func (m *ModelArtsLink) generateResourceId(ctx context.Context, option *option.AiOption) error {
_, err := m.QuerySpecs(ctx)
if err != nil { if err != nil {
return err return err
} }


+ 43
- 44
api/internal/storeLink/octopus.go View File

@@ -19,17 +19,16 @@ import (
"errors" "errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient"
"math" "math"
"strconv" "strconv"
"strings" "strings"
) )


type OctopusLink struct { type OctopusLink struct {
ctx context.Context
svcCtx *svc.ServiceContext
octopusRpc octopusclient.Octopus
pageIndex int32 pageIndex int32
pageSize int32 pageSize int32
platform string platform string
@@ -66,11 +65,11 @@ var (
} }
) )


func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink {
return &OctopusLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
return &OctopusLink{octopusRpc: octopusRpc, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
} }


func (o *OctopusLink) UploadImage(path string) (interface{}, error) {
func (o *OctopusLink) UploadImage(ctx context.Context, path string) (interface{}, error) {
// octopus创建镜像 // octopus创建镜像
createReq := &octopus.CreateImageReq{ createReq := &octopus.CreateImageReq{
Platform: o.platform, Platform: o.platform,
@@ -80,7 +79,7 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) {
ImageVersion: IMG_VERSION_PREFIX + utils.RandomString(7), ImageVersion: IMG_VERSION_PREFIX + utils.RandomString(7),
}, },
} }
createResp, err := o.svcCtx.OctopusRpc.CreateImage(o.ctx, createReq)
createResp, err := o.octopusRpc.CreateImage(ctx, createReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -94,7 +93,7 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) {
FileName: "", FileName: "",
}, },
} }
uploadResp, err := o.svcCtx.OctopusRpc.UploadImage(o.ctx, uploadReq)
uploadResp, err := o.octopusRpc.UploadImage(ctx, uploadReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -104,13 +103,13 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) {
return uploadResp, nil return uploadResp, nil
} }


func (o *OctopusLink) DeleteImage(imageId string) (interface{}, error) {
func (o *OctopusLink) DeleteImage(ctx context.Context, imageId string) (interface{}, error) {
// octopus删除镜像 // octopus删除镜像
req := &octopus.DeleteImageReq{ req := &octopus.DeleteImageReq{
Platform: o.platform, Platform: o.platform,
ImageId: imageId, ImageId: imageId,
} }
resp, err := o.svcCtx.OctopusRpc.DeleteImage(o.ctx, req)
resp, err := o.octopusRpc.DeleteImage(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -118,14 +117,14 @@ func (o *OctopusLink) DeleteImage(imageId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (o *OctopusLink) QueryImageList() (interface{}, error) {
func (o *OctopusLink) QueryImageList(ctx context.Context) (interface{}, error) {
// octopus获取镜像列表 // octopus获取镜像列表
req := &octopus.GetUserImageListReq{ req := &octopus.GetUserImageListReq{
Platform: o.platform, Platform: o.platform,
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
resp, err := o.svcCtx.OctopusRpc.GetUserImageList(o.ctx, req)
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -133,7 +132,7 @@ func (o *OctopusLink) QueryImageList() (interface{}, error) {
return resp, nil return resp, nil
} }


func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
func (o *OctopusLink) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
// octopus提交任务 // octopus提交任务


// python参数 // python参数
@@ -176,7 +175,7 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, para
AlgorithmVersion: VERSION, AlgorithmVersion: VERSION,
}, },
} }
resp, err := o.svcCtx.OctopusRpc.CreateTrainJob(o.ctx, req)
resp, err := o.octopusRpc.CreateTrainJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -184,13 +183,13 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, para
return resp, nil return resp, nil
} }


func (o *OctopusLink) QueryTask(taskId string) (interface{}, error) {
func (o *OctopusLink) QueryTask(ctx context.Context, taskId string) (interface{}, error) {
// octopus获取任务 // octopus获取任务
req := &octopus.GetTrainJobReq{ req := &octopus.GetTrainJobReq{
Platform: o.platform, Platform: o.platform,
Id: taskId, Id: taskId,
} }
resp, err := o.svcCtx.OctopusRpc.GetTrainJob(o.ctx, req)
resp, err := o.octopusRpc.GetTrainJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -198,13 +197,13 @@ func (o *OctopusLink) QueryTask(taskId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (o *OctopusLink) DeleteTask(taskId string) (interface{}, error) {
func (o *OctopusLink) DeleteTask(ctx context.Context, taskId string) (interface{}, error) {
// octopus删除任务 // octopus删除任务
req := &octopus.DeleteTrainJobReq{ req := &octopus.DeleteTrainJobReq{
Platform: o.platform, Platform: o.platform,
JobIds: []string{taskId}, JobIds: []string{taskId},
} }
resp, err := o.svcCtx.OctopusRpc.DeleteTrainJob(o.ctx, req)
resp, err := o.octopusRpc.DeleteTrainJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -212,13 +211,13 @@ func (o *OctopusLink) DeleteTask(taskId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (o *OctopusLink) QuerySpecs() (interface{}, error) {
func (o *OctopusLink) QuerySpecs(ctx context.Context) (interface{}, error) {
// octopus查询资源规格 // octopus查询资源规格
req := &octopus.GetResourceSpecsReq{ req := &octopus.GetResourceSpecsReq{
Platform: o.platform, Platform: o.platform,
ResourcePool: RESOURCE_POOL, ResourcePool: RESOURCE_POOL,
} }
resp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
resp, err := o.octopusRpc.GetResourceSpecs(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -226,12 +225,12 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) {
return resp, nil return resp, nil
} }


func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
func (o *OctopusLink) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
req := &octopus.GetResourceSpecsReq{ req := &octopus.GetResourceSpecsReq{
Platform: o.platform, Platform: o.platform,
ResourcePool: RESOURCE_POOL, ResourcePool: RESOURCE_POOL,
} }
specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
specResp, err := o.octopusRpc.GetResourceSpecs(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -241,7 +240,7 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
balanceReq := &octopus.GetUserBalanceReq{ balanceReq := &octopus.GetUserBalanceReq{
Platform: o.platform, Platform: o.platform,
} }
balanceResp, err := o.svcCtx.OctopusRpc.GetUserBalance(o.ctx, balanceReq)
balanceResp, err := o.octopusRpc.GetUserBalance(ctx, balanceReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -294,13 +293,13 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
return resourceStats, nil return resourceStats, nil
} }


func (o *OctopusLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
func (o *OctopusLink) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) {
req := &octopus.GetMyDatasetListReq{ req := &octopus.GetMyDatasetListReq{
Platform: o.platform, Platform: o.platform,
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
resp, err := o.svcCtx.OctopusRpc.GetMyDatasetList(o.ctx, req)
resp, err := o.octopusRpc.GetMyDatasetList(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -315,7 +314,7 @@ func (o *OctopusLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
return specs, nil return specs, nil
} }


func (o *OctopusLink) GetAlgorithms() ([]*collector.Algorithm, error) {
func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) {
var algorithms []*collector.Algorithm var algorithms []*collector.Algorithm


req := &octopus.GetMyAlgorithmListReq{ req := &octopus.GetMyAlgorithmListReq{
@@ -323,7 +322,7 @@ func (o *OctopusLink) GetAlgorithms() ([]*collector.Algorithm, error) {
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
resp, err := o.svcCtx.OctopusRpc.GetMyAlgorithmList(o.ctx, req)
resp, err := o.octopusRpc.GetMyAlgorithmList(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -338,32 +337,32 @@ func (o *OctopusLink) GetAlgorithms() ([]*collector.Algorithm, error) {
return algorithms, nil return algorithms, nil
} }


func (o *OctopusLink) Execute(option *option.AiOption) (interface{}, error) {
err := o.GenerateSubmitParams(option)
func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
err := o.GenerateSubmitParams(ctx, option)
if err != nil { if err != nil {
return nil, err return nil, err
} }
task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
task, err := o.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return task, nil return task, nil
} }


func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
err := o.generateResourceId(option)
func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
err := o.generateResourceId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
err = o.generateDatasetsId(option)
err = o.generateDatasetsId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
err = o.generateImageId(option)
err = o.generateImageId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
err = o.generateAlgorithmId(option)
err = o.generateAlgorithmId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
@@ -382,7 +381,7 @@ func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
return nil return nil
} }


func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption) error {
if option.ResourceType == "" { if option.ResourceType == "" {
return errors.New("ResourceType not set") return errors.New("ResourceType not set")
} }
@@ -390,7 +389,7 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
Platform: o.platform, Platform: o.platform,
ResourcePool: RESOURCE_POOL, ResourcePool: RESOURCE_POOL,
} }
specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
specResp, err := o.octopusRpc.GetResourceSpecs(ctx, req)
if err != nil { if err != nil {
return err return err
} }
@@ -418,7 +417,7 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
return errors.New("failed to get ResourceId") return errors.New("failed to get ResourceId")
} }


func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error {
func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiOption) error {
if option.DatasetsName == "" { if option.DatasetsName == "" {
return errors.New("DatasetsName not set") return errors.New("DatasetsName not set")
} }
@@ -427,7 +426,7 @@ func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error {
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
resp, err := o.svcCtx.OctopusRpc.GetMyDatasetList(o.ctx, req)
resp, err := o.octopusRpc.GetMyDatasetList(ctx, req)
if err != nil { if err != nil {
return err return err
} }
@@ -443,7 +442,7 @@ func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error {
return errors.New("failed to get DatasetsId") return errors.New("failed to get DatasetsId")
} }


func (o *OctopusLink) generateImageId(option *option.AiOption) error {
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption) error {
if option.TaskType == "" { if option.TaskType == "" {
return errors.New("TaskType not set") return errors.New("TaskType not set")
} }
@@ -453,7 +452,7 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error {
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
resp, err := o.svcCtx.OctopusRpc.GetUserImageList(o.ctx, req)
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
if err != nil { if err != nil {
return err return err
} }
@@ -475,7 +474,7 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error {
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
preImgResp, err := o.svcCtx.OctopusRpc.GetPresetImageList(o.ctx, preImgReq)
preImgResp, err := o.octopusRpc.GetPresetImageList(ctx, preImgReq)
if err != nil { if err != nil {
return err return err
} }
@@ -495,7 +494,7 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error {
return errors.New("failed to get ImageId") return errors.New("failed to get ImageId")
} }


func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error {
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
// temporarily set algorithm to cnn // temporarily set algorithm to cnn
if option.AlgorithmName == "" { if option.AlgorithmName == "" {
switch option.DatasetsName { switch option.DatasetsName {
@@ -511,7 +510,7 @@ func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error {
PageIndex: o.pageIndex, PageIndex: o.pageIndex,
PageSize: o.pageSize, PageSize: o.pageSize,
} }
resp, err := o.svcCtx.OctopusRpc.GetMyAlgorithmList(o.ctx, req)
resp, err := o.octopusRpc.GetMyAlgorithmList(ctx, req)
if err != nil { if err != nil {
return err return err
} }


+ 15
- 16
api/internal/storeLink/shuguangHpc.go View File

@@ -4,17 +4,16 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient"
"strconv" "strconv"
"strings" "strings"
) )


type ShuguangHpc struct { type ShuguangHpc struct {
ctx context.Context
svcCtx *svc.ServiceContext
aCRpc hpcacclient.HpcAC
platform string platform string
participantId int64 participantId int64
} }
@@ -128,23 +127,23 @@ type ResourceSpecHpc struct {
GAP_NDCU string GAP_NDCU string
} }


func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangHpc {
return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id}
func NewShuguangHpc(aCRpc hpcacclient.HpcAC, name string, id int64) *ShuguangHpc {
return &ShuguangHpc{aCRpc: aCRpc, platform: name, participantId: id}
} }


func (s ShuguangHpc) UploadImage(path string) (interface{}, error) {
func (s ShuguangHpc) UploadImage(ctx context.Context, path string) (interface{}, error) {
return nil, nil return nil, nil
} }


func (s ShuguangHpc) DeleteImage(imageId string) (interface{}, error) {
func (s ShuguangHpc) DeleteImage(ctx context.Context, imageId string) (interface{}, error) {
return nil, nil return nil, nil
} }


func (s ShuguangHpc) QueryImageList() (interface{}, error) {
func (s ShuguangHpc) QueryImageList(ctx context.Context) (interface{}, error) {
return nil, nil return nil, nil
} }


func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
func (s ShuguangHpc) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
// shuguangHpc提交任务 // shuguangHpc提交任务


//判断是否resourceId匹配自定义资源Id //判断是否resourceId匹配自定义资源Id
@@ -194,7 +193,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param


updateSGHpcRequestByResourceId(resourceId, req) updateSGHpcRequestByResourceId(resourceId, req)


resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
resp, err := s.aCRpc.SubmitJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -203,12 +202,12 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param


} }


func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
func (s ShuguangHpc) QueryTask(ctx context.Context, taskId string) (interface{}, error) {
//实时作业 //实时作业
reqC := &hpcAC.JobDetailReq{ reqC := &hpcAC.JobDetailReq{
JobId: taskId, JobId: taskId,
} }
respC, err := s.svcCtx.ACRpc.GetJobDetail(s.ctx, reqC)
respC, err := s.aCRpc.GetJobDetail(ctx, reqC)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -223,7 +222,7 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
JobmanagerId: strconv.Itoa(StrJobManagerID), JobmanagerId: strconv.Itoa(StrJobManagerID),
} }


respH, err := s.svcCtx.ACRpc.HistoryJobDetail(s.ctx, reqH)
respH, err := s.aCRpc.HistoryJobDetail(ctx, reqH)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -232,7 +231,7 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
} }
} }


func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
func (s ShuguangHpc) QuerySpecs(ctx context.Context) (interface{}, error) {
resp := &types.GetResourceSpecsResp{} resp := &types.GetResourceSpecsResp{}


for k, v := range RESOURCESPECSHPC { for k, v := range RESOURCESPECSHPC {
@@ -248,12 +247,12 @@ func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
return resp, nil return resp, nil
} }


func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
func (s ShuguangHpc) DeleteTask(ctx context.Context, taskId string) (interface{}, error) {
strJobInfoMap := fmt.Sprintf(StrJobInfoMap, StrJobManagerID, Username, taskId) strJobInfoMap := fmt.Sprintf(StrJobInfoMap, StrJobManagerID, Username, taskId)
req := &hpcAC.DeleteJobReq{ req := &hpcAC.DeleteJobReq{
StrJobInfoMap: strJobInfoMap, StrJobInfoMap: strJobInfoMap,
} }
resp, err := s.svcCtx.ACRpc.DeleteJob(s.ctx, req)
resp, err := s.aCRpc.DeleteJob(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 40
- 41
api/internal/storeLink/shuguangai.go View File

@@ -20,10 +20,10 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient"
"strconv" "strconv"
"strings" "strings"
) )
@@ -91,31 +91,30 @@ type ResourceSpecSGAI struct {
} }


type ShuguangAi struct { type ShuguangAi struct {
ctx context.Context
svcCtx *svc.ServiceContext
aCRpc hpcacclient.HpcAC
platform string platform string
participantId int64 participantId int64
} }


func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi {
return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id}
func NewShuguangAi(aCRpc hpcacclient.HpcAC, name string, id int64) *ShuguangAi {
return &ShuguangAi{aCRpc: aCRpc, platform: name, participantId: id}
} }


func (s *ShuguangAi) UploadImage(path string) (interface{}, error) {
func (s *ShuguangAi) UploadImage(ctx context.Context, path string) (interface{}, error) {
return nil, nil return nil, nil
} }


func (s *ShuguangAi) DeleteImage(imageId string) (interface{}, error) {
func (s *ShuguangAi) DeleteImage(ctx context.Context, imageId string) (interface{}, error) {
return nil, nil return nil, nil
} }


func (s *ShuguangAi) QueryImageList() (interface{}, error) {
func (s *ShuguangAi) QueryImageList(ctx context.Context) (interface{}, error) {
// shuguangAi获取镜像列表 // shuguangAi获取镜像列表
req := &hpcAC.GetImageListAiReq{ req := &hpcAC.GetImageListAiReq{
AcceleratorType: DCU, AcceleratorType: DCU,
TaskType: PYTORCH, TaskType: PYTORCH,
} }
resp, err := s.svcCtx.ACRpc.GetImageListAi(s.ctx, req)
resp, err := s.aCRpc.GetImageListAi(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -123,7 +122,7 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) {
return resp, nil return resp, nil
} }


func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) {
func (s *ShuguangAi) SubmitPytorchTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) {
//判断是否resourceId匹配自定义资源Id //判断是否resourceId匹配自定义资源Id
_, isMapContainsKey := RESOURCESPECSAI[resourceId] _, isMapContainsKey := RESOURCESPECSAI[resourceId]
if !isMapContainsKey { if !isMapContainsKey {
@@ -132,7 +131,7 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string


//根据imageId获取imagePath, version //根据imageId获取imagePath, version
imageReq := &hpcAC.GetImageAiByIdReq{ImageId: imageId} imageReq := &hpcAC.GetImageAiByIdReq{ImageId: imageId}
imageResp, err := s.svcCtx.ACRpc.GetImageAiById(s.ctx, imageReq)
imageResp, err := s.aCRpc.GetImageAiById(ctx, imageReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -176,7 +175,7 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string


updateSGAIRequestByResourceId(resourceId, req) updateSGAIRequestByResourceId(resourceId, req)


resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req)
resp, err := s.aCRpc.SubmitPytorchTask(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -191,7 +190,7 @@ func updateSGAIRequestByResourceId(resourceId string, req *hpcAC.SubmitPytorchTa
req.Params.WorkerRamSize = spec.RAM req.Params.WorkerRamSize = spec.RAM
} }


func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) {
func (s *ShuguangAi) SubmitTensorflowTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) {
//req := &hpcAC.SubmitTensorflowTaskReq{ //req := &hpcAC.SubmitTensorflowTaskReq{
// Params: &hpcAC.SubmitTensorflowTaskParams{ // Params: &hpcAC.SubmitTensorflowTaskParams{
// //
@@ -200,7 +199,7 @@ func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []str
return nil, nil return nil, nil
} }


func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
func (s *ShuguangAi) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
// set algorithmId temporarily for storelink submit // set algorithmId temporarily for storelink submit
if algorithmId == "" { if algorithmId == "" {
algorithmId = "pytorch-mnist-fcn" algorithmId = "pytorch-mnist-fcn"
@@ -209,13 +208,13 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, param
// shuguangAi提交任务 // shuguangAi提交任务
switch aiType { switch aiType {
case PYTORCH_TASK: case PYTORCH_TASK:
task, err := s.SubmitPytorchTask(imageId, cmd, envs, params, resourceId, datasetsId, algorithmId)
task, err := s.SubmitPytorchTask(ctx, imageId, cmd, envs, params, resourceId, datasetsId, algorithmId)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return task, nil return task, nil
case TENSORFLOW_TASK: case TENSORFLOW_TASK:
task, err := s.SubmitTensorflowTask(imageId, cmd, envs, params, resourceId, datasetsId, algorithmId)
task, err := s.SubmitTensorflowTask(ctx, imageId, cmd, envs, params, resourceId, datasetsId, algorithmId)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -224,12 +223,12 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, param
return nil, errors.New("shuguangAi不支持的任务类型") return nil, errors.New("shuguangAi不支持的任务类型")
} }


func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) {
func (s *ShuguangAi) QueryTask(ctx context.Context, taskId string) (interface{}, error) {
// shuguangAi获取任务 // shuguangAi获取任务
req := &hpcAC.GetPytorchTaskReq{ req := &hpcAC.GetPytorchTaskReq{
Id: taskId, Id: taskId,
} }
resp, err := s.svcCtx.ACRpc.GetPytorchTask(s.ctx, req)
resp, err := s.aCRpc.GetPytorchTask(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -237,12 +236,12 @@ func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
func (s *ShuguangAi) DeleteTask(ctx context.Context, taskId string) (interface{}, error) {
// shuguangAi删除任务 // shuguangAi删除任务
req := &hpcAC.DeleteTaskAiReq{ req := &hpcAC.DeleteTaskAiReq{
Ids: taskId, Ids: taskId,
} }
resp, err := s.svcCtx.ACRpc.DeleteTaskAi(s.ctx, req)
resp, err := s.aCRpc.DeleteTaskAi(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -250,7 +249,7 @@ func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
return resp, nil return resp, nil
} }


func (s *ShuguangAi) QuerySpecs() (interface{}, error) {
func (s *ShuguangAi) QuerySpecs(ctx context.Context) (interface{}, error) {
resp := &types.GetResourceSpecsResp{} resp := &types.GetResourceSpecsResp{}


for k, v := range RESOURCESPECSAI { for k, v := range RESOURCESPECSAI {
@@ -266,10 +265,10 @@ func (s *ShuguangAi) QuerySpecs() (interface{}, error) {
return resp, nil return resp, nil
} }


func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {
func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
//balance //balance
userReq := &hpcAC.GetUserInfoReq{} userReq := &hpcAC.GetUserInfoReq{}
userinfo, err := s.svcCtx.ACRpc.GetUserInfo(s.ctx, userReq)
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -277,7 +276,7 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {


//resource limit //resource limit
limitReq := &hpcAC.QueueReq{} limitReq := &hpcAC.QueueReq{}
limitResp, err := s.svcCtx.ACRpc.QueryUserQuotasLimit(s.ctx, limitReq)
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -286,7 +285,7 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {


//disk //disk
diskReq := &hpcAC.ParaStorQuotaReq{} diskReq := &hpcAC.ParaStorQuotaReq{}
diskResp, err := s.svcCtx.ACRpc.ParaStorQuota(s.ctx, diskReq)
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -295,14 +294,14 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)


//memory //memory
nodeResp, err := s.svcCtx.ACRpc.GetNodeResources(s.ctx, nil)
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES


//resources being occupied //resources being occupied
memberJobResp, err := s.svcCtx.ACRpc.GetMemberJobs(s.ctx, nil)
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -361,9 +360,9 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {
return resourceStats, nil return resourceStats, nil
} }


func (s *ShuguangAi) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
func (s *ShuguangAi) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) {
req := &hpcAC.GetFileListReq{Limit: 100, Path: DATASETS_DIR, Start: 0} req := &hpcAC.GetFileListReq{Limit: 100, Path: DATASETS_DIR, Start: 0}
list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req)
list, err := s.aCRpc.GetFileList(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -378,12 +377,12 @@ func (s *ShuguangAi) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
return specs, nil return specs, nil
} }


func (s *ShuguangAi) GetAlgorithms() ([]*collector.Algorithm, error) {
func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) {
var algorithms []*collector.Algorithm var algorithms []*collector.Algorithm
for _, t := range GetTaskTypes() { for _, t := range GetTaskTypes() {
taskType := t taskType := t
req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0} req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0}
list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req)
list, err := s.aCRpc.GetFileList(ctx, req)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -398,28 +397,28 @@ func (s *ShuguangAi) GetAlgorithms() ([]*collector.Algorithm, error) {
return algorithms, nil return algorithms, nil
} }


func (s *ShuguangAi) Execute(option *option.AiOption) (interface{}, error) {
err := s.GenerateSubmitParams(option)
func (s *ShuguangAi) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
err := s.GenerateSubmitParams(ctx, option)
if err != nil { if err != nil {
return nil, err return nil, err
} }
task, err := s.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
task, err := s.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return task, nil return task, nil
} }


func (s *ShuguangAi) GenerateSubmitParams(option *option.AiOption) error {
func (s *ShuguangAi) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
err := s.generateResourceId(option) err := s.generateResourceId(option)
if err != nil { if err != nil {
return err return err
} }
err = s.generateImageId(option)
err = s.generateImageId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
err = s.generateAlgorithmId(option)
err = s.generateAlgorithmId(ctx, option)
if err != nil { if err != nil {
return err return err
} }
@@ -473,7 +472,7 @@ func (s *ShuguangAi) generateResourceId(option *option.AiOption) error {
return errors.New("failed to get ResourceId") return errors.New("failed to get ResourceId")
} }


func (s *ShuguangAi) generateImageId(option *option.AiOption) error {
func (s *ShuguangAi) generateImageId(ctx context.Context, option *option.AiOption) error {
if option.TaskType == "" { if option.TaskType == "" {
return errors.New("TaskType not set") return errors.New("TaskType not set")
} }
@@ -482,7 +481,7 @@ func (s *ShuguangAi) generateImageId(option *option.AiOption) error {
AcceleratorType: DCU, AcceleratorType: DCU,
TaskType: taskType, TaskType: taskType,
} }
resp, err := s.svcCtx.ACRpc.GetImageListAi(s.ctx, req)
resp, err := s.aCRpc.GetImageListAi(ctx, req)
if err != nil { if err != nil {
return err return err
} }
@@ -502,13 +501,13 @@ func (s *ShuguangAi) generateImageId(option *option.AiOption) error {
return errors.New("failed to get ImageId") return errors.New("failed to get ImageId")
} }


func (s *ShuguangAi) generateAlgorithmId(option *option.AiOption) error {
func (s *ShuguangAi) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
if option.DatasetsName == "" { if option.DatasetsName == "" {
return errors.New("DatasetsName not set") return errors.New("DatasetsName not set")
} }


req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + option.TaskType, Start: 0} req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + option.TaskType, Start: 0}
list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req)
list, err := s.aCRpc.GetFileList(ctx, req)
if err != nil { if err != nil {
return err return err
} }


+ 16
- 16
api/internal/storeLink/storeLink.go View File

@@ -32,13 +32,13 @@ import (
) )


type Linkage interface { type Linkage interface {
UploadImage(path string) (interface{}, error)
DeleteImage(imageId string) (interface{}, error)
QueryImageList() (interface{}, error)
SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error)
QueryTask(taskId string) (interface{}, error)
QuerySpecs() (interface{}, error)
DeleteTask(taskId string) (interface{}, error)
UploadImage(ctx context.Context, path string) (interface{}, error)
DeleteImage(ctx context.Context, imageId string) (interface{}, error)
QueryImageList(ctx context.Context) (interface{}, error)
SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error)
QueryTask(ctx context.Context, taskId string) (interface{}, error)
QuerySpecs(ctx context.Context) (interface{}, error)
DeleteTask(ctx context.Context, taskId string) (interface{}, error)
} }


const ( const (
@@ -89,19 +89,19 @@ type StoreLink struct {
ILinkage Linkage ILinkage Linkage
} }


func NewStoreLink(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *StoreLink {
func NewStoreLink(svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *StoreLink {
switch participant.Type { switch participant.Type {
case TYPE_OCTOPUS: case TYPE_OCTOPUS:
linkStruct := NewOctopusLink(ctx, svcCtx, participant.Name, participant.Id)
linkStruct := NewOctopusLink(svcCtx.OctopusRpc, participant.Name, participant.Id)
return &StoreLink{ILinkage: linkStruct} return &StoreLink{ILinkage: linkStruct}
case TYPE_MODELARTS: case TYPE_MODELARTS:
linkStruct := NewModelArtsLink(ctx, svcCtx, participant.Name, participant.Id)
linkStruct := NewModelArtsLink(svcCtx.ModelArtsRpc, svcCtx.ModelArtsImgRpc, participant.Name, participant.Id)
return &StoreLink{ILinkage: linkStruct} return &StoreLink{ILinkage: linkStruct}
case TYPE_SHUGUANGAI: case TYPE_SHUGUANGAI:
linkStruct := NewShuguangAi(ctx, svcCtx, participant.Name, participant.Id)
linkStruct := NewShuguangAi(svcCtx.ACRpc, participant.Name, participant.Id)
return &StoreLink{ILinkage: linkStruct} return &StoreLink{ILinkage: linkStruct}
case TYPE_SHUGUANGHPC: case TYPE_SHUGUANGHPC:
linkStruct := NewShuguangHpc(ctx, svcCtx, participant.Name, participant.Id)
linkStruct := NewShuguangHpc(svcCtx.ACRpc, participant.Name, participant.Id)
return &StoreLink{ILinkage: linkStruct} return &StoreLink{ILinkage: linkStruct}
default: default:
return nil return nil
@@ -124,7 +124,7 @@ func GetResourceTypes() []string {
return resourceTypes return resourceTypes
} }


func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, error) {
func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) {
var wg sync.WaitGroup var wg sync.WaitGroup
var errCh = make(chan error, len(*collectorMap)) var errCh = make(chan error, len(*collectorMap))
var errs []error var errs []error
@@ -136,7 +136,7 @@ func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string,
c := col c := col
go func() { go func() {
var ns []string var ns []string
specs, err := c.GetDatasetsSpecs()
specs, err := c.GetDatasetsSpecs(ctx)
if err != nil { if err != nil {
errCh <- err errCh <- err
wg.Done() wg.Done()
@@ -176,7 +176,7 @@ func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string,
return names, nil return names, nil
} }


func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
var names []string var names []string
var wg sync.WaitGroup var wg sync.WaitGroup
var errCh = make(chan error, len(*collectorMap)) var errCh = make(chan error, len(*collectorMap))
@@ -189,7 +189,7 @@ func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType
c := col c := col
go func() { go func() {
var ns []string var ns []string
algorithms, err := c.GetAlgorithms()
algorithms, err := c.GetAlgorithms(ctx)
if err != nil { if err != nil {
errCh <- err errCh <- err
wg.Done() wg.Done()


+ 17
- 4
api/internal/svc/servicecontext.go View File

@@ -22,6 +22,7 @@ import (
"github.com/zeromicro/go-zero/core/logx" "github.com/zeromicro/go-zero/core/logx"
"github.com/zeromicro/go-zero/zrpc" "github.com/zeromicro/go-zero/zrpc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice" "gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice"
@@ -58,6 +59,7 @@ type ServiceContext struct {
PromClient tracker.Prometheus PromClient tracker.Prometheus
AlertClient *alert.AlertmanagerAPI AlertClient *alert.AlertmanagerAPI
HttpClient *resty.Client HttpClient *resty.Client
Scheduler *scheduler.Scheduler
} }


func NewServiceContext(c config.Config) *ServiceContext { func NewServiceContext(c config.Config) *ServiceContext {
@@ -110,16 +112,26 @@ func NewServiceContext(c config.Config) *ServiceContext {
Addr: c.Redis.Host, Addr: c.Redis.Host,
Password: c.Redis.Pass, Password: c.Redis.Pass,
}) })

// scheduler
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
//aiExecutor, resourceCollector := service2.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc)
//storage := &database.AiStorage{DbEngin: dbEngin}
scheduler := scheduler.NewSchdlr(nil, nil, nil)

return &ServiceContext{ return &ServiceContext{
Cron: cron.New(cron.WithSeconds()), Cron: cron.New(cron.WithSeconds()),
DbEngin: dbEngin, DbEngin: dbEngin,
Config: c, Config: c,
RedisClient: redisClient, RedisClient: redisClient,
ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)),
ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)),
ModelArtsRpc: modelArtsRpc,
ModelArtsImgRpc: modelArtsImgRpc,
CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)), CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)),
ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)),
OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)),
ACRpc: aCRpc,
OctopusRpc: octopusRpc,
OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)), OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)),
K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)), K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)),
MonitorClient: make(map[int64]tracker.Prometheus), MonitorClient: make(map[int64]tracker.Prometheus),
@@ -127,5 +139,6 @@ func NewServiceContext(c config.Config) *ServiceContext {
PromClient: promClient, PromClient: promClient,
AlertClient: alertClient, AlertClient: alertClient,
HttpClient: httpClient, HttpClient: httpClient,
Scheduler: scheduler,
} }
} }

Loading…
Cancel
Save