package service

import (
	"strconv"
	"sync"
	"time"

	"github.com/zeromicro/go-zero/zrpc"
	"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/config"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/database"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/collector"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/task/tasksync"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink/octopusHttp"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
	"gitlink.org.cn/JointCloud/pcm-modelarts/client/imagesservice"
	"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
	"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
)

// Cluster kind identifiers. InitAiClusterMap compares them against
// ClusterInfo.Driver; UpdateClusterMaps compares them against ClusterInfo.Name.
const (
	OCTOPUS    = "octopus"
	MODELARTS  = "modelarts"
	SHUGUANGAI = "shuguangAi"
	OPENI      = "openI"
)

// AiService holds the per-adapter, per-cluster AI backends together with the
// storage handle, configuration and task synchronisers built from them.
type AiService struct {
	// AiExecutorAdapterMap maps adapter ID -> cluster ID -> executor.
	AiExecutorAdapterMap map[string]map[string]executor.AiExecutor
	// AiCollectorAdapterMap maps adapter ID -> cluster ID -> collector.
	AiCollectorAdapterMap map[string]map[string]collector.AiCollector
	// InferenceAdapterMap maps adapter ID -> cluster ID -> inference cluster.
	InferenceAdapterMap map[string]map[string]inference.ICluster
	// Storage is the database handle used to look up clusters and task names.
	Storage *database.AiStorage
	// LocalCache is stored as passed to NewAiService; it is not read in this file.
	LocalCache map[string]interface{}
	// Conf is the configuration passed to NewAiService.
	Conf *config.Config
	// TaskSyncLock is not used in this file.
	// NOTE(review): presumably guards task synchronisation — confirm against callers.
	TaskSyncLock sync.Mutex
	// St is created by tasksync.NewTrainTask from the collector map.
	St *tasksync.SyncTrain
	// Si is created by tasksync.NewInferTask from the inference map.
	Si *tasksync.SyncInfer
}

// NewAiService builds an AiService for the currently hard-coded adapter ID.
//
// For each adapter it loads the clusters from storages and, when the list is
// non-empty, registers the executor/collector/inference maps produced by
// InitAiClusterMap. Adapters with no clusters get no entry in the maps. It
// returns the first error reported by storages.GetClustersByAdapterId.
func NewAiService(conf *config.Config, storages *database.AiStorage, localCache map[string]interface{}) (*AiService, error) {
	// The adapter list is pinned to a single ID for now; the storage-backed
	// lookup is kept below, commented out, as in the original code.
	//var aiType = "1"
	var tempAdapterId = "1777144940459986944"
	adapterIds := []string{tempAdapterId}
	//adapterIds, err := storages.GetAdapterIdsByType(aiType)
	//if err != nil {
	//	return nil, err
	//}

	aiService := &AiService{
		AiExecutorAdapterMap:  make(map[string]map[string]executor.AiExecutor),
		AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector),
		InferenceAdapterMap:   make(map[string]map[string]inference.ICluster),
		Storage:               storages,
		LocalCache:            localCache,
		Conf:                  conf,
	}

	for _, id := range adapterIds {
		clusters, err := storages.GetClustersByAdapterId(id)
		if err != nil {
			return nil, err
		}
		if len(clusters.List) == 0 {
			continue
		}
		exeClusterMap, colClusterMap, inferMap := InitAiClusterMap(conf, clusters.List)
		aiService.AiExecutorAdapterMap[id] = exeClusterMap
		aiService.AiCollectorAdapterMap[id] = colClusterMap
		aiService.InferenceAdapterMap[id] = inferMap
	}

	// The synchronisers receive the same map values held by aiService, so
	// adapters added later through UpdateClusterMaps are visible to them.
	aiService.St = tasksync.NewTrainTask(storages, aiService.AiCollectorAdapterMap, conf)
	aiService.Si = tasksync.NewInferTask(storages, aiService.InferenceAdapterMap, conf)

	return aiService, nil
}

// InitAiClusterMap creates one backend per cluster, selected by c.Driver, and
// returns three maps keyed by cluster ID that all reference that backend:
// executors, collectors and inference clusters. Clusters whose driver is not
// one of OCTOPUS, MODELARTS, SHUGUANGAI or OPENI are skipped.
//
// The RPC-backed drivers use zrpc.MustNewClient.
func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector, map[string]inference.ICluster) {
	executorMap := make(map[string]executor.AiExecutor)
	collectorMap := make(map[string]collector.AiCollector)
	inferenceMap := make(map[string]inference.ICluster)

	for _, c := range clusters {
		// The parse error is intentionally dropped, as in the original code:
		// a cluster with a non-numeric ID is still registered, using whatever
		// value ParseInt returns on failure (0 for malformed input).
		id, _ := strconv.ParseInt(c.Id, 10, 64)

		switch c.Driver {
		case OCTOPUS:
			octopus := octopusHttp.NewOctopusHttp(id, c.Nickname, c.Server, c.Address, c.Username, c.Password)
			collectorMap[c.Id] = octopus
			executorMap[c.Id] = octopus
			inferenceMap[c.Id] = octopus
		case MODELARTS:
			modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
			modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
			modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Name, id, c.Nickname)
			collectorMap[c.Id] = modelarts
			executorMap[c.Id] = modelarts
			inferenceMap[c.Id] = modelarts
		case SHUGUANGAI:
			aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf))
			sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
			collectorMap[c.Id] = sgai
			executorMap[c.Id] = sgai
			inferenceMap[c.Id] = sgai
		case OPENI:
			openi := storeLink.NewOpenI(c.Server, id, c.Username, c.Token, c.Nickname)
			collectorMap[c.Id] = openi
			executorMap[c.Id] = openi
			inferenceMap[c.Id] = openi
		}
	}

	return executorMap, collectorMap, inferenceMap
}

// ensureAdapterMaps creates the per-adapter inner maps that do not exist yet,
// so that a subsequent insert for adapterId cannot hit a nil map.
func (as *AiService) ensureAdapterMaps(adapterId string) {
	if as.AiExecutorAdapterMap[adapterId] == nil {
		as.AiExecutorAdapterMap[adapterId] = make(map[string]executor.AiExecutor)
	}
	if as.AiCollectorAdapterMap[adapterId] == nil {
		as.AiCollectorAdapterMap[adapterId] = make(map[string]collector.AiCollector)
	}
	if as.InferenceAdapterMap[adapterId] == nil {
		as.InferenceAdapterMap[adapterId] = make(map[string]inference.ICluster)
	}
}

// UpdateClusterMaps registers backends for clusters of adapterId that are not
// yet present in any of the three adapter maps. A cluster already present in
// at least one map is left untouched.
//
// NewAiService creates no inner maps for an adapter whose cluster list was
// empty, so they are created here on first insert; writing through the
// missing inner map would otherwise panic.
//
// NOTE(review): this method selects the backend by c.Name and has no OPENI
// case, whereas InitAiClusterMap selects by c.Driver and handles OPENI, and it
// builds the OCTOPUS backend with storeLink.NewOctopusLink instead of
// octopusHttp.NewOctopusHttp. Confirm whether these differences are intended.
func (as *AiService) UpdateClusterMaps(conf *config.Config, adapterId string, clusters []types.ClusterInfo) {
	for _, c := range clusters {
		_, inExecutors := as.AiExecutorAdapterMap[adapterId][c.Id]
		_, inCollectors := as.AiCollectorAdapterMap[adapterId][c.Id]
		_, inInference := as.InferenceAdapterMap[adapterId][c.Id]
		if inExecutors || inCollectors || inInference {
			continue
		}

		// The parse error is intentionally dropped, as in the original code.
		id, _ := strconv.ParseInt(c.Id, 10, 64)

		var (
			exe   executor.AiExecutor
			col   collector.AiCollector
			infer inference.ICluster
		)
		switch c.Name {
		case OCTOPUS:
			octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf))
			octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
			exe, col, infer = octopus, octopus, octopus
		case MODELARTS:
			modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
			modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
			modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Name, id, c.Nickname)
			exe, col, infer = modelarts, modelarts, modelarts
		case SHUGUANGAI:
			aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf))
			sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
			exe, col, infer = sgai, sgai, sgai
		default:
			// Unrecognised names are skipped without touching the maps.
			continue
		}

		// Create the inner maps only when there is something to insert, so
		// adapters without any recognised cluster gain no empty entries.
		as.ensureAdapterMaps(adapterId)
		as.AiExecutorAdapterMap[adapterId][c.Id] = exe
		as.AiCollectorAdapterMap[adapterId][c.Id] = col
		as.InferenceAdapterMap[adapterId][c.Id] = infer
	}
}

// HandleDuplicateTaskName returns name unchanged when storage reports that no
// task of taskType uses it. Otherwise it returns name followed by "_" and the
// current time formatted with constants.Layout_Time_Suffix. An error from the
// storage lookup is returned as-is with an empty name.
func (as *AiService) HandleDuplicateTaskName(name string, taskType string) (string, error) {
	exist, err := as.Storage.DoesTaskNameExist(name, taskType)
	if err != nil {
		return "", err
	}
	if exist {
		return name + "_" + time.Now().Format(constants.Layout_Time_Suffix), nil
	}
	return name, nil
}