| @@ -16,7 +16,8 @@ package cron | |||||
| import ( | import ( | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/stat" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| ) | ) | ||||
| @@ -28,8 +29,8 @@ func AddCronGroup(svc *svc.ServiceContext) { | |||||
| logx.Errorf(err.Error()) | logx.Errorf(err.Error()) | ||||
| return | return | ||||
| } | } | ||||
| updater.UpdateTaskStatus(svc, list) | |||||
| updater.UpdateAiTaskStatus(svc, list) | |||||
| status.UpdateTaskStatus(svc, list) | |||||
| status.UpdateAiTaskStatus(svc, list) | |||||
| }) | }) | ||||
| svc.Cron.AddFunc("*/5 * * * * ?", func() { | svc.Cron.AddFunc("*/5 * * * * ?", func() { | ||||
| @@ -42,6 +43,6 @@ func AddCronGroup(svc *svc.ServiceContext) { | |||||
| logx.Errorf(err.Error()) | logx.Errorf(err.Error()) | ||||
| return | return | ||||
| } | } | ||||
| updater.UpdateClusterResources(svc, adapterList) | |||||
| stat.UpdateClusterResources(svc, adapterList) | |||||
| }) | }) | ||||
| } | } | ||||
| @@ -3,7 +3,7 @@ package ai | |||||
| import ( | import ( | ||||
| "context" | "context" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/stat" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| ) | ) | ||||
| @@ -37,7 +37,7 @@ func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverview | |||||
| centerNum = int32(len(adapterList)) | centerNum = int32(len(adapterList)) | ||||
| resp.CenterNum = centerNum | resp.CenterNum = centerNum | ||||
| go updater.UpdateClusterResources(l.svcCtx, adapterList) | |||||
| go stat.UpdateClusterResources(l.svcCtx, adapterList) | |||||
| for _, adapter := range adapterList { | for _, adapter := range adapterList { | ||||
| taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id) | taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id) | ||||
| @@ -3,7 +3,7 @@ package ai | |||||
| import ( | import ( | ||||
| "context" | "context" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | ||||
| @@ -32,7 +32,7 @@ func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskList | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| go updater.UpdateTrainingTaskStatus(l.svcCtx, adapterList) | |||||
| go status.UpdateTrainingTaskStatus(l.svcCtx, adapterList) | |||||
| for _, adapter := range adapterList { | for _, adapter := range adapterList { | ||||
| taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id) | taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id) | ||||
| @@ -2,7 +2,7 @@ package core | |||||
| import ( | import ( | ||||
| "context" | "context" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" | ||||
| @@ -52,8 +52,8 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa | |||||
| } | } | ||||
| // 更新智算任务状态 | // 更新智算任务状态 | ||||
| go updater.UpdateTaskStatus(l.svcCtx, list) | |||||
| go updater.UpdateAiTaskStatus(l.svcCtx, list) | |||||
| go status.UpdateTaskStatus(l.svcCtx, list) | |||||
| go status.UpdateAiTaskStatus(l.svcCtx, list) | |||||
| for _, model := range list { | for _, model := range list { | ||||
| if model.StartTime != "" && model.EndTime == "" { | if model.StartTime != "" && model.EndTime == "" { | ||||
| @@ -5,7 +5,7 @@ import ( | |||||
| "errors" | "errors" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| @@ -71,7 +71,7 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi | |||||
| list := common.ConcatMultipleSlices(slices) | list := common.ConcatMultipleSlices(slices) | ||||
| if len(list) != 0 { | if len(list) != 0 { | ||||
| go updater.UpdateDeployInstanceStatusBatch(l.svcCtx, list) | |||||
| go status.UpdateDeployInstanceStatusBatch(l.svcCtx, list, true) | |||||
| ins := list[0] | ins := list[0] | ||||
| for i := range list { | for i := range list { | ||||
| @@ -82,8 +82,8 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi | |||||
| } | } | ||||
| } | } | ||||
| go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true) | |||||
| go updater.UpdateDeployTaskStatus(l.svcCtx) | |||||
| go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true) | |||||
| go status.UpdateDeployTaskStatus(l.svcCtx) | |||||
| } | } | ||||
| resp.List = &deployTasks | resp.List = &deployTasks | ||||
| @@ -5,8 +5,7 @@ import ( | |||||
| "errors" | "errors" | ||||
| "fmt" | "fmt" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| @@ -84,7 +83,7 @@ func (l *StartAllByDeployTaskIdLogic) startAll(list []*models.AiInferDeployInsta | |||||
| <-buf | <-buf | ||||
| return | return | ||||
| } | } | ||||
| if checkStopStatus(in) { | |||||
| if status.CheckStopStatus(in) { | |||||
| success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId) | success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId) | ||||
| if !success { | if !success { | ||||
| e := struct { | e := struct { | ||||
| @@ -136,31 +135,3 @@ func (l *StartAllByDeployTaskIdLogic) startAll(list []*models.AiInferDeployInsta | |||||
| return nil | return nil | ||||
| } | } | ||||
| func checkStopStatus(in *inference.DeployInstance) bool { | |||||
| switch in.ClusterType { | |||||
| case storeLink.TYPE_OCTOPUS: | |||||
| switch in.Status { | |||||
| case "stopped": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_MODELARTS: | |||||
| switch in.Status { | |||||
| case "stopped": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_SHUGUANGAI: | |||||
| switch in.Status { | |||||
| case "Terminated": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| default: | |||||
| return false | |||||
| } | |||||
| } | |||||
| @@ -4,7 +4,7 @@ import ( | |||||
| "context" | "context" | ||||
| "errors" | "errors" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "strconv" | "strconv" | ||||
| @@ -33,12 +33,16 @@ func (l *StartDeployInstanceListLogic) StartDeployInstanceList(req *types.StartD | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, req.InstanceId) | |||||
| if !success { | |||||
| return nil, errors.New("start instance failed") | |||||
| in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) | |||||
| if status.CheckStopStatus(in) { | |||||
| success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, req.InstanceId) | |||||
| if !success { | |||||
| return nil, errors.New("start instance failed") | |||||
| } | |||||
| } | } | ||||
| go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true) | |||||
| go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true) | |||||
| return resp, nil | return resp, nil | ||||
| } | } | ||||
| @@ -4,8 +4,7 @@ import ( | |||||
| "context" | "context" | ||||
| "errors" | "errors" | ||||
| "fmt" | "fmt" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" | ||||
| @@ -85,7 +84,7 @@ func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstanc | |||||
| <-buf | <-buf | ||||
| return | return | ||||
| } | } | ||||
| if checkStatus(in) { | |||||
| if status.CheckRunningStatus(in) { | |||||
| success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId) | success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId) | ||||
| if !success { | if !success { | ||||
| e := struct { | e := struct { | ||||
| @@ -137,31 +136,3 @@ func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstanc | |||||
| return nil | return nil | ||||
| } | } | ||||
| func checkStatus(in *inference.DeployInstance) bool { | |||||
| switch in.ClusterType { | |||||
| case storeLink.TYPE_OCTOPUS: | |||||
| switch in.Status { | |||||
| case "running": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_MODELARTS: | |||||
| switch in.Status { | |||||
| case "running": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_SHUGUANGAI: | |||||
| switch in.Status { | |||||
| case "Running": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| default: | |||||
| return false | |||||
| } | |||||
| } | |||||
| @@ -4,7 +4,7 @@ import ( | |||||
| "context" | "context" | ||||
| "errors" | "errors" | ||||
| "github.com/zeromicro/go-zero/core/logx" | "github.com/zeromicro/go-zero/core/logx" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" | ||||
| "strconv" | "strconv" | ||||
| @@ -33,12 +33,16 @@ func (l *StopDeployInstanceLogic) StopDeployInstance(req *types.StopDeployInstan | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, req.InstanceId) | |||||
| if !success { | |||||
| return nil, errors.New("stop instance failed") | |||||
| in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) | |||||
| if status.CheckRunningStatus(in) { | |||||
| success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, req.InstanceId) | |||||
| if !success { | |||||
| return nil, errors.New("stop instance failed") | |||||
| } | |||||
| } | } | ||||
| go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true) | |||||
| go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true) | |||||
| return resp, nil | return resp, nil | ||||
| } | } | ||||
| @@ -1,4 +1,4 @@ | |||||
| package updater | |||||
| package stat | |||||
| import ( | import ( | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| @@ -1,6 +1,7 @@ | |||||
| package updater | |||||
| package status | |||||
| import ( | import ( | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference" | |||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" | ||||
| "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" | ||||
| @@ -10,12 +11,15 @@ import ( | |||||
| "time" | "time" | ||||
| ) | ) | ||||
| func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance) { | |||||
| func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance, needfilter bool) { | |||||
| list := make([]*models.AiInferDeployInstance, len(insList)) | list := make([]*models.AiInferDeployInstance, len(insList)) | ||||
| copy(list, insList) | copy(list, insList) | ||||
| for i := len(list) - 1; i >= 0; i-- { | |||||
| if list[i].Status == constants.Running || list[i].Status == constants.Stopped { | |||||
| list = append(list[:i], list[i+1:]...) | |||||
| if needfilter { | |||||
| for i := len(list) - 1; i >= 0; i-- { | |||||
| if list[i].Status == constants.Running || list[i].Status == constants.Stopped { | |||||
| list = append(list[:i], list[i+1:]...) | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| @@ -119,3 +123,72 @@ func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInfe | |||||
| return | return | ||||
| } | } | ||||
| } | } | ||||
| func UpdateAutoStoppedInstance(svc *svc.ServiceContext) { | |||||
| list, err := svc.Scheduler.AiStorages.GetInferDeployInstanceList() | |||||
| if err != nil { | |||||
| return | |||||
| } | |||||
| if len(list) == 0 { | |||||
| return | |||||
| } | |||||
| UpdateDeployInstanceStatusBatch(svc, list, false) | |||||
| } | |||||
| func CheckStopStatus(in *inference.DeployInstance) bool { | |||||
| switch in.ClusterType { | |||||
| case storeLink.TYPE_OCTOPUS: | |||||
| switch in.Status { | |||||
| case "stopped": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_MODELARTS: | |||||
| switch in.Status { | |||||
| case "stopped": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_SHUGUANGAI: | |||||
| switch in.Status { | |||||
| case "Terminated": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| default: | |||||
| return false | |||||
| } | |||||
| } | |||||
| func CheckRunningStatus(in *inference.DeployInstance) bool { | |||||
| switch in.ClusterType { | |||||
| case storeLink.TYPE_OCTOPUS: | |||||
| switch in.Status { | |||||
| case "running": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_MODELARTS: | |||||
| switch in.Status { | |||||
| case "running": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| case storeLink.TYPE_SHUGUANGAI: | |||||
| switch in.Status { | |||||
| case "Running": | |||||
| return true | |||||
| default: | |||||
| return false | |||||
| } | |||||
| default: | |||||
| return false | |||||
| } | |||||
| } | |||||
| @@ -1,4 +1,4 @@ | |||||
| package updater | |||||
| package status | |||||
| import ( | import ( | ||||
| "errors" | "errors" | ||||