package schedule

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"strings"

	"github.com/zeromicro/go-zero/core/logx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/entity"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/executor"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
	"gopkg.in/yaml.v2"
)

// ScheduleRunTaskLogic holds the request context, the service context and a
// context-bound logger used while running a previously saved task.
type ScheduleRunTaskLogic struct {
	logx.Logger
	ctx    context.Context
	svcCtx *svc.ServiceContext
}

// NewScheduleRunTaskLogic returns a ScheduleRunTaskLogic whose logger is bound
// to ctx.
func NewScheduleRunTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleRunTaskLogic {
	return &ScheduleRunTaskLogic{
		Logger: logx.WithContext(ctx),
		ctx:    ctx,
		svcCtx: svcCtx,
	}
}

// ScheduleRunTask loads the task identified by req.TaskID, verifies that its
// status is constants.Saved, resolves the clusters to run on from the task's
// stored YAML plus req.ScheduledDatas, hands them to the scheduler and persists
// the results through SaveResult.
//
// On success it returns a nil response and a nil error. Any lookup, decoding,
// scheduling or persistence failure is returned as the error.
func (l *ScheduleRunTaskLogic) ScheduleRunTask(req *types.RunTaskReq) (resp *types.RunTaskResp, err error) {
	// find task
	task, err := l.svcCtx.Scheduler.AiStorages.GetTaskById(req.TaskID)
	if err != nil {
		return nil, err
	}
	if task == nil {
		return nil, errors.New("task not found ")
	}

	// Only tasks still in the Saved state may be started.
	switch task.Status {
	case constants.Saved:
		// ok, continue below
	case constants.Cancelled:
		return nil, errors.New("task has been cancelled ")
	case constants.Failed:
		return nil, errors.New("task was already failed ")
	case constants.Running:
		return nil, errors.New("task is running ")
	case constants.Succeeded:
		return nil, errors.New("task is completed ")
	default:
		return nil, fmt.Errorf("task is being: %s", task.Status)
	}

	var clustersWithDataDistributes ClustersWithDataDistributes
	if err = yaml.Unmarshal([]byte(task.YamlString), &clustersWithDataDistributes); err != nil {
		return nil, err
	}

	opt := &option.AiOption{
		AdapterId:    ADAPTERID,
		TaskName:     task.Name,
		TaskId:       task.Id,
		StrategyName: "",
	}

	// update assignedClusters
	assignedClusters, err := updateClustersByScheduledDatas(task.Id, &clustersWithDataDistributes, req.ScheduledDatas)
	if err != nil {
		return nil, err
	}

	aiSchdl, err := schedulers.NewAiScheduler(l.ctx, "", l.svcCtx.Scheduler, opt)
	if err != nil {
		return nil, err
	}

	results, err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl, executor.SUBMIT_MODE_STORAGE_SCHEDULE, assignedClusters)
	if err != nil {
		return nil, err
	}

	// A bare type assertion would panic on a nil or unexpected dynamic type;
	// report it as an error instead.
	rs, ok := results.([]*schedulers.AiResult)
	if !ok {
		return nil, fmt.Errorf("unexpected schedule result type %T", results)
	}

	if err = l.SaveResult(task, rs, opt); err != nil {
		return nil, err
	}

	return resp, nil
}

// SaveResult stores one AI task record per scheduling result and adds a
// "create" notice for each of them.
//
// opt is mutated: its ComputeCard, Replica and Output fields are overwritten
// from every result before that result is saved. The first adapter-name lookup
// or save failure aborts the loop and is returned.
func (l *ScheduleRunTaskLogic) SaveResult(task *models.Task, results []*schedulers.AiResult, opt *option.AiOption) error {
	for _, r := range results {
		opt.ComputeCard = strings.ToUpper(r.Card)
		opt.Replica = r.Replica
		opt.Output = r.Output

		adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(r.AdapterId)
		if err != nil {
			return err
		}

		// The cluster name lookup is best effort: a failure is logged and the
		// task is still saved with whatever name was returned.
		clusterName, err := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(r.ClusterId)
		if err != nil {
			l.Errorf("get cluster name for cluster %v failed: %v", r.ClusterId, err)
		}

		err = l.svcCtx.Scheduler.AiStorages.SaveAiTask(task.Id, opt, adapterName, r.ClusterId, clusterName, r.JobId, constants.Saved, r.Msg)
		if err != nil {
			return err
		}

		l.svcCtx.Scheduler.AiStorages.AddNoticeInfo(r.AdapterId, adapterName, r.ClusterId, clusterName, r.TaskName, "create", "任务创建中")
	}

	return nil
}

// updateClustersByScheduledDatas fills the dataset, image, code and model ids
// of every cluster in clustersWithDataDistributes.Clusters and returns those
// clusters.
//
// Ids are taken first from the pass-in scheduledDatas (only results whose
// Status is true are considered) and, for ids that are still empty, from the
// data distributes stored with the task. A cluster's Output is always taken
// from the stored code distribute that references it. The clusters are
// modified in place.
//
// An error is returned when a JSON payload cannot be decoded or when a cluster
// ends up without a DatasetId, ImageId or CodeId.
func updateClustersByScheduledDatas(taskId int64, clustersWithDataDistributes *ClustersWithDataDistributes, scheduledDatas []*types.DataScheduleResults) ([]*strategy.AssignedCluster, error) {
	assignedClusters := make([]*strategy.AssignedCluster, 0, len(clustersWithDataDistributes.Clusters))

	// handle pass-in scheduledDatas; with no scheduledDatas every cluster is
	// simply assigned unchanged.
	for _, cluster := range clustersWithDataDistributes.Clusters {
		for _, data := range scheduledDatas {
			if data == nil {
				continue
			}

			// Pick the cluster field this data type feeds.
			var (
				target   *string
				dataType string
			)
			switch data.DataType {
			case "dataset":
				target, dataType = &cluster.DatasetId, "dataset"
			case "image":
				target, dataType = &cluster.ImageId, "image"
			case "code":
				target, dataType = &cluster.CodeId, "code"
			case "model":
				target, dataType = &cluster.ModelId, "model"
			default:
				// Other data types are ignored.
				continue
			}

			for _, result := range data.Results {
				if !result.Status {
					continue
				}
				for _, c := range result.Clusters {
					id, ok, err := matchedJsonDataID(cluster.ClusterId, c.ClusterID, c.JsonData)
					if err != nil {
						return nil, jsonDataConvertErr("pass-in", taskId, cluster.ClusterId, dataType, err)
					}
					if ok {
						*target = id
					}
				}
			}
		}
		assignedClusters = append(assignedClusters, cluster)
	}

	// handle db yaml clustersWithDataDistributes
	dd := clustersWithDataDistributes.DataDistributes
	for _, cluster := range assignedClusters {
		if cluster.DatasetId == "" {
			for _, distribute := range dd.Dataset {
				for _, c := range distribute.Clusters {
					id, ok, err := matchedJsonDataID(cluster.ClusterId, c.ClusterID, c.JsonData)
					if err != nil {
						return nil, jsonDataConvertErr("db yaml", taskId, cluster.ClusterId, "dataset", err)
					}
					if ok {
						cluster.DatasetId = id
					}
				}
			}
		}

		if cluster.ImageId == "" {
			for _, distribute := range dd.Image {
				for _, c := range distribute.Clusters {
					id, ok, err := matchedJsonDataID(cluster.ClusterId, c.ClusterID, c.JsonData)
					if err != nil {
						return nil, jsonDataConvertErr("db yaml", taskId, cluster.ClusterId, "image", err)
					}
					if ok {
						cluster.ImageId = id
					}
				}
			}
		}

		// Code distributes are always walked, even when CodeId is already set,
		// because Output is copied from the matching distribute regardless.
		for _, distribute := range dd.Code {
			for _, c := range distribute.Clusters {
				if cluster.ClusterId != c.ClusterID {
					continue
				}
				cluster.Output = distribute.Output
				if cluster.CodeId != "" {
					continue
				}
				id, ok, err := matchedJsonDataID(cluster.ClusterId, c.ClusterID, c.JsonData)
				if err != nil {
					return nil, jsonDataConvertErr("db yaml", taskId, cluster.ClusterId, "code", err)
				}
				if ok {
					cluster.CodeId = id
				}
			}
		}

		if cluster.ModelId == "" {
			for _, distribute := range dd.Model {
				for _, c := range distribute.Clusters {
					id, ok, err := matchedJsonDataID(cluster.ClusterId, c.ClusterID, c.JsonData)
					if err != nil {
						return nil, jsonDataConvertErr("db yaml", taskId, cluster.ClusterId, "model", err)
					}
					if ok {
						cluster.ModelId = id
					}
				}
			}
		}
	}

	// check empty data
	// NOTE(review): ModelId is not validated here — presumably a model is
	// optional; confirm.
	for _, cluster := range assignedClusters {
		if cluster.DatasetId == "" {
			return nil, fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "DatasetId")
		}
		if cluster.ImageId == "" {
			return nil, fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "ImageId")
		}
		if cluster.CodeId == "" {
			return nil, fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "CodeId")
		}
	}

	return assignedClusters, nil
}

// matchedJsonDataID decodes raw as an entity.JsonData and returns its Id when
// ownerID equals clusterID and raw is non-empty. ok is false, with a nil
// error, when the ids differ or raw is empty.
func matchedJsonDataID(clusterID, ownerID, raw string) (id string, ok bool, err error) {
	if clusterID != ownerID || raw == "" {
		return "", false, nil
	}

	jsonData := entity.JsonData{}
	if err := json.Unmarshal([]byte(raw), &jsonData); err != nil {
		return "", false, err
	}
	return jsonData.Id, true, nil
}

// jsonDataConvertErr builds the error reported when a cluster's JSON payload
// cannot be decoded; origin names where the payload came from and err is
// wrapped so callers can inspect the cause.
func jsonDataConvertErr(origin string, taskId int64, clusterId, dataType string, err error) error {
	return fmt.Errorf("%s jsonData convert failed, task %d, cluster %s, datatype: %s: %w", origin, taskId, clusterId, dataType, err)
}