|
- package schedule
-
- import (
- "context"
- "encoding/json"
- "errors"
- "fmt"
- "github.com/zeromicro/go-zero/core/logx"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
- "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
- "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
- "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
- "gopkg.in/yaml.v2"
- "strings"
- )
-
- type ScheduleRunTaskLogic struct {
- logx.Logger
- ctx context.Context
- svcCtx *svc.ServiceContext
- }
-
- func NewScheduleRunTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleRunTaskLogic {
- return &ScheduleRunTaskLogic{
- Logger: logx.WithContext(ctx),
- ctx: ctx,
- svcCtx: svcCtx,
- }
- }
-
- func (l *ScheduleRunTaskLogic) ScheduleRunTask(req *types.RunTaskReq) (resp *types.RunTaskResp, err error) {
- // find task
- task, err := l.svcCtx.Scheduler.AiStorages.GetTaskById(req.TaskID)
- if err != nil {
- return nil, err
- }
-
- if task == nil {
- return nil, errors.New("task not found ")
- }
-
- if task.Status == constants.Cancelled {
- return nil, errors.New("task has been cancelled ")
- }
-
- var clusters []*strategy.AssignedCluster
- err = yaml.Unmarshal([]byte(task.YamlString), &clusters)
- if err != nil {
- return nil, err
- }
-
- opt := &option.AiOption{
- AdapterId: ADAPTERID,
- TaskName: task.Name,
- }
- // update assignedClusters
- err = updateClustersByScheduledDatas(task.Id, &clusters, req.ScheduledDatas)
- if err != nil {
- return nil, err
- }
-
- aiSchdl, err := schedulers.NewAiScheduler(l.ctx, "", l.svcCtx.Scheduler, opt)
- if err != nil {
- return nil, err
- }
-
- results, err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl, scheduler.SUBMIT_MODE_STORAGE_SCHEDULE, clusters)
- if err != nil {
- return nil, err
- }
-
- rs := (results).([]*schedulers.AiResult)
-
- err = l.SaveResult(task, rs, opt)
- if err != nil {
- return nil, err
- }
-
- return
- }
-
- func (l *ScheduleRunTaskLogic) SaveResult(task *models.Task, results []*schedulers.AiResult, opt *option.AiOption) error {
-
- for _, r := range results {
-
- opt.ComputeCard = strings.ToUpper(r.Card)
-
- adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(r.AdapterId)
- if err != nil {
- return err
- }
-
- clusterName, _ := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(r.ClusterId)
-
- err = l.svcCtx.Scheduler.AiStorages.SaveAiTask(task.Id, opt, adapterName, r.ClusterId, clusterName, r.JobId, constants.Saved, r.Msg)
- if err != nil {
- return err
- }
-
- l.svcCtx.Scheduler.AiStorages.AddNoticeInfo(r.AdapterId, adapterName, r.ClusterId, clusterName, r.TaskName, "create", "任务创建中")
-
- }
-
- return nil
-
- }
-
- func updateClustersByScheduledDatas(taskId int64, assignedClusters *[]*strategy.AssignedCluster, scheduledDatas []*types.DataScheduleResults) error {
- for _, cluster := range *assignedClusters {
- for _, data := range scheduledDatas {
- switch data.DataType {
- case "dataset":
- for _, result := range data.Results {
- if !result.Status {
- continue
- }
- for _, c := range result.Clusters {
- if cluster.ClusterId == c.ClusterID {
- if c.JsonData == "" {
- continue
- }
- jsonData := struct {
- Name string `json:"name"`
- Id string `json:"id"`
- }{}
- err := json.Unmarshal([]byte(c.JsonData), &jsonData)
- if err != nil {
- return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "dataset")
- }
- cluster.DatasetId = jsonData.Id
- }
- }
- }
- case "image":
- for _, result := range data.Results {
- if !result.Status {
- continue
- }
- for _, c := range result.Clusters {
- if cluster.ClusterId == c.ClusterID {
- if c.JsonData == "" {
- continue
- }
- jsonData := struct {
- Name string `json:"name"`
- Id string `json:"id"`
- }{}
- err := json.Unmarshal([]byte(c.JsonData), &jsonData)
- if err != nil {
- return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "image")
- }
- cluster.ImageId = jsonData.Id
- }
- }
- }
- case "code":
- for _, result := range data.Results {
- if !result.Status {
- continue
- }
- for _, c := range result.Clusters {
- if cluster.ClusterId == c.ClusterID {
- if c.JsonData == "" {
- continue
- }
- jsonData := struct {
- Name string `json:"name"`
- Id string `json:"id"`
- }{}
- err := json.Unmarshal([]byte(c.JsonData), &jsonData)
- if err != nil {
- return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "code")
- }
- cluster.CodeId = jsonData.Id
- }
- }
- }
- case "model":
- for _, result := range data.Results {
- if !result.Status {
- continue
- }
- for _, c := range result.Clusters {
- if cluster.ClusterId == c.ClusterID {
- if c.JsonData == "" {
- continue
- }
- jsonData := struct {
- Name string `json:"name"`
- Id string `json:"id"`
- }{}
- err := json.Unmarshal([]byte(c.JsonData), &jsonData)
- if err != nil {
- return fmt.Errorf("jsonData convert failed, task %d, cluster %s, datatype %s", taskId, cluster.ClusterId, "model")
- }
- cluster.ModelId = jsonData.Id
- }
- }
- }
- }
- }
- }
-
- for _, cluster := range *assignedClusters {
- if cluster.DatasetId == "" {
- return fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "DatasetId")
- }
-
- if cluster.ImageId == "" {
- return fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "ImageId")
- }
-
- if cluster.CodeId == "" {
- return fmt.Errorf("failed to run task %d, cluster %s cannot find %s", taskId, cluster.ClusterId, "CodeId")
- }
- }
-
- return nil
- }
|