Browse Source

update createTasks

pull/375/head
tzwang 11 months ago
parent
commit
ff35ffef90
8 changed files with 139 additions and 71 deletions
  1. +72
    -43
      internal/logic/schedule/schedulecreatetasklogic.go
  2. +22
    -16
      internal/logic/schedule/scheduleruntasklogic.go
  3. +24
    -0
      internal/scheduler/common/common.go
  4. +3
    -12
      internal/scheduler/scheduler.go
  5. +10
    -0
      internal/scheduler/schedulers/aiScheduler.go
  6. +4
    -0
      internal/scheduler/strategy/dataLocality.go
  7. +3
    -0
      internal/scheduler/strategy/strategy.go
  8. +1
    -0
      pkg/constants/task.go

+ 72
- 43
internal/logic/schedule/schedulecreatetasklogic.go View File

@@ -57,7 +57,12 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) (
if err != nil { if err != nil {
return nil, err return nil, err
} }
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, req.JobResources.Clusters)

assignedClusters := copyParams([]*strategy.AssignedCluster{{
ClusterId: req.JobResources.Clusters[0].ClusterID,
}}, req.JobResources.Clusters)

taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, assignedClusters)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -66,24 +71,24 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) (
return resp, nil return resp, nil


} else { } else {
clusterInfos, err := l.getClusterInfosByStrategy(&req.JobResources)
assignedClusters, err := l.getAssignedClustersByStrategy(&req.JobResources)
if err != nil { if err != nil {
return nil, err return nil, err
} }


if len(clusterInfos) == 0 {
if len(assignedClusters) == 0 {
return nil, fmt.Errorf("failed to create task, no scheduled cluster found") return nil, fmt.Errorf("failed to create task, no scheduled cluster found")
} }


for _, info := range clusterInfos {
clusters = append(clusters, info.ClusterID)
for _, c := range assignedClusters {
clusters = append(clusters, c.ClusterId)
} }


schedatas, err := l.generateScheduleResult(req.DataDistributes, clusters) schedatas, err := l.generateScheduleResult(req.DataDistributes, clusters)
if err != nil { if err != nil {
return nil, err return nil, err
} }
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, clusterInfos)
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, assignedClusters)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -93,69 +98,93 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) (
} }
} }


func (l *ScheduleCreateTaskLogic) getClusterInfosByStrategy(resources *types.JobResources) ([]*types.JobClusterInfo, error) {
func (l *ScheduleCreateTaskLogic) getAssignedClustersByStrategy(resources *types.JobResources) ([]*strategy.AssignedCluster, error) {
var assignedClusters []*strategy.AssignedCluster
switch resources.ScheduleStrategy {
case strategy.LEASTLOADFIRST:
var resSpecs []*collector.ResourceSpec
var resCount int
for i := 0; i < QUERY_RESOURCE_RETRY; i++ {
defer time.Sleep(time.Second)
qResources, err := l.queryResource.queryResources(make([]string, 0))
if err != nil {
continue
}


var resSpecs []*collector.ResourceSpec
var resCount int
for i := 0; i < QUERY_RESOURCE_RETRY; i++ {
defer time.Sleep(time.Second)
qResources, err := l.queryResource.queryResources(make([]string, 0))
if err != nil {
continue
}
for _, resource := range qResources {
if resource.Resources != nil {
resCount++
}
}


for _, resource := range qResources {
if resource.Resources != nil {
resCount++
if resCount >= 1 {
resSpecs = qResources
break
} else {
resCount = 0
continue
} }
} }


if resCount >= 1 {
resSpecs = qResources
break
} else {
resCount = 0
continue
if resCount == 0 {
return nil, fmt.Errorf("failed to create task, resources counting fails")
} }
}

if resCount == 0 {
return nil, fmt.Errorf("failed to create task, resources counting fails")
}


var clusterInfos []*types.JobClusterInfo
switch resources.ScheduleStrategy {
case strategy.LEASTLOADFIRST:
strtg := strategy.NewLeastLoadFirst(TRAINNING_TASK_REPLICA, resSpecs) strtg := strategy.NewLeastLoadFirst(TRAINNING_TASK_REPLICA, resSpecs)
clusters, err := strtg.Schedule() clusters, err := strtg.Schedule()
if err != nil { if err != nil {
return nil, err return nil, err
} }
clusterInfos = filterClusterInfos(clusters, resources.Clusters)
assignedClusters = copyParams(clusters, resources.Clusters)
} }


return clusterInfos, nil
return assignedClusters, nil
} }


func filterClusterInfos(clusters []*strategy.AssignedCluster, clusterInfos []*types.JobClusterInfo) []*types.JobClusterInfo {
var result []*types.JobClusterInfo
for _, cinfo := range clusterInfos {
for _, c := range clusters {
if cinfo.ClusterID == c.ClusterId {
result = append(result, cinfo)
func copyParams(clusters []*strategy.AssignedCluster, clusterInfos []*types.JobClusterInfo) []*strategy.AssignedCluster {
var result []*strategy.AssignedCluster

for _, c := range clusters {
for _, info := range clusterInfos {
if c.ClusterId == info.ClusterID {
var envs []string
var params []string
for k, v := range info.Runtime.Envs {
val := common.ConvertTypeToString(v)
if val != "" {
env := k + storeLink.COMMA + val
envs = append(envs, env)
}
}
for k, v := range info.Runtime.Params {
val := common.ConvertTypeToString(v)
if val != "" {
p := k + storeLink.COMMA + val
params = append(params, p)
}
}
cluster := &strategy.AssignedCluster{
ClusterId: c.ClusterId,
ClusterName: c.ClusterName,
Replicas: c.Replicas,
Cmd: info.Runtime.Command,
Envs: envs,
Params: params,
}
result = append(result, cluster)
} }
} }
} }
return result return result
} }


func (l *ScheduleCreateTaskLogic) createTask(taskName string, strategyName string, jobClusterInfo []*types.JobClusterInfo) (int64, error) {
func (l *ScheduleCreateTaskLogic) createTask(taskName string, strategyName string, clusters []*strategy.AssignedCluster) (int64, error) {
var synergyStatus int64 var synergyStatus int64
if len(jobClusterInfo) > 1 {
if len(clusters) > 1 {
synergyStatus = 1 synergyStatus = 1
} }


y, err := yaml.Marshal(jobClusterInfo)
y, err := yaml.Marshal(clusters)
if err != nil { if err != nil {
fmt.Printf("Error while Marshaling. %v", err) fmt.Printf("Error while Marshaling. %v", err)
} }


+ 22
- 16
internal/logic/schedule/scheduleruntasklogic.go View File

@@ -6,6 +6,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gopkg.in/yaml.v2" "gopkg.in/yaml.v2"


@@ -34,11 +35,16 @@ func (l *ScheduleRunTaskLogic) ScheduleRunTask(req *types.RunTaskReq) (resp *typ
if err != nil { if err != nil {
return nil, err return nil, err
} }

if task == nil { if task == nil {
return nil, errors.New("task not found ") return nil, errors.New("task not found ")
} }


var clusters []*types.JobClusterInfo
if task.Status == constants.Cancelled {
return nil, errors.New("task has been cancelled ")
}

var clusters []*strategy.AssignedCluster
err = yaml.Unmarshal([]byte(task.YamlString), &clusters) err = yaml.Unmarshal([]byte(task.YamlString), &clusters)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -56,21 +62,21 @@ func (l *ScheduleRunTaskLogic) ScheduleRunTask(req *types.RunTaskReq) (resp *typ
return nil, err return nil, err
} }


adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(ADAPTERID)
if err != nil {
return nil, err
}
for _, i := range clusters {
clusterName, _ := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(i.ClusterID)
opt := &option.AiOption{}
err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(task.Id, opt, adapterName, i.ClusterID, clusterName, "", constants.Saved, "")
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
}
//adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(ADAPTERID)
//if err != nil {
// return nil, err
//}
//
//for _, i := range clusters {
// clusterName, _ := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(i.ClusterID)
//
// opt := &option.AiOption{}
//
// err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(task.Id, opt, adapterName, i.ClusterID, clusterName, "", constants.Saved, "")
// if err != nil {
// return nil, errors.New("database add failed: " + err.Error())
// }
//}


return return
} }

+ 24
- 0
internal/scheduler/common/common.go View File

@@ -15,9 +15,11 @@
package common package common


import ( import (
"encoding/json"
"github.com/go-resty/resty/v2" "github.com/go-resty/resty/v2"
"math" "math"
"math/rand" "math/rand"
"strconv"
"time" "time"
) )


@@ -134,3 +136,25 @@ func Unique(s []string) []string {
} }
return result return result
} }

func ConvertTypeToString(v interface{}) string {
switch v.(type) {

case int:
s := v.(int)
return strconv.Itoa(s)
case string:
s := v.(string)
return s
case float64:
s := strconv.FormatFloat(v.(float64), 'f', -1, 64)
return s
case int64:
s := v.(int64)
return strconv.FormatInt(s, 64)
case json.Number:
return v.(json.Number).String()
default:
return ""
}
}

+ 3
- 12
internal/scheduler/scheduler.go View File

@@ -22,7 +22,6 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/database" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/database"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/response"
"gorm.io/gorm" "gorm.io/gorm"
"sigs.k8s.io/yaml" "sigs.k8s.io/yaml"
@@ -132,7 +131,7 @@ func (s *Scheduler) TempAssign() error {
return nil return nil
} }


func (s *Scheduler) AssignAndSchedule(ss SubSchedule, mode int, assignedClusters interface{}) (interface{}, error) {
func (s *Scheduler) AssignAndSchedule(ss SubSchedule, mode int, assignedClusters []*strategy.AssignedCluster) (interface{}, error) {
var result interface{} var result interface{}
switch mode { switch mode {
case JOINT_CLOUD_MODE: case JOINT_CLOUD_MODE:
@@ -157,17 +156,9 @@ func (s *Scheduler) AssignAndSchedule(ss SubSchedule, mode int, assignedClusters
result = resp result = resp


case STORAGE_SCHEDULE_MODE: case STORAGE_SCHEDULE_MODE:
jobClusterInfos, ok := assignedClusters.([]*types.JobClusterInfo)
if !ok {
return nil, errors.New("converting JobClusterInfos fails")
}
var clusters []*strategy.AssignedCluster
for _, info := range jobClusterInfos {
cluster := &strategy.AssignedCluster{ClusterId: info.ClusterID, Replicas: 1}
clusters = append(clusters, cluster)
}

//assign tasks to clusters //assign tasks to clusters
resp, err := ss.AssignTask(clusters, mode)
resp, err := ss.AssignTask(assignedClusters, mode)
if err != nil { if err != nil {
return nil, err return nil, err
} }


+ 10
- 0
internal/scheduler/schedulers/aiScheduler.go View File

@@ -173,6 +173,16 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster, mode int
wg.Add(1) wg.Add(1)
go func() { go func() {
opt, _ := cloneAiOption(as.option) opt, _ := cloneAiOption(as.option)
// decide opt params by mode
switch mode {
case scheduler.STORAGE_SCHEDULE_MODE:
opt.Cmd = c.Cmd
opt.Envs = c.Envs
opt.Params = c.Params
default:

}

resp, err := executorMap[c.ClusterId].Execute(as.ctx, opt, mode) resp, err := executorMap[c.ClusterId].Execute(as.ctx, opt, mode)
if err != nil { if err != nil {
e := struct { e := struct {


+ 4
- 0
internal/scheduler/strategy/dataLocality.go View File

@@ -0,0 +1,4 @@
package strategy

type DataLocality struct {
}

+ 3
- 0
internal/scheduler/strategy/strategy.go View File

@@ -23,6 +23,9 @@ type AssignedCluster struct {
ClusterId string ClusterId string
ClusterName string ClusterName string
Replicas int32 Replicas int32
Cmd string
Envs []string
Params []string
} }


func GetStrategyNames() []string { func GetStrategyNames() []string {


+ 1
- 0
pkg/constants/task.go View File

@@ -29,4 +29,5 @@ const (
Pending = "Pending" Pending = "Pending"
Stopped = "Stopped" Stopped = "Stopped"
Deploying = "Deploying" Deploying = "Deploying"
Cancelled = "Cancelled"
) )

Loading…
Cancel
Save