|
|
|
@@ -57,7 +57,12 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) ( |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, req.JobResources.Clusters) |
|
|
|
|
|
|
|
assignedClusters := copyParams([]*strategy.AssignedCluster{{ |
|
|
|
ClusterId: req.JobResources.Clusters[0].ClusterID, |
|
|
|
}}, req.JobResources.Clusters) |
|
|
|
|
|
|
|
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, assignedClusters) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
@@ -66,24 +71,24 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) ( |
|
|
|
return resp, nil |
|
|
|
|
|
|
|
} else { |
|
|
|
clusterInfos, err := l.getClusterInfosByStrategy(&req.JobResources) |
|
|
|
assignedClusters, err := l.getAssignedClustersByStrategy(&req.JobResources) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
|
|
|
|
if len(clusterInfos) == 0 { |
|
|
|
if len(assignedClusters) == 0 { |
|
|
|
return nil, fmt.Errorf("failed to create task, no scheduled cluster found") |
|
|
|
} |
|
|
|
|
|
|
|
for _, info := range clusterInfos { |
|
|
|
clusters = append(clusters, info.ClusterID) |
|
|
|
for _, c := range assignedClusters { |
|
|
|
clusters = append(clusters, c.ClusterId) |
|
|
|
} |
|
|
|
|
|
|
|
schedatas, err := l.generateScheduleResult(req.DataDistributes, clusters) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, clusterInfos) |
|
|
|
taskId, err := l.createTask("SCHEDULE_TASK_"+utils.RandomString(TRAINNING_TASK_SUFFIX_LEN), req.JobResources.ScheduleStrategy, assignedClusters) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
@@ -93,69 +98,93 @@ func (l *ScheduleCreateTaskLogic) ScheduleCreateTask(req *types.CreateTaskReq) ( |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
func (l *ScheduleCreateTaskLogic) getClusterInfosByStrategy(resources *types.JobResources) ([]*types.JobClusterInfo, error) { |
|
|
|
func (l *ScheduleCreateTaskLogic) getAssignedClustersByStrategy(resources *types.JobResources) ([]*strategy.AssignedCluster, error) { |
|
|
|
var assignedClusters []*strategy.AssignedCluster |
|
|
|
switch resources.ScheduleStrategy { |
|
|
|
case strategy.LEASTLOADFIRST: |
|
|
|
var resSpecs []*collector.ResourceSpec |
|
|
|
var resCount int |
|
|
|
for i := 0; i < QUERY_RESOURCE_RETRY; i++ { |
|
|
|
defer time.Sleep(time.Second) |
|
|
|
qResources, err := l.queryResource.queryResources(make([]string, 0)) |
|
|
|
if err != nil { |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
var resSpecs []*collector.ResourceSpec |
|
|
|
var resCount int |
|
|
|
for i := 0; i < QUERY_RESOURCE_RETRY; i++ { |
|
|
|
defer time.Sleep(time.Second) |
|
|
|
qResources, err := l.queryResource.queryResources(make([]string, 0)) |
|
|
|
if err != nil { |
|
|
|
continue |
|
|
|
} |
|
|
|
for _, resource := range qResources { |
|
|
|
if resource.Resources != nil { |
|
|
|
resCount++ |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
for _, resource := range qResources { |
|
|
|
if resource.Resources != nil { |
|
|
|
resCount++ |
|
|
|
if resCount >= 1 { |
|
|
|
resSpecs = qResources |
|
|
|
break |
|
|
|
} else { |
|
|
|
resCount = 0 |
|
|
|
continue |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if resCount >= 1 { |
|
|
|
resSpecs = qResources |
|
|
|
break |
|
|
|
} else { |
|
|
|
resCount = 0 |
|
|
|
continue |
|
|
|
if resCount == 0 { |
|
|
|
return nil, fmt.Errorf("failed to create task, resources counting fails") |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if resCount == 0 { |
|
|
|
return nil, fmt.Errorf("failed to create task, resources counting fails") |
|
|
|
} |
|
|
|
|
|
|
|
var clusterInfos []*types.JobClusterInfo |
|
|
|
switch resources.ScheduleStrategy { |
|
|
|
case strategy.LEASTLOADFIRST: |
|
|
|
strtg := strategy.NewLeastLoadFirst(TRAINNING_TASK_REPLICA, resSpecs) |
|
|
|
clusters, err := strtg.Schedule() |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
clusterInfos = filterClusterInfos(clusters, resources.Clusters) |
|
|
|
assignedClusters = copyParams(clusters, resources.Clusters) |
|
|
|
} |
|
|
|
|
|
|
|
return clusterInfos, nil |
|
|
|
return assignedClusters, nil |
|
|
|
} |
|
|
|
|
|
|
|
func filterClusterInfos(clusters []*strategy.AssignedCluster, clusterInfos []*types.JobClusterInfo) []*types.JobClusterInfo { |
|
|
|
var result []*types.JobClusterInfo |
|
|
|
for _, cinfo := range clusterInfos { |
|
|
|
for _, c := range clusters { |
|
|
|
if cinfo.ClusterID == c.ClusterId { |
|
|
|
result = append(result, cinfo) |
|
|
|
func copyParams(clusters []*strategy.AssignedCluster, clusterInfos []*types.JobClusterInfo) []*strategy.AssignedCluster { |
|
|
|
var result []*strategy.AssignedCluster |
|
|
|
|
|
|
|
for _, c := range clusters { |
|
|
|
for _, info := range clusterInfos { |
|
|
|
if c.ClusterId == info.ClusterID { |
|
|
|
var envs []string |
|
|
|
var params []string |
|
|
|
for k, v := range info.Runtime.Envs { |
|
|
|
val := common.ConvertTypeToString(v) |
|
|
|
if val != "" { |
|
|
|
env := k + storeLink.COMMA + val |
|
|
|
envs = append(envs, env) |
|
|
|
} |
|
|
|
} |
|
|
|
for k, v := range info.Runtime.Params { |
|
|
|
val := common.ConvertTypeToString(v) |
|
|
|
if val != "" { |
|
|
|
p := k + storeLink.COMMA + val |
|
|
|
params = append(params, p) |
|
|
|
} |
|
|
|
} |
|
|
|
cluster := &strategy.AssignedCluster{ |
|
|
|
ClusterId: c.ClusterId, |
|
|
|
ClusterName: c.ClusterName, |
|
|
|
Replicas: c.Replicas, |
|
|
|
Cmd: info.Runtime.Command, |
|
|
|
Envs: envs, |
|
|
|
Params: params, |
|
|
|
} |
|
|
|
result = append(result, cluster) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
return result |
|
|
|
} |
|
|
|
|
|
|
|
func (l *ScheduleCreateTaskLogic) createTask(taskName string, strategyName string, jobClusterInfo []*types.JobClusterInfo) (int64, error) { |
|
|
|
func (l *ScheduleCreateTaskLogic) createTask(taskName string, strategyName string, clusters []*strategy.AssignedCluster) (int64, error) { |
|
|
|
var synergyStatus int64 |
|
|
|
if len(jobClusterInfo) > 1 { |
|
|
|
if len(clusters) > 1 { |
|
|
|
synergyStatus = 1 |
|
|
|
} |
|
|
|
|
|
|
|
y, err := yaml.Marshal(jobClusterInfo) |
|
|
|
y, err := yaml.Marshal(clusters) |
|
|
|
if err != nil { |
|
|
|
fmt.Printf("Error while Marshaling. %v", err) |
|
|
|
} |
|
|
|
|