Browse Source

added ai center overview apis

Former-commit-id: 02ea4ae3c9
pull/130/head
tzwang 1 year ago
parent
commit
9bd725380e
4 changed files with 63 additions and 26 deletions
  1. +38
    -0
      api/desc/ai/pcm-ai.api
  2. +19
    -0
      api/desc/pcm.api
  3. +3
    -0
      api/desc/schedule/pcm-schedule.api
  4. +3
    -26
      api/internal/scheduler/scheduler.go

+ 38
- 0
api/desc/ai/pcm-ai.api View File

@@ -1697,6 +1697,44 @@ PayloadCreateTrainJob{
jobId string `json:"jobId,optional"`
}
********************/

/******************Ai Center overview*************************/
// CenterOverviewResp is the response body returned by GET /ai/getCenterOverview.
// It carries aggregate totals; every field is optional in the JSON payload.
CenterOverviewResp {
// CenterNum is serialized as "totalCenters".
CenterNum int32 `json:"totalCenters,optional"`
// TaskNum is serialized as "totalTasks".
TaskNum int32 `json:"totalTasks,optional"`
// CardNum is serialized as "totalCards".
CardNum int32 `json:"totalCards,optional"`
// PowerInTops is serialized as "totalPower".
// NOTE(review): the field name suggests the unit is TOPS — confirm with the producer.
PowerInTops float64 `json:"totalPower,optional"`
}

// CenterQueueingResp is the response body returned by GET /ai/getCenterQueueing.
// It holds two lists of per-center queue entries, both optional in the JSON payload.
CenterQueueingResp {
// Current is serialized as "current".
Current []*CenterQueue `json:"current,optional"`
// History is serialized as "history".
// NOTE(review): the time window covered by "history" is not defined here — confirm.
History []*CenterQueue `json:"history,optional"`
}

// CenterQueue is one entry of a queueing list: a name paired with a queue count.
CenterQueue {
// Name is serialized as "name"; presumably the center's name — confirm.
Name string `json:"name,optional"`
// QueueingNum is serialized as "num".
QueueingNum int32 `json:"num,optional"`
}

// CenterListResp is the response body returned by GET /ai/getCenterList.
CenterListResp {
// List is serialized as "centerList".
List []*Center `json:"centerList,optional"`
}

// Center describes a single center entry in a center list.
Center {
// Name is serialized as "name".
Name string `json:"name,optional"`
// StackName is serialized as "stack".
StackName string `json:"stack,optional"`
// Version is serialized as "version".
// NOTE(review): presumably the version of the stack named above — confirm.
Version string `json:"version,optional"`
}

// CenterTaskListResp is the response body returned by GET /ai/getCenterTaskList.
CenterTaskListResp {
// List is serialized as "taskList".
List []*AiTask `json:"taskList,optional"`
}

// AiTask is a single task entry in a center task list.
AiTask {
// Name is serialized as "name".
Name string `json:"name,optional"`
// Status is serialized as "status". The field must be exported (capitalized):
// an unexported field in the generated Go struct is skipped by encoding/json,
// so the status would never appear in the response.
Status string `json:"status,optional"`
// TimeElapsed is serialized as "elapsed".
// NOTE(review): the time unit is not defined here — confirm with the producer.
TimeElapsed int32 `json:"elapsed,optional"`
}
)

/******************create TrainIngJob end*************************/


+ 19
- 0
api/desc/pcm.api View File

@@ -219,6 +219,22 @@ service pcm {
group: ai
)
service pcm {
@doc "智算中心概览"
@handler getCenterOverviewHandler
get /ai/getCenterOverview returns (CenterOverviewResp)

@doc "智算中心排队状况"
@handler getCenterQueueingHandler
get /ai/getCenterQueueing returns (CenterQueueingResp)

@doc "智算中心列表"
@handler getCenterListHandler
get /ai/getCenterList returns (CenterListResp)

@doc "智算中心任务列表"
@handler getCenterTaskListHandler
get /ai/getCenterTaskList returns (CenterTaskListResp)

@doc "查询数据集列表"
@handler listDataSetHandler
get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp)
@@ -927,6 +943,9 @@ service pcm {

@handler ScheduleSubmitHandler
post /schedule/submit (ScheduleReq) returns (ScheduleResp)

@handler ScheduleGetOverviewHandler
post /schedule/getOverview returns (ScheduleOverviewResp)
}

@server(


+ 3
- 0
api/desc/schedule/pcm-schedule.api View File

@@ -24,6 +24,9 @@ type (
Msg string `json:"msg"`
}

// ScheduleOverviewResp is the response type of the schedule overview endpoint.
// It currently declares no fields.
ScheduleOverviewResp {
}

AiOption {
TaskName string `json:"taskName"`
AdapterId string `json:"adapterId"`


+ 3
- 26
api/internal/scheduler/scheduler.go View File

@@ -129,42 +129,19 @@ func (s *Scheduler) TempAssign() error {
}

func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) {
//// 已指定 ParticipantId
//if s.task.ParticipantId != 0 {
// return nil
//}
//// 标签匹配以及后,未找到ParticipantIds
//if len(s.participantIds) == 0 {
// return errors.New("未找到匹配的ParticipantIds")
//}
//
//// 指定或者标签匹配的结果只有一个集群,给任务信息指定
//if len(s.participantIds) == 1 {
// s.task.ParticipantId = s.participantIds[0]
// //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
// //result := make(map[int64]string)
// //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
// //s.result = result
//
// return nil
//}

//choose strategy
strategy, err := ss.PickOptimalStrategy()
if err != nil {
return nil, err
}

//schedule
clusters, err := strategy.Schedule()
if err != nil {
return nil, err
}

//集群数量不满足,指定到标签匹配后第一个集群
//if len(providerList) < 2 {
// s.task.ParticipantId = s.participantIds[0]
// return nil
//}

//assign tasks to clusters
resp, err := ss.AssignTask(clusters)
if err != nil {
return nil, err


Loading…
Cancel
Save