|
|
|
@@ -2,12 +2,16 @@ package core |
|
|
|
|
|
|
|
import ( |
|
|
|
"context" |
|
|
|
"errors" |
|
|
|
"fmt" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/timeutils" |
|
|
|
"strconv" |
|
|
|
"sync" |
|
|
|
"time" |
|
|
|
|
|
|
|
"github.com/zeromicro/go-zero/core/logx" |
|
|
|
@@ -53,8 +57,9 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa |
|
|
|
} |
|
|
|
|
|
|
|
// 更新智算任务状态 |
|
|
|
var ch = make(chan struct{}) |
|
|
|
go l.updateAitaskStatus(list, ch) |
|
|
|
chs := [2]chan struct{}{make(chan struct{}), make(chan struct{})} |
|
|
|
go l.updateTaskStatus(list, chs[0]) |
|
|
|
go l.updateAiTaskStatus(list, chs[1]) |
|
|
|
|
|
|
|
for _, model := range list { |
|
|
|
if model.StartTime != "" && model.EndTime == "" { |
|
|
|
@@ -72,15 +77,18 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa |
|
|
|
resp.PageNum = req.PageNum |
|
|
|
resp.Total = total |
|
|
|
|
|
|
|
select { |
|
|
|
case _ = <-ch: |
|
|
|
return resp, nil |
|
|
|
case <-time.After(1 * time.Second): |
|
|
|
return resp, nil |
|
|
|
for _, ch := range chs { |
|
|
|
select { |
|
|
|
case <-ch: |
|
|
|
return |
|
|
|
case <-time.After(1 * time.Second): |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
func (l *PageListTaskLogic) updateAitaskStatus(tasks []*types.TaskModel, ch chan<- struct{}) { |
|
|
|
func (l *PageListTaskLogic) updateTaskStatus(tasks []*types.TaskModel, ch chan<- struct{}) { |
|
|
|
for _, task := range tasks { |
|
|
|
if task.AdapterTypeDict != 1 { |
|
|
|
continue |
|
|
|
@@ -150,8 +158,62 @@ func (l *PageListTaskLogic) updateAitaskStatus(tasks []*types.TaskModel, ch chan |
|
|
|
|
|
|
|
tx = l.svcCtx.DbEngin.Table("task").Updates(task) |
|
|
|
if tx.Error != nil { |
|
|
|
logx.Errorf(tx.Error.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
} |
|
|
|
ch <- struct{}{} |
|
|
|
} |
|
|
|
|
|
|
|
func (l *PageListTaskLogic) updateAiTaskStatus(tasks []*types.TaskModel, ch chan<- struct{}) { |
|
|
|
var wg sync.WaitGroup |
|
|
|
for _, task := range tasks { |
|
|
|
if task.AdapterTypeDict != 1 { |
|
|
|
continue |
|
|
|
} |
|
|
|
if task.Status == constants.Succeeded { |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
var aiTaskList []*models.TaskAi |
|
|
|
tx := l.svcCtx.DbEngin.Raw("select * from task_ai where `task_id` = ? ", task.Id).Scan(&aiTaskList) |
|
|
|
if tx.Error != nil { |
|
|
|
logx.Errorf(tx.Error.Error()) |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
if len(aiTaskList) == 0 { |
|
|
|
continue |
|
|
|
} |
|
|
|
|
|
|
|
for _, aitask := range aiTaskList { |
|
|
|
t := aitask |
|
|
|
if t.Status == constants.Completed { |
|
|
|
continue |
|
|
|
} |
|
|
|
wg.Add(1) |
|
|
|
go func() { |
|
|
|
trainingTask, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[strconv.FormatInt(t.AdapterId, 10)][strconv.FormatInt(t.ClusterId, 10)].GetTrainingTask(l.ctx, t.JobId) |
|
|
|
if err != nil { |
|
|
|
msg := fmt.Sprintf("AiTaskId: %v, clusterId: %v , JobId: %v, error: %v \n", t.Id, t.ClusterId, t.JobId, err.Error()) |
|
|
|
logx.Errorf(errors.New(msg).Error()) |
|
|
|
wg.Done() |
|
|
|
return |
|
|
|
} |
|
|
|
t.Status = trainingTask.Status |
|
|
|
t.StartTime = trainingTask.Start |
|
|
|
t.EndTime = trainingTask.End |
|
|
|
err = l.svcCtx.Scheduler.AiStorages.UpdateAiTask(t) |
|
|
|
if err != nil { |
|
|
|
msg := fmt.Sprintf("AiTaskId: %v, clusterId: %v , JobId: %v, error: %v \n", t.Id, t.ClusterId, t.JobId, err.Error()) |
|
|
|
logx.Errorf(errors.New(msg).Error()) |
|
|
|
wg.Done() |
|
|
|
return |
|
|
|
} |
|
|
|
wg.Done() |
|
|
|
}() |
|
|
|
} |
|
|
|
} |
|
|
|
wg.Wait() |
|
|
|
ch <- struct{}{} |
|
|
|
} |