From 998998f13a4f11b069eecae52865f9b29a114e8d Mon Sep 17 00:00:00 2001
From: tzwang
Date: Tue, 26 Aug 2025 15:58:34 +0800
Subject: [PATCH 1/2] update aijoblog

Resolve the training job whose log is fetched from the task_ai rows
matching req.TaskId. The adapter id, cluster id and job id now come
from that row instead of req.AdapterId / req.ClusterId and the id
returned by GetAiTaskIdByClusterIdAndTaskId.

Add AiStorage.GetAiTasksByTaskId, which returns the task_ai rows for a
task id ordered by commit_time descending. Exactly one row is required;
zero rows or more than one row are reported as errors.

---
 .../schedule/schedulegetaijoblogloglogic.go   | 19 +++++++++++++++++--
 internal/scheduler/database/aiStorage.go      | 11 +++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/internal/logic/schedule/schedulegetaijoblogloglogic.go b/internal/logic/schedule/schedulegetaijoblogloglogic.go
index a8529b1c..2c169bb1 100644
--- a/internal/logic/schedule/schedulegetaijoblogloglogic.go
+++ b/internal/logic/schedule/schedulegetaijoblogloglogic.go
@@ -2,6 +2,8 @@ package schedule
 
 import (
 	"context"
+	"errors"
+	"strconv"
 
 	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
 	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
@@ -26,11 +28,24 @@ func NewScheduleGetAiJobLogLogLogic(ctx context.Context, svcCtx *svc.ServiceCont
 func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) {
 	resp = &types.AiJobLogResp{}
 
-	id, err := l.svcCtx.Scheduler.AiStorages.GetAiTaskIdByClusterIdAndTaskId(req.ClusterId, req.TaskId)
+	aiTasks, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByTaskId(req.TaskId)
 	if err != nil {
 		return nil, err
 	}
-	log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, id, req.InstanceNum)
+
+	if len(aiTasks) == 0 {
+		return nil, errors.New("no ai task not found")
+	} else if len(aiTasks) > 1 {
+		return nil, errors.New("multiple ai task found")
+	}
+
+	aiTask := aiTasks[0]
+	adapterId := strconv.FormatInt(aiTask.AdapterId, 10)
+	clusterId := strconv.FormatInt(aiTask.ClusterId, 10)
+
+	jobId := aiTask.JobId
+
+	log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapterId][clusterId].GetTrainingTaskLog(l.ctx, jobId, req.InstanceNum)
 	if err != nil {
 		return nil, err
 	}
diff --git a/internal/scheduler/database/aiStorage.go b/internal/scheduler/database/aiStorage.go
index 3dc99ca6..4937b444 100644
--- a/internal/scheduler/database/aiStorage.go
+++ b/internal/scheduler/database/aiStorage.go
@@ -99,6 +99,17 @@ func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, e
 	return resp, nil
 }
 
+func (s *AiStorage) GetAiTasksByTaskId(taskId string) ([]*models.TaskAi, error) {
+	var resp []*models.TaskAi
+	db := s.DbEngin.Model(&models.TaskAi{}).Table("task_ai")
+	db = db.Where("task_id = ?", taskId)
+	err := db.Order("commit_time desc").Find(&resp).Error
+	if err != nil {
+		return nil, err
+	}
+	return resp, nil
+}
+
 func (s *AiStorage) GetAiTaskListById(id int64) ([]*models.TaskAi, error) {
 	var aiTaskList []*models.TaskAi
 	tx := s.DbEngin.Raw("select * from task_ai where `task_id` = ? ", id).Scan(&aiTaskList)

From f67589382f753e60f3a7117fb59dd0818f1fa922 Mon Sep 17 00:00:00 2001
From: tzwang
Date: Tue, 26 Aug 2025 16:10:53 +0800
Subject: [PATCH 2/2] update aijoblog

Parse req.TaskId as an int64 and load the parent task with GetTaskById
before querying task_ai. When no task_ai row exists and the task status
is constants.Failed, report that submission failed and no log is
available.

An empty task_ai result for any other task status is still rejected
with an error, so aiTasks[0] is never evaluated on an empty slice.

---
Review note: the first revision of this patch replaced the plain
`len(aiTasks) == 0` guard with
`len(aiTasks) == 0 && task.Status == constants.Failed`. A task with no
task_ai rows and a status other than Failed then fell through to
`aiTasks[0]` and panicked with index out of range. The extra `else if`
branch below restores the guard. The post-image blob id on the index
line predates this hand edit.

 .../logic/schedule/schedulegetaijoblogloglogic.go | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/internal/logic/schedule/schedulegetaijoblogloglogic.go b/internal/logic/schedule/schedulegetaijoblogloglogic.go
index 2c169bb1..22d10f6d 100644
--- a/internal/logic/schedule/schedulegetaijoblogloglogic.go
+++ b/internal/logic/schedule/schedulegetaijoblogloglogic.go
@@ -3,6 +3,7 @@ package schedule
 import (
 	"context"
 	"errors"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
 	"strconv"
 
 	"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
@@ -28,13 +29,24 @@ func NewScheduleGetAiJobLogLogLogic(ctx context.Context, svcCtx *svc.ServiceCont
 func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) {
 	resp = &types.AiJobLogResp{}
 
+	taskId, err := strconv.ParseInt(req.TaskId, 10, 64)
+	if err != nil {
+		return nil, err
+	}
+	task, err := l.svcCtx.Scheduler.AiStorages.GetTaskById(taskId)
+	if err != nil {
+		return nil, err
+	}
+
 	aiTasks, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByTaskId(req.TaskId)
 	if err != nil {
 		return nil, err
 	}
 
-	if len(aiTasks) == 0 {
-		return nil, errors.New("no ai task not found")
+	if len(aiTasks) == 0 && task.Status == constants.Failed {
+		return nil, errors.New("submit failed, no log available")
+	} else if len(aiTasks) == 0 {
+		return nil, errors.New("no ai task found")
 	} else if len(aiTasks) > 1 {
 		return nil, errors.New("multiple ai task found")
 	}