|
|
|
@@ -2,6 +2,9 @@ package schedule |
|
|
|
|
|
|
|
import ( |
|
|
|
"context" |
|
|
|
"errors" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" |
|
|
|
"strconv" |
|
|
|
|
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" |
|
|
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" |
|
|
|
@@ -26,11 +29,33 @@ func NewScheduleGetAiJobLogLogLogic(ctx context.Context, svcCtx *svc.ServiceCont |
|
|
|
func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) { |
|
|
|
resp = &types.AiJobLogResp{} |
|
|
|
|
|
|
|
id, err := l.svcCtx.Scheduler.AiStorages.GetAiTaskIdByClusterIdAndTaskId(req.ClusterId, req.TaskId) |
|
|
|
taskId, err := strconv.ParseInt(req.TaskId, 10, 64) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, id, req.InstanceNum) |
|
|
|
task, err := l.svcCtx.Scheduler.AiStorages.GetTaskById(taskId) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
|
|
|
|
aiTasks, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByTaskId(req.TaskId) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
|
|
|
|
if len(aiTasks) == 0 && task.Status == constants.Failed { |
|
|
|
return nil, errors.New("submit failed, no log available") |
|
|
|
} else if len(aiTasks) > 1 { |
|
|
|
return nil, errors.New("multiple ai task found") |
|
|
|
} |
|
|
|
|
|
|
|
aiTask := aiTasks[0] |
|
|
|
adapterId := strconv.FormatInt(aiTask.AdapterId, 10) |
|
|
|
clusterId := strconv.FormatInt(aiTask.ClusterId, 10) |
|
|
|
|
|
|
|
jobId := aiTask.JobId |
|
|
|
|
|
|
|
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapterId][clusterId].GetTrainingTaskLog(l.ctx, jobId, req.InstanceNum) |
|
|
|
if err != nil { |
|
|
|
return nil, err |
|
|
|
} |
|
|
|
|