|
|
|
@@ -768,43 +768,118 @@ func GetNotebookImageName(imageId string) (string, error) { |
|
|
|
return imageName, nil |
|
|
|
} |
|
|
|
|
|
|
|
func ProcessTrainJobInfo(task *models.Cloudbrain) error { |
|
|
|
func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
|
if strings.HasPrefix(task.JobID, models.TempJobIdPrefix) { |
|
|
|
|
|
|
|
if task.VersionCount > VersionCountOne { |
|
|
|
//multi version |
|
|
|
result, err := GetTrainJobVersionList(1000, 1, strings.TrimPrefix(task.JobID, models.TempJobIdPrefix)) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobVersionList(%s) failed:%v", task.JobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
//todo: get the cb record count and check the count == |
|
|
|
if len(result.JobVersionList) == task.VersionCount { |
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
task.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) |
|
|
|
task.VersionName = result.JobVersionList[0].VersionName |
|
|
|
task.VersionID = result.JobVersionList[0].VersionID |
|
|
|
err = models.UpdateJob(task) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) |
|
|
|
if err != nil { |
|
|
|
log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
return nil |
|
|
|
} else { |
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} else { |
|
|
|
//inference or one version |
|
|
|
result, err := GetTrainJobList(1000, 1, "create_time", "desc", task.JobName) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJobList(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
isExist := false |
|
|
|
for _, job := range result.JobList { |
|
|
|
if task.JobName == job.JobName { |
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
isExist = true |
|
|
|
task.Status = TransTrainJobStatus(job.IntStatus) |
|
|
|
task.JobID = strconv.FormatInt(job.JobID, 10) |
|
|
|
err = models.UpdateJob(task) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) |
|
|
|
if err != nil { |
|
|
|
log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) |
|
|
|
return err |
|
|
|
} |
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
if err != nil { |
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
return nil |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//todo: move |
|
|
|
if !isExist { |
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
//temp.QueryTimes = temp.QueryTimes + 1 |
|
|
|
} else { |
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} else { |
|
|
|
//normal |
|
|
|
} |
|
|
|
result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) |
|
|
|
if err != nil { |
|
|
|
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
task.Status = TransTrainJobStatus(result.IntStatus) |
|
|
|
task.Duration = result.Duration / 1000 |
|
|
|
task.TrainJobDuration = result.TrainJobDuration |
|
|
|
|
|
|
|
if task.StartTime == 0 && result.StartTime > 0 { |
|
|
|
task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) |
|
|
|
} |
|
|
|
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) |
|
|
|
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { |
|
|
|
task.EndTime = task.StartTime.Add(task.Duration) |
|
|
|
} |
|
|
|
task.CorrectCreateUnix() |
|
|
|
err = models.UpdateJob(task) |
|
|
|
result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) |
|
|
|
log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
|
|
|
|
if result != nil { |
|
|
|
task.Status = TransTrainJobStatus(result.IntStatus) |
|
|
|
task.Duration = result.Duration / 1000 |
|
|
|
task.TrainJobDuration = result.TrainJobDuration |
|
|
|
|
|
|
|
if task.StartTime == 0 && result.StartTime > 0 { |
|
|
|
task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) |
|
|
|
} |
|
|
|
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) |
|
|
|
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { |
|
|
|
task.EndTime = task.StartTime.Add(task.Duration) |
|
|
|
} |
|
|
|
task.CorrectCreateUnix() |
|
|
|
err = models.UpdateJob(task) |
|
|
|
if err != nil { |
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) |
|
|
|
return err |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//temp |
|
|
|
return nil |
|
|
|
} |