|
|
@@ -509,7 +509,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job |
|
|
RepoID: repo.ID, |
|
|
RepoID: repo.ID, |
|
|
Type: models.TypeCloudBrainTwo, |
|
|
Type: models.TypeCloudBrainTwo, |
|
|
JobTypes: jobTypes, |
|
|
JobTypes: jobTypes, |
|
|
JobName: req.JobName, |
|
|
|
|
|
|
|
|
JobID: jobId, |
|
|
}) |
|
|
}) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
ctx.ServerError("Cloudbrain", err) |
|
|
ctx.ServerError("Cloudbrain", err) |
|
|
@@ -519,10 +519,10 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job |
|
|
|
|
|
|
|
|
createTime := timeutil.TimeStampNow() |
|
|
createTime := timeutil.TimeStampNow() |
|
|
task := &models.Cloudbrain{ |
|
|
task := &models.Cloudbrain{ |
|
|
Status: string(models.ModelArtsTrainJobWaiting), |
|
|
|
|
|
|
|
|
Status: models.JobStatusTemp, |
|
|
UserID: ctx.User.ID, |
|
|
UserID: ctx.User.ID, |
|
|
RepoID: ctx.Repo.Repository.ID, |
|
|
RepoID: ctx.Repo.Repository.ID, |
|
|
JobID: models.TempJobIdPrefix + jobId, |
|
|
|
|
|
|
|
|
JobID: jobId, |
|
|
JobName: req.JobName, |
|
|
JobName: req.JobName, |
|
|
DisplayJobName: req.DisplayJobName, |
|
|
DisplayJobName: req.DisplayJobName, |
|
|
JobType: string(models.JobTypeTrain), |
|
|
JobType: string(models.JobTypeTrain), |
|
|
@@ -846,17 +846,17 @@ func GetNotebookImageName(imageId string) (string, error) { |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
if strings.HasPrefix(task.JobID, models.TempJobIdPrefix) { |
|
|
|
|
|
|
|
|
if isTempJob(task.JobID, task.Status) { |
|
|
if task.VersionCount > VersionCountOne { |
|
|
if task.VersionCount > VersionCountOne { |
|
|
//multi version |
|
|
//multi version |
|
|
result, err := GetTrainJobVersionList(1000, 1, strings.TrimPrefix(task.JobID, models.TempJobIdPrefix)) |
|
|
result, err := GetTrainJobVersionList(1000, 1, strings.TrimPrefix(task.JobID, models.TempJobIdPrefix)) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error("GetTrainJobVersionList(%s) failed:%v", task.JobName, err) |
|
|
|
|
|
|
|
|
log.Error("GetTrainJobVersionList failed:%v", err) |
|
|
return err |
|
|
return err |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if result != nil { |
|
|
if result != nil { |
|
|
if result.JobID == task.JobID && result.JobName == task.JobName { |
|
|
|
|
|
|
|
|
if strconv.FormatInt(result.JobID, 10) == task.JobID && result.JobName == task.JobName { |
|
|
if result.VersionCount == int64(task.VersionCount) { |
|
|
if result.VersionCount == int64(task.VersionCount) { |
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
task.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) |
|
|
task.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) |
|
|
@@ -871,13 +871,13 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) |
|
|
temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) |
|
|
log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) |
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
|
|
return err |
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return nil |
|
|
return nil |
|
|
} else { |
|
|
} else { |
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
@@ -890,16 +890,14 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
//inference or one version |
|
|
//inference or one version |
|
|
result, err := GetTrainJobList(1000, 1, "create_time", "desc", task.JobName) |
|
|
result, err := GetTrainJobList(1000, 1, "create_time", "desc", task.JobName) |
|
|
if err != nil { |
|
|
if err != nil { |
|
|
log.Error("GetTrainJobList(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
|
|
|
|
|
log.Error("GetTrainJobList failed:%v", err) |
|
|
return err |
|
|
return err |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if result != nil { |
|
|
if result != nil { |
|
|
isExist := false |
|
|
|
|
|
for _, job := range result.JobList { |
|
|
for _, job := range result.JobList { |
|
|
if task.JobName == job.JobName { |
|
|
if task.JobName == job.JobName { |
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
isExist = true |
|
|
|
|
|
task.Status = TransTrainJobStatus(job.IntStatus) |
|
|
task.Status = TransTrainJobStatus(job.IntStatus) |
|
|
task.JobID = strconv.FormatInt(job.JobID, 10) |
|
|
task.JobID = strconv.FormatInt(job.JobID, 10) |
|
|
|
|
|
|
|
|
@@ -921,14 +919,6 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
return nil |
|
|
return nil |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
//todo: move |
|
|
|
|
|
if !isExist { |
|
|
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
|
|
//temp.QueryTimes = temp.QueryTimes + 1 |
|
|
|
|
|
} else { |
|
|
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
} |
|
|
} |
|
|
@@ -963,3 +953,85 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error { |
|
|
|
|
|
|
|
|
return nil |
|
|
return nil |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func HandleNotebookInfo(task *models.Cloudbrain) error { |
|
|
|
|
|
if isTempJob(task.JobID, task.Status) { |
|
|
|
|
|
result, err := GetNotebookList(1000, 0, "createTime", "DESC", task.JobName) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetNotebookList failed:%v", err) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if result != nil { |
|
|
|
|
|
count, err := models.GetCloudbrainCountByJobName(task.JobName, task.JobType) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetCloudbrainCountByJobName failed:%v", err) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if len(result.NotebookList) == count { |
|
|
|
|
|
if result.NotebookList[0].JobName == task.JobName { |
|
|
|
|
|
log.Info("find the record(%s)", task.DisplayJobName) |
|
|
|
|
|
task.Status = result.NotebookList[0].Status |
|
|
|
|
|
task.JobID = result.NotebookList[0].JobID |
|
|
|
|
|
|
|
|
|
|
|
err = models.UpdateJob(task) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
err = models.DeleteCloudbrainTemp(temp) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
return nil |
|
|
|
|
|
} else { |
|
|
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
log.Error("can not find the record(%s) until now", task.DisplayJobName) |
|
|
|
|
|
} |
|
|
|
|
|
} else { |
|
|
|
|
|
//normal |
|
|
|
|
|
result, err := GetNotebook2(task.JobID) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if result != nil { |
|
|
|
|
|
task.Status = result.Status |
|
|
|
|
|
if task.StartTime == 0 && result.Lease.UpdateTime > 0 { |
|
|
|
|
|
task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) |
|
|
|
|
|
} |
|
|
|
|
|
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { |
|
|
|
|
|
task.EndTime = timeutil.TimeStampNow() |
|
|
|
|
|
} |
|
|
|
|
|
task.CorrectCreateUnix() |
|
|
|
|
|
task.ComputeAndSetDuration() |
|
|
|
|
|
err = models.UpdateJob(task) |
|
|
|
|
|
if err != nil { |
|
|
|
|
|
log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) |
|
|
|
|
|
return err |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return nil |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
func isTempJob(jobID, status string) bool { |
|
|
|
|
|
if (strings.HasPrefix(jobID, models.TempJobIdPrefix) && status == string(models.ModelArtsTrainJobWaiting)) || status == models.JobStatusTemp { |
|
|
|
|
|
return true |
|
|
|
|
|
} |
|
|
|
|
|
return false |
|
|
|
|
|
} |