| @@ -60,7 +60,7 @@ type Cloudbrain struct { | |||||
| ContainerIp string | ContainerIp string | ||||
| CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` | CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` | ||||
| UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` | UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` | ||||
| Duration int `xorm:"INDEX duration"` | |||||
| Duration int64 `xorm:"INDEX duration"` | |||||
| TrainJobDuration string | TrainJobDuration string | ||||
| DeletedAt time.Time `xorm:"deleted"` | DeletedAt time.Time `xorm:"deleted"` | ||||
| CanDebug bool `xorm:"-"` | CanDebug bool `xorm:"-"` | ||||
| @@ -933,7 +933,7 @@ func SetCloudbrainStatusByJobID(jobID string, status CloudbrainStatus) (err erro | |||||
| return | return | ||||
| } | } | ||||
| func SetTrainJobStatusByJobID(jobID string, status string, duration int, trainjobduration string) (err error) { | |||||
| func SetTrainJobStatusByJobID(jobID string, status string, duration int64, trainjobduration string) (err error) { | |||||
| cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration} | cb := &Cloudbrain{JobID: jobID, Status: string(status), Duration: duration, TrainJobDuration: trainjobduration} | ||||
| _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb) | _, err = x.Cols("status", "duration", "train_job_duration").Where("cloudbrain.job_id=?", jobID).Update(cb) | ||||
| return | return | ||||
| @@ -221,7 +221,7 @@ func TransTrainJobStatus(status int) string { | |||||
| case 0: | case 0: | ||||
| return "UNKNOWN" | return "UNKNOWN" | ||||
| case 1: | case 1: | ||||
| return "CREATING" | |||||
| return "INIT" | |||||
| case 2: | case 2: | ||||
| return "IMAGE_CREATING" | return "IMAGE_CREATING" | ||||
| case 3: | case 3: | ||||
| @@ -237,13 +237,13 @@ func TransTrainJobStatus(status int) string { | |||||
| case 8: | case 8: | ||||
| return "RUNNING" | return "RUNNING" | ||||
| case 9: | case 9: | ||||
| return "STOPPED" | |||||
| return "KILLING" | |||||
| case 10: | case 10: | ||||
| return "COMPLETED" | return "COMPLETED" | ||||
| case 11: | case 11: | ||||
| return "FAILED" | return "FAILED" | ||||
| case 12: | case 12: | ||||
| return "STOPPED" | |||||
| return "KILLED" | |||||
| case 13: | case 13: | ||||
| return "CANCELED" | return "CANCELED" | ||||
| case 14: | case 14: | ||||
| @@ -64,6 +64,8 @@ func GetModelArtsTrainJob(ctx *context.APIContext) { | |||||
| } | } | ||||
| job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | ||||
| job.Duration = result.Duration | |||||
| job.TrainJobDuration = result.TrainJobDuration | |||||
| err = models.UpdateJob(job) | err = models.UpdateJob(job) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("UpdateJob failed:", err) | log.Error("UpdateJob failed:", err) | ||||
| @@ -506,43 +506,7 @@ func TrainJobIndex(ctx *context.Context) { | |||||
| page = 1 | page = 1 | ||||
| } | } | ||||
| tasks, _, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| ListOptions: models.ListOptions{ | |||||
| Page: page, | |||||
| PageSize: setting.UI.IssuePagingNum, | |||||
| }, | |||||
| RepoID: repo.ID, | |||||
| Type: models.TypeCloudBrainTrainJob, | |||||
| }) | |||||
| if err != nil { | |||||
| ctx.ServerError("Cloudbrain", err) | |||||
| return | |||||
| } | |||||
| for i := range tasks { | |||||
| TrainJobDetail, err := modelarts.GetTrainJob(tasks[i].Cloudbrain.JobID, strconv.FormatInt(tasks[i].Cloudbrain.VersionID, 10)) | |||||
| if TrainJobDetail != nil { | |||||
| TrainJobDetail.CreateTime = time.Unix(int64(TrainJobDetail.LongCreateTime/1000), 0).Format("2006-01-02 15:04:05") | |||||
| if TrainJobDetail.Duration != 0 { | |||||
| TrainJobDetail.TrainJobDuration = addZero(TrainJobDetail.Duration/3600000) + ":" + addZero(TrainJobDetail.Duration%3600000/60000) + ":" + addZero(TrainJobDetail.Duration%60000/1000) | |||||
| } else { | |||||
| TrainJobDetail.TrainJobDuration = "00:00:00" | |||||
| } | |||||
| } | |||||
| if err != nil { | |||||
| log.Error("GetJob(%s) failed:%v", tasks[i].Cloudbrain.JobID, err.Error()) | |||||
| return | |||||
| } | |||||
| err = models.SetTrainJobStatusByJobID(tasks[i].Cloudbrain.JobID, modelarts.TransTrainJobStatus(TrainJobDetail.IntStatus), int(TrainJobDetail.Duration), string(TrainJobDetail.TrainJobDuration)) | |||||
| // err = models.UpdateJob(tasks[i].Cloudbrain) | |||||
| if err != nil { | |||||
| ctx.ServerError("UpdateJob failed", err) | |||||
| return | |||||
| } | |||||
| } | |||||
| trainTasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| tasks, count, err := models.Cloudbrains(&models.CloudbrainsOptions{ | |||||
| ListOptions: models.ListOptions{ | ListOptions: models.ListOptions{ | ||||
| Page: page, | Page: page, | ||||
| PageSize: setting.UI.IssuePagingNum, | PageSize: setting.UI.IssuePagingNum, | ||||
| @@ -560,7 +524,7 @@ func TrainJobIndex(ctx *context.Context) { | |||||
| ctx.Data["Page"] = pager | ctx.Data["Page"] = pager | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| ctx.Data["Tasks"] = trainTasks | |||||
| ctx.Data["Tasks"] = tasks | |||||
| ctx.HTML(200, tplModelArtsTrainJobIndex) | ctx.HTML(200, tplModelArtsTrainJobIndex) | ||||
| } | } | ||||
| @@ -901,12 +865,13 @@ func TrainJobShow(ctx *context.Context) { | |||||
| } else { | } else { | ||||
| result.TrainJobDuration = "00:00:00" | result.TrainJobDuration = "00:00:00" | ||||
| } | } | ||||
| err = models.SetTrainJobStatusByJobID(jobID, modelarts.TransTrainJobStatus(result.IntStatus), int(result.Duration), string(result.TrainJobDuration)) | |||||
| result.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| err = models.SetTrainJobStatusByJobID(jobID, result.Status, result.Duration, string(result.TrainJobDuration)) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("UpdateJob failed", err) | ctx.ServerError("UpdateJob failed", err) | ||||
| return | return | ||||
| } | } | ||||
| result.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| result.DatasetName = attach.Name | result.DatasetName = attach.Name | ||||
| } | } | ||||
| @@ -415,7 +415,7 @@ | |||||
| $(".job-status").each((index, job) => { | $(".job-status").each((index, job) => { | ||||
| const jobID = job.dataset.jobid; | const jobID = job.dataset.jobid; | ||||
| const repoPath = job.dataset.repopath; | const repoPath = job.dataset.repopath; | ||||
| if (job.textContent.trim() == 'STOPPED') { | |||||
| if (job.textContent.trim() == 'STOPPED' || job.textContent.trim() == 'START_FAILED' || job.textContent.trim() == 'CREATE_FAILED') { | |||||
| return | return | ||||
| } | } | ||||
| @@ -423,12 +423,12 @@ | |||||
| // 加载任务状态 | // 加载任务状态 | ||||
| var timeid = window.setInterval(loadJobStatus, 15000); | var timeid = window.setInterval(loadJobStatus, 15000); | ||||
| // $(document).ready(loadJobStatus); | |||||
| $(document).ready(loadJobStatus); | |||||
| function loadJobStatus() { | function loadJobStatus() { | ||||
| $(".job-status").each((index, job) => { | $(".job-status").each((index, job) => { | ||||
| const jobID = job.dataset.jobid; | const jobID = job.dataset.jobid; | ||||
| const repoPath = job.dataset.repopath; | const repoPath = job.dataset.repopath; | ||||
| if (job.textContent.trim() == 'STOPPED') { | |||||
| if (job.textContent.trim() == 'STOPPED' || job.textContent.trim() == 'START_FAILED' || job.textContent.trim() == 'CREATE_FAILED') { | |||||
| return | return | ||||
| } | } | ||||
| @@ -459,12 +459,14 @@ | |||||
| // 加载任务状态 | // 加载任务状态 | ||||
| var timeid = window.setInterval(loadJobStatus, 15000); | var timeid = window.setInterval(loadJobStatus, 15000); | ||||
| // $(document).ready(loadJobStatus); | |||||
| $(document).ready(loadJobStatus); | |||||
| function loadJobStatus() { | function loadJobStatus() { | ||||
| $(".job-status").each((index, job) => { | $(".job-status").each((index, job) => { | ||||
| const jobID = job.dataset.jobid; | const jobID = job.dataset.jobid; | ||||
| const repoPath = job.dataset.repopath; | const repoPath = job.dataset.repopath; | ||||
| if (job.textContent.trim() == 'STOPPED') { | |||||
| if (job.textContent.trim() == 'IMAGE_FAILED' || job.textContent.trim() == 'SUBMIT_FAILED' || job.textContent.trim() == 'DELETE_FAILED' | |||||
| || job.textContent.trim() == 'KILLED' || job.textContent.trim() == 'COMPLETED' || job.textContent.trim() == 'FAILED' | |||||
| || job.textContent.trim() == 'CANCELED' || job.textContent.trim() == 'LOST') { | |||||
| return | return | ||||
| } | } | ||||