@@ -102,11 +102,11 @@ const ( | |||
//grampus | |||
GrampusStatusPending = "pending" | |||
GrampusStatusRunning = "running" | |||
GrampusStatusFailed = "failed" | |||
GrampusStatusSucceeded = "succeeded" | |||
GrampusStatusStopped = "stopped" | |||
GrampusStatusUnknown = "unknown" | |||
GrampusStatusRunning = "RUNNING" | |||
GrampusStatusFailed = "FAILED" | |||
GrampusStatusSucceeded = "SUCCEEDED" | |||
GrampusStatusStopped = "STOPPED" | |||
GrampusStatusUnknown = "UNKNOWN" | |||
) | |||
type Cloudbrain struct { | |||
@@ -214,7 +214,7 @@ func ConvertDurationToStr(duration int64) string { | |||
} | |||
func IsTrainJobTerminal(status string) bool { | |||
return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == string(ModelArtsTrainJobKilled) | |||
return status == string(ModelArtsTrainJobCompleted) || status == string(ModelArtsTrainJobFailed) || status == string(ModelArtsTrainJobKilled) || status == GrampusStatusFailed || status == GrampusStatusStopped || status == GrampusStatusSucceeded | |||
} | |||
func IsModelArtsDebugJobTerminal(status string) bool { | |||
@@ -1185,6 +1185,11 @@ type CreateGrampusJobResponse struct { | |||
JobInfo GrampusJobInfo `json:"otJob"` | |||
} | |||
type GetGrampusJobResponse struct { | |||
GrampusResult | |||
JobInfo GrampusJobInfo `json:"otJob"` | |||
} | |||
type GrampusTasks struct { | |||
Command string `json:"command"` | |||
Name string `json:"name"` | |||
@@ -1227,7 +1232,7 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) { | |||
) | |||
} | |||
if len(opts.ComputeResource) >= 0 { | |||
if len(opts.ComputeResource) > 0 { | |||
cond = cond.And( | |||
builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource}, | |||
) | |||
@@ -6,6 +6,7 @@ import ( | |||
"code.gitea.io/gitea/modules/log" | |||
"code.gitea.io/gitea/modules/notification" | |||
"code.gitea.io/gitea/modules/timeutil" | |||
"strings" | |||
) | |||
const ( | |||
@@ -100,7 +101,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
jobID := jobResult.JobInfo.JobID | |||
err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
Status: string(models.GrampusStatusPending), | |||
Status: TransTrainJobStatus(jobResult.JobInfo.Status), | |||
UserID: ctx.User.ID, | |||
RepoID: ctx.Repo.Repository.ID, | |||
JobID: jobID, | |||
@@ -110,10 +111,10 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
Type: models.TypeCloudBrainGrampus, | |||
//VersionID: jobResult.VersionID, | |||
//VersionName: jobResult.VersionName, | |||
Uuid: req.Uuid, | |||
DatasetName: req.DatasetName, | |||
CommitID: req.CommitID, | |||
//IsLatestVersion: req.IsLatestVersion, | |||
Uuid: req.Uuid, | |||
DatasetName: req.DatasetName, | |||
CommitID: req.CommitID, | |||
IsLatestVersion: req.IsLatestVersion, | |||
ComputeResource: req.ComputeResource, | |||
//EngineID: req.EngineID, | |||
TrainUrl: req.TrainUrl, | |||
@@ -148,3 +149,11 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
return nil | |||
} | |||
func TransTrainJobStatus(status string) string { | |||
if status == "pending" { | |||
status = "waiting" | |||
} | |||
return strings.ToUpper(status) | |||
} |
@@ -124,40 +124,34 @@ sendjob: | |||
return &result, nil | |||
} | |||
func GetJob(jobID string) (*models.GetNotebookResult, error) { | |||
func GetJob(jobID string) (*models.GetGrampusJobResponse, error) { | |||
checkSetting() | |||
client := getRestyClient() | |||
var result models.GetNotebookResult | |||
var result models.GetGrampusJobResponse | |||
retry := 0 | |||
sendjob: | |||
res, err := client.R(). | |||
_, err := client.R(). | |||
SetHeader("Content-Type", "application/json"). | |||
SetAuthToken(TOKEN). | |||
SetResult(&result). | |||
Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID) | |||
Get(HOST + urlTrainJob + "/" + jobID) | |||
if err != nil { | |||
return nil, fmt.Errorf("resty GetJob: %v", err) | |||
} | |||
if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
if result.ErrorCode == errorIllegalToken && retry < 1 { | |||
retry++ | |||
log.Info("retry get token") | |||
_ = getToken() | |||
goto sendjob | |||
} | |||
var response models.NotebookResult | |||
err = json.Unmarshal(res.Body(), &response) | |||
if err != nil { | |||
log.Error("json.Unmarshal failed: %s", err.Error()) | |||
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
} | |||
if len(response.ErrorCode) != 0 { | |||
log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
if result.ErrorCode != 0 { | |||
log.Error("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
return &result, fmt.Errorf("GetJob failed(%d): %s", result.ErrorCode, result.ErrorMsg) | |||
} | |||
return &result, nil | |||
@@ -6,6 +6,7 @@ | |||
package repo | |||
import ( | |||
"code.gitea.io/gitea/modules/grampus" | |||
"net/http" | |||
"strconv" | |||
"strings" | |||
@@ -167,7 +168,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||
log.Error("UpdateJob failed:", err) | |||
} | |||
} | |||
} else { | |||
} else if job.Type == models.TypeCloudBrainTwo { | |||
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) | |||
if err != nil { | |||
ctx.NotFound(err) | |||
@@ -181,6 +182,29 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||
job.Duration = result.Duration / 1000 | |||
job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) | |||
if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { | |||
job.EndTime = job.StartTime.Add(job.Duration) | |||
} | |||
job.CorrectCreateUnix() | |||
err = models.UpdateTrainJobVersion(job) | |||
if err != nil { | |||
log.Error("UpdateJob failed:", err) | |||
} | |||
} else if job.Type == models.TypeCloudBrainGrampus { | |||
result, err := grampus.GetJob(jobID) | |||
if err != nil { | |||
log.Error("GetJob(%s) failed:%v", job.JobName, err) | |||
ctx.NotFound(err) | |||
return | |||
} | |||
if job.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||
job.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt / 1000) | |||
} | |||
job.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
job.Duration = result.JobInfo.RunSec | |||
job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) | |||
if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { | |||
job.EndTime = job.StartTime.Add(job.Duration) | |||
} | |||
@@ -2,6 +2,7 @@ package repo | |||
import ( | |||
"bufio" | |||
"code.gitea.io/gitea/modules/grampus" | |||
"encoding/json" | |||
"errors" | |||
"fmt" | |||
@@ -1492,7 +1493,31 @@ func SyncCloudbrainStatus() { | |||
} else { | |||
log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType) | |||
} | |||
} else if task.Type == models.TypeCloudBrainGrampus { | |||
result, err := grampus.GetJob(task.JobID) | |||
if err != nil { | |||
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||
continue | |||
} | |||
if result != nil { | |||
task.Status = grampus.TransTrainJobStatus(result.JobInfo.Status) | |||
task.Duration = result.JobInfo.RunSec | |||
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||
if task.StartTime == 0 && result.JobInfo.StartedAt > 0 { | |||
task.StartTime = timeutil.TimeStamp(result.JobInfo.StartedAt / 1000) | |||
} | |||
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||
task.EndTime = task.StartTime.Add(task.Duration) | |||
} | |||
task.CorrectCreateUnix() | |||
err = models.UpdateJob(task) | |||
if err != nil { | |||
log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||
continue | |||
} | |||
} | |||
} else { | |||
log.Error("task.Type(%s) is error:%d", task.JobName, task.Type) | |||
} | |||
@@ -577,6 +577,7 @@ func TrainJobIndex(ctx *context.Context) { | |||
JobTypes: jobTypes, | |||
IsLatestVersion: modelarts.IsLatestVersion, | |||
ComputeResource: listType, | |||
Type: models.TypeCloudBrainAll, | |||
}) | |||
if err != nil { | |||
ctx.ServerError("Cloudbrain", err) | |||