| @@ -245,6 +245,32 @@ func GetTrainJobLog(jobID string) (string, error) { | |||||
| return logContent, nil | return logContent, nil | ||||
| } | } | ||||
| func GetGrampusMetrics(jobID string) (models.GetTrainJobMetricStatisticResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetTrainJobMetricStatisticResult | |||||
| res, err := client.R(). | |||||
| SetAuthToken(TOKEN). | |||||
| Get(HOST + urlTrainJob + "/" + jobID + "/task/0/replica/0/metrics") | |||||
| if err != nil { | |||||
| return result, fmt.Errorf("resty GetTrainJobLog: %v", err) | |||||
| } | |||||
| if err = json.Unmarshal([]byte(res.String()), &result); err != nil { | |||||
| log.Error("GetGrampusMetrics json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| log.Error("Call GrampusMetrics failed(%d):%s(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg) | |||||
| return result, fmt.Errorf("Call GrampusMetrics failed(%d):%d(%s)", res.StatusCode(), result.ErrorCode, result.ErrorMsg) | |||||
| } | |||||
| if !result.IsSuccess { | |||||
| log.Error("GetGrampusMetrics(%s) failed", jobID) | |||||
| return result, fmt.Errorf("GetGrampusMetrics failed:%s", result.ErrorMsg) | |||||
| } | |||||
| return result, nil | |||||
| } | |||||
| func StopJob(jobID string) (*models.GrampusStopJobResponse, error) { | func StopJob(jobID string) (*models.GrampusStopJobResponse, error) { | ||||
| checkSetting() | checkSetting() | ||||
| client := getRestyClient() | client := getRestyClient() | ||||
| @@ -1048,6 +1048,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Get("", repo.GetModelArtsTrainJobVersion) | m.Get("", repo.GetModelArtsTrainJobVersion) | ||||
| m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.GrampusStopJob) | m.Post("/stop_version", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo_ext.GrampusStopJob) | ||||
| m.Get("/log", repo_ext.GrampusGetLog) | m.Get("/log", repo_ext.GrampusGetLog) | ||||
| m.Get("/metrics", repo_ext.GrampusMetrics) | |||||
| m.Get("/download_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo_ext.GrampusDownloadLog) | m.Get("/download_log", cloudbrain.AdminOrJobCreaterRightForTrain, repo_ext.GrampusDownloadLog) | ||||
| }) | }) | ||||
| }) | }) | ||||
| @@ -957,6 +957,28 @@ func GrampusGetLog(ctx *context.Context) { | |||||
| return | return | ||||
| } | } | ||||
| func GrampusMetrics(ctx *context.Context) { | |||||
| jobID := ctx.Params(":jobid") | |||||
| job, err := models.GetCloudbrainByJobID(jobID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID failed: %v", err, ctx.Data["MsgID"]) | |||||
| ctx.ServerError(err.Error(), err) | |||||
| return | |||||
| } | |||||
| result, err := grampus.GetGrampusMetrics(job.JobID) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLog failed: %v", err, ctx.Data["MsgID"]) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||||
| "JobID": jobID, | |||||
| "Interval": result.Interval, | |||||
| "MetricsInfo": result.MetricsInfo, | |||||
| }) | |||||
| return | |||||
| } | |||||
| func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName, modelRemoteObsUrl string) (string, error) { | func generateCommand(repoName, processorType, codeRemotePath, dataRemotePath, bootFile, paramSrc, outputRemotePath, datasetName, pretrainModelPath, pretrainModelFileName, modelRemoteObsUrl string) (string, error) { | ||||
| var command string | var command string | ||||