| @@ -8,14 +8,13 @@ import ( | |||||
| "strings" | "strings" | ||||
| "time" | "time" | ||||
| "code.gitea.io/gitea/modules/util" | |||||
| "xorm.io/builder" | "xorm.io/builder" | ||||
| "xorm.io/xorm" | "xorm.io/xorm" | ||||
| "code.gitea.io/gitea/modules/log" | "code.gitea.io/gitea/modules/log" | ||||
| "code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
| "code.gitea.io/gitea/modules/timeutil" | "code.gitea.io/gitea/modules/timeutil" | ||||
| "code.gitea.io/gitea/modules/util" | |||||
| ) | ) | ||||
| type CloudbrainStatus string | type CloudbrainStatus string | ||||
| @@ -31,9 +30,9 @@ const ( | |||||
| ) | ) | ||||
| const ( | const ( | ||||
| NPUResource = "NPU" | |||||
| GPUResource = "CPU/GPU" | |||||
| AllResource = "all" | |||||
| NPUResource = "NPU" | |||||
| GPUResource = "CPU/GPU" | |||||
| AllResource = "all" | |||||
| //notebook storage category | //notebook storage category | ||||
| EVSCategory = "EVS" | EVSCategory = "EVS" | ||||
| @@ -1262,6 +1261,52 @@ type LogFile struct { | |||||
| Name string | Name string | ||||
| } | } | ||||
// JobList is one element of the "jobs" array carried by
// GetTrainJobListResult. Despite the name it describes a single job.
type JobList struct {
	JobName      string `json:"job_name"`
	JobID        int64  `json:"job_id"`
	VersionID    int64  `json:"version_id"`
	VersionCount int64  `json:"version_count"`
	Description  string `json:"job_desc"`
	// IntStatus is the numeric job status as delivered in the "status" key.
	IntStatus int `json:"status"`
}
| type GetTrainJobListResult struct { | |||||
| ErrorResult | |||||
| JobTotalCount int `json:"job_total_count"` //查询到的用户创建作业总数 | |||||
| JobCountLimit int `json:"job_count_limit"` //用户还可以创建训练作业的数量 | |||||
| Quotas int `json:"quotas"` //训练作业的运行数量上限 | |||||
| JobList []JobList `json:"jobs"` | |||||
| } | |||||
// JobVersionList is one element of the "versions" array carried by
// GetTrainJobVersionListResult; it describes a single job version.
type JobVersionList struct {
	VersionName string `json:"version_name"`
	VersionID   int64  `json:"version_id"`
	// IntStatus is the numeric version status as delivered in the "status" key.
	IntStatus int `json:"status"`
}
| type GetTrainJobVersionListResult struct { | |||||
| ErrorResult | |||||
| JobID int64 `json:"job_id"` | |||||
| JobName string `json:"job_name"` | |||||
| JobDesc string `json:"job_desc"` | |||||
| VersionCount int64 `json:"version_count"` | |||||
| JobVersionList []JobVersionList `json:"versions"` | |||||
| } | |||||
// NotebookList is one element of the "data" array carried by
// GetNotebookListResult; it describes a single notebook.
type NotebookList struct {
	JobName string `json:"name"`
	JobID   string `json:"id"`
	Status  string `json:"status"`
}
| type GetNotebookListResult struct { | |||||
| TotalCount int64 `json:"total"` //总的记录数量 | |||||
| CurrentPage int `json:"current"` //当前页数 | |||||
| TotalPages int `json:"pages"` //总的页数 | |||||
| Size int `json:"size"` //每一页的数量 | |||||
| NotebookList []NotebookList `json:"data"` | |||||
| } | |||||
| //Grampus | //Grampus | ||||
| type GrampusResult struct { | type GrampusResult struct { | ||||
| ErrorCode int `json:"errorCode"` | ErrorCode int `json:"errorCode"` | ||||
| @@ -2289,3 +2334,8 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) { | |||||
| In("id", ids). | In("id", ids). | ||||
| Find(&cloudbrains) | Find(&cloudbrains) | ||||
| } | } | ||||
| func GetCloudbrainCountByJobName(jobName, jobType string, typeCloudbrain int) (int, error) { | |||||
| count, err := x.Where("job_name = ? and job_type= ? and type = ?", jobName, jobType, typeCloudbrain).Count(new(Cloudbrain)) | |||||
| return int(count), err | |||||
| } | |||||
| @@ -0,0 +1,68 @@ | |||||
| package models | |||||
| import ( | |||||
| "time" | |||||
| "code.gitea.io/gitea/modules/setting" | |||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| ) | |||||
// Placeholder values written into a CloudbrainTemp row while the real
// identifiers and status of the remote job are not yet known. All three
// share the same literal.
const (
	// TempJobId stands in for an unknown remote job id.
	TempJobId = "TEMP"
	// TempVersionId stands in for an unknown remote version id.
	TempVersionId = TempJobId
	// TempJobStatus marks a row whose remote status has not been resolved.
	TempJobStatus = TempJobId
)
| type CloudbrainTemp struct { | |||||
| ID int64 `xorm:"pk autoincr"` | |||||
| JobID string `xorm:"NOT NULL DEFAULT 'TEMP'"` | |||||
| VersionID string `xorm:"NOT NULL DEFAULT 'TEMP'"` | |||||
| JobName string `xorm:"NOT NULL "` | |||||
| Type int `xorm:"NOT NULL "` | |||||
| JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"` | |||||
| Status string `xorm:"INDEX NOT NULL DEFAULT 'TEMP'"` | |||||
| QueryTimes int `xorm:"INDEX NOT NULL DEFAULT 0"` | |||||
| CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"` | |||||
| UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` | |||||
| DeletedAt time.Time `xorm:"deleted"` | |||||
| } | |||||
| func InsertCloudbrainTemp(temp *CloudbrainTemp) (err error) { | |||||
| if _, err = x.Insert(temp); err != nil { | |||||
| return err | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func getCloudBrainTemp(temp *CloudbrainTemp) (*CloudbrainTemp, error) { | |||||
| has, err := x.Get(temp) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } else if !has { | |||||
| return nil, ErrJobNotExist{} | |||||
| } | |||||
| return temp, nil | |||||
| } | |||||
| func GetCloudBrainTempJobs() ([]*CloudbrainTemp, error) { | |||||
| jobs := make([]*CloudbrainTemp, 0, 10) | |||||
| return jobs, x.In("status", TempJobStatus, string(ModelArtsStopping), string(ModelArtsTrainJobKilling)). | |||||
| And("query_times < ?", setting.MaxTempQueryTimes). | |||||
| Limit(100). | |||||
| Find(&jobs) | |||||
| } | |||||
| func DeleteCloudbrainTemp(temp *CloudbrainTemp) error { | |||||
| return deleteCloudbrainTemp(x, temp) | |||||
| } | |||||
| func deleteCloudbrainTemp(e Engine, temp *CloudbrainTemp) error { | |||||
| _, err := e.ID(temp.ID).Delete(temp) | |||||
| return err | |||||
| } | |||||
| func UpdateCloudbrainTemp(temp *CloudbrainTemp) error { | |||||
| _, err := x.ID(temp.ID).AllCols().Update(temp) | |||||
| return err | |||||
| } | |||||
| @@ -145,6 +145,7 @@ func init() { | |||||
| new(OrgStatistic), | new(OrgStatistic), | ||||
| new(SearchRecord), | new(SearchRecord), | ||||
| new(AiModelConvert), | new(AiModelConvert), | ||||
| new(CloudbrainTemp), | |||||
| ) | ) | ||||
| tablesStatistic = append(tablesStatistic, | tablesStatistic = append(tablesStatistic, | ||||
| @@ -142,8 +142,8 @@ func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error) | |||||
| func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | ||||
| var ID = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(ID) | |||||
| var id = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByID failed:%v", err.Error()) | log.Error("GetCloudbrainByID failed:%v", err.Error()) | ||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ||||
| @@ -158,8 +158,8 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | |||||
| func AdminOrJobCreaterRight(ctx *context.Context) { | func AdminOrJobCreaterRight(ctx *context.Context) { | ||||
| var ID = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(ID) | |||||
| var id = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByID failed:%v", err.Error()) | log.Error("GetCloudbrainByID failed:%v", err.Error()) | ||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ||||
| @@ -5,6 +5,7 @@ | |||||
| package cron | package cron | ||||
| import ( | import ( | ||||
| "code.gitea.io/gitea/modules/modelarts" | |||||
| "context" | "context" | ||||
| "time" | "time" | ||||
| @@ -207,6 +208,17 @@ func registerSyncCloudbrainStatus() { | |||||
| }) | }) | ||||
| } | } | ||||
| func registerSyncModelArtsTempJobs() { | |||||
| RegisterTaskFatal("sync_model_arts_temp_jobs", &BaseConfig{ | |||||
| Enabled: true, | |||||
| RunAtStart: false, | |||||
| Schedule: "@every 1m", | |||||
| }, func(ctx context.Context, _ *models.User, _ Config) error { | |||||
| modelarts.SyncTempStatusJob() | |||||
| return nil | |||||
| }) | |||||
| } | |||||
| func initBasicTasks() { | func initBasicTasks() { | ||||
| registerUpdateMirrorTask() | registerUpdateMirrorTask() | ||||
| registerRepoHealthCheck() | registerRepoHealthCheck() | ||||
| @@ -227,4 +239,5 @@ func initBasicTasks() { | |||||
| registerSyncCloudbrainStatus() | registerSyncCloudbrainStatus() | ||||
| registerHandleOrgStatistic() | registerHandleOrgStatistic() | ||||
| registerSyncModelArtsTempJobs() | |||||
| } | } | ||||
| @@ -6,8 +6,7 @@ import ( | |||||
| "fmt" | "fmt" | ||||
| "path" | "path" | ||||
| "strconv" | "strconv" | ||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| "strings" | |||||
| "code.gitea.io/gitea/models" | "code.gitea.io/gitea/models" | ||||
| "code.gitea.io/gitea/modules/context" | "code.gitea.io/gitea/modules/context" | ||||
| @@ -15,6 +14,7 @@ import ( | |||||
| "code.gitea.io/gitea/modules/notification" | "code.gitea.io/gitea/modules/notification" | ||||
| "code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
| "code.gitea.io/gitea/modules/storage" | "code.gitea.io/gitea/modules/storage" | ||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| ) | ) | ||||
| const ( | const ( | ||||
| @@ -59,7 +59,7 @@ const ( | |||||
| PerPage = 10 | PerPage = 10 | ||||
| IsLatestVersion = "1" | IsLatestVersion = "1" | ||||
| NotLatestVersion = "0" | NotLatestVersion = "0" | ||||
| VersionCount = 1 | |||||
| VersionCountOne = 1 | |||||
| SortByCreateTime = "create_time" | SortByCreateTime = "create_time" | ||||
| ConfigTypeCustom = "custom" | ConfigTypeCustom = "custom" | ||||
| @@ -284,9 +284,24 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("createNotebook2 failed: %v", err.Error()) | log.Error("createNotebook2 failed: %v", err.Error()) | ||||
| if strings.HasPrefix(err.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", displayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| JobID: models.TempJobId, | |||||
| VersionID: models.TempVersionId, | |||||
| Status: models.TempJobStatus, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobName: jobName, | |||||
| JobType: string(models.JobTypeDebug), | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| return errTemp | |||||
| } | |||||
| } | |||||
| return err | return err | ||||
| } | } | ||||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| task := &models.Cloudbrain{ | |||||
| Status: jobResult.Status, | Status: jobResult.Status, | ||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| @@ -302,16 +317,13 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc | |||||
| Description: description, | Description: description, | ||||
| CreatedUnix: createTime, | CreatedUnix: createTime, | ||||
| UpdatedUnix: createTime, | UpdatedUnix: createTime, | ||||
| }) | |||||
| if err != nil { | |||||
| return err | |||||
| } | } | ||||
| task, err := models.GetCloudbrainByName(jobName) | |||||
| err = models.CreateCloudbrain(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByName failed: %v", err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| stringId := strconv.FormatInt(task.ID, 10) | stringId := strconv.FormatInt(task.ID, 10) | ||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) | notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) | ||||
| return nil | return nil | ||||
| @@ -364,7 +376,22 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| }) | }) | ||||
| } | } | ||||
| if createErr != nil { | if createErr != nil { | ||||
| log.Error("CreateJob failed: %v", createErr.Error()) | |||||
| log.Error("createTrainJob failed: %v", createErr.Error()) | |||||
| if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", req.DisplayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| JobID: models.TempJobId, | |||||
| VersionID: models.TempVersionId, | |||||
| Status: models.TempJobStatus, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobName: req.JobName, | |||||
| JobType: string(models.JobTypeTrain), | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| return errTemp | |||||
| } | |||||
| } | |||||
| return createErr | return createErr | ||||
| } | } | ||||
| jobId := strconv.FormatInt(jobResult.JobID, 10) | jobId := strconv.FormatInt(jobResult.JobID, 10) | ||||
| @@ -438,7 +465,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| createTime := timeutil.TimeStampNow() | createTime := timeutil.TimeStampNow() | ||||
| var jobResult *models.CreateTrainJobResult | var jobResult *models.CreateTrainJobResult | ||||
| var createErr error | var createErr error | ||||
| log.Info(" req.EngineID =" + fmt.Sprint(req.EngineID)) | |||||
| if req.EngineID < 0 { | if req.EngineID < 0 { | ||||
| jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{ | jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{ | ||||
| Description: req.Description, | Description: req.Description, | ||||
| @@ -480,7 +507,22 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| }, jobId) | }, jobId) | ||||
| } | } | ||||
| if createErr != nil { | if createErr != nil { | ||||
| log.Error("CreateJob failed: %v", createErr.Error()) | |||||
| log.Error("createTrainJobVersion failed: %v", createErr.Error()) | |||||
| if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", req.DisplayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| JobID: jobId, | |||||
| VersionID: models.TempVersionId, | |||||
| Status: models.TempJobStatus, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobName: req.JobName, | |||||
| JobType: string(models.JobTypeTrain), | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| return errTemp | |||||
| } | |||||
| } | |||||
| return createErr | return createErr | ||||
| } | } | ||||
| @@ -540,7 +582,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| } | } | ||||
| //将训练任务的上一版本的isLatestVersion设置为"0" | //将训练任务的上一版本的isLatestVersion设置为"0" | ||||
| createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount) | |||||
| createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCountOne, NotLatestVersion, TotalVersionCount) | |||||
| if createErr != nil { | if createErr != nil { | ||||
| ctx.ServerError("Update IsLatestVersion failed", createErr) | ctx.ServerError("Update IsLatestVersion failed", createErr) | ||||
| return createErr | return createErr | ||||
| @@ -549,99 +591,6 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| return createErr | return createErr | ||||
| } | } | ||||
| func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) { | |||||
| createTime := timeutil.TimeStampNow() | |||||
| jobResult, err := createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | |||||
| Config: models.UserImageConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| UserImageUrl: req.UserImageUrl, | |||||
| UserCommand: req.UserCommand, | |||||
| }, | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("CreateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| var jobTypes []string | |||||
| jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||||
| repo := ctx.Repo.Repository | |||||
| VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ | |||||
| RepoID: repo.ID, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobTypes: jobTypes, | |||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||||
| }) | |||||
| if err != nil { | |||||
| ctx.ServerError("Cloudbrain", err) | |||||
| return err | |||||
| } | |||||
| //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount | |||||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| Status: TransTrainJobStatus(jobResult.Status), | |||||
| UserID: ctx.User.ID, | |||||
| RepoID: ctx.Repo.Repository.ID, | |||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||||
| JobName: req.JobName, | |||||
| DisplayJobName: req.DisplayJobName, | |||||
| JobType: string(models.JobTypeTrain), | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| VersionID: jobResult.VersionID, | |||||
| VersionName: jobResult.VersionName, | |||||
| Uuid: req.Uuid, | |||||
| DatasetName: req.DatasetName, | |||||
| CommitID: req.CommitID, | |||||
| IsLatestVersion: req.IsLatestVersion, | |||||
| PreVersionName: req.PreVersionName, | |||||
| ComputeResource: models.NPUResource, | |||||
| EngineID: MORDELART_USER_IMAGE_ENGINE_ID, | |||||
| Image: req.UserImageUrl, | |||||
| TrainUrl: req.TrainUrl, | |||||
| BranchName: req.BranchName, | |||||
| Parameters: req.Params, | |||||
| BootFile: req.BootFile, | |||||
| DataUrl: req.DataUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PreVersionId: req.PreVersionId, | |||||
| FlavorCode: req.FlavorCode, | |||||
| Description: req.Description, | |||||
| WorkServerNumber: req.WorkServerNumber, | |||||
| FlavorName: req.FlavorName, | |||||
| EngineName: req.EngineName, | |||||
| TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1, | |||||
| VersionCount: VersionListCount + 1, | |||||
| CreatedUnix: createTime, | |||||
| UpdatedUnix: createTime, | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||||
| return err | |||||
| } | |||||
| //将训练任务的上一版本的isLatestVersion设置为"0" | |||||
| err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount) | |||||
| if err != nil { | |||||
| ctx.ServerError("Update IsLatestVersion failed", err) | |||||
| return err | |||||
| } | |||||
| return err | |||||
| } | |||||
| func TransTrainJobStatus(status int) string { | func TransTrainJobStatus(status int) string { | ||||
| switch status { | switch status { | ||||
| case 0: | case 0: | ||||
| @@ -722,7 +671,22 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e | |||||
| }, | }, | ||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateJob failed: %v", err.Error()) | |||||
| log.Error("createInferenceJob failed: %v", err.Error()) | |||||
| if strings.HasPrefix(err.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", req.DisplayJobName) | |||||
| err = models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| JobID: models.TempJobId, | |||||
| VersionID: models.TempVersionId, | |||||
| Status: models.TempJobStatus, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobName: req.JobName, | |||||
| JobType: string(models.JobTypeInference), | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -807,3 +771,455 @@ func InitSpecialPool() { | |||||
| json.Unmarshal([]byte(setting.ModelArtsSpecialPools), &SpecialPools) | json.Unmarshal([]byte(setting.ModelArtsSpecialPools), &SpecialPools) | ||||
| } | } | ||||
| } | } | ||||
| func HandleTrainJobInfo(task *models.Cloudbrain) error { | |||||
| result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| oldStatus := task.Status | |||||
| task.Status = TransTrainJobStatus(result.IntStatus) | |||||
| task.Duration = result.Duration / 1000 | |||||
| task.TrainJobDuration = result.TrainJobDuration | |||||
| if task.StartTime == 0 && result.StartTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||||
| if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||||
| task.EndTime = task.StartTime.Add(task.Duration) | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func HandleNotebookInfo(task *models.Cloudbrain) error { | |||||
| result, err := GetNotebook2(task.JobID) | |||||
| if err != nil { | |||||
| log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| oldStatus := task.Status | |||||
| task.Status = result.Status | |||||
| if task.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
| } | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| task.ComputeAndSetDuration() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| if task.FlavorCode == "" { | |||||
| task.FlavorCode = result.Flavor | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func SyncTempStatusJob() { | |||||
| jobs, err := models.GetCloudBrainTempJobs() | |||||
| if err != nil { | |||||
| log.Error("GetCloudBrainTempJobs failed:%v", err.Error()) | |||||
| return | |||||
| } | |||||
| for _, temp := range jobs { | |||||
| log.Info("start to handle record: %s", temp.JobName) | |||||
| if temp.Type == models.TypeCloudBrainTwo { | |||||
| if temp.JobType == string(models.JobTypeDebug) { | |||||
| err = handleNotebook(temp) | |||||
| if err != nil { | |||||
| log.Error("handleNotebook falied:%v", err) | |||||
| break | |||||
| } | |||||
| } else if temp.JobType == string(models.JobTypeTrain) || temp.JobType == string(models.JobTypeInference) { | |||||
| _, err = models.GetCloudbrainByJobID(temp.JobID) | |||||
| if err != nil { | |||||
| //one version | |||||
| err = handleTrainJob(temp) | |||||
| if err != nil { | |||||
| log.Error("handleTrainJob falied:%v", err) | |||||
| break | |||||
| } | |||||
| } else { | |||||
| //multi version | |||||
| err = handleTrainJobMultiVersion(temp) | |||||
| if err != nil { | |||||
| log.Error("handleTrainJobMultiVersion falied:%v", err) | |||||
| break | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| return | |||||
| } | |||||
| func handleNotebook(temp *models.CloudbrainTemp) error { | |||||
| if temp.Status == models.TempJobStatus { | |||||
| err := handleTempNotebook(temp) | |||||
| if err != nil { | |||||
| log.Error("handleTempNotebook failed:%v", err) | |||||
| return err | |||||
| } | |||||
| } else if temp.Status == string(models.ModelArtsStopping) { | |||||
| res, err := GetNotebook2(temp.JobID) | |||||
| if err != nil { | |||||
| log.Error("GetNotebook2 failed:%v", err) | |||||
| return err | |||||
| } | |||||
| temp.Status = res.Status | |||||
| if temp.Status == string(models.ModelArtsStopped) { | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| return err | |||||
| } | |||||
| _, err := DelNotebook2(temp.JobID) | |||||
| if err != nil { | |||||
| log.Error("DelNotebook2 failed:%v", err) | |||||
| return err | |||||
| } | |||||
| temp.Status = string(models.ModelArtsDeleted) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func handleTempNotebook(temp *models.CloudbrainTemp) error { | |||||
| var err error | |||||
| var isExist bool | |||||
| for { | |||||
| result, err := GetNotebookList(1000, 0, "createTime", "DESC", temp.JobName) | |||||
| if err != nil { | |||||
| log.Error("GetNotebookList failed:%v", err) | |||||
| break | |||||
| } | |||||
| temp.QueryTimes++ | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| } | |||||
| if result != nil { | |||||
| for _, notebook := range result.NotebookList { | |||||
| if temp.JobID == models.TempJobId { | |||||
| //new notebook | |||||
| if notebook.JobName == temp.JobName { | |||||
| isExist = true | |||||
| temp.Status = notebook.Status | |||||
| temp.JobID = notebook.JobID | |||||
| break | |||||
| } | |||||
| } else { | |||||
| //restart: always can find one record | |||||
| if notebook.JobName == temp.JobName { | |||||
| if notebook.Status != string(models.ModelArtsStopped) { | |||||
| isExist = true | |||||
| temp.Status = notebook.Status | |||||
| temp.JobID = notebook.JobID | |||||
| break | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| if isExist { | |||||
| log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status) | |||||
| if temp.Status == string(models.ModelArtsCreateFailed) { | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| break | |||||
| } | |||||
| _, err := DelNotebook2(temp.JobID) | |||||
| if err != nil { | |||||
| log.Error("DelNotebook2(%s) failed:%v", temp.JobName, err) | |||||
| break | |||||
| } | |||||
| temp.Status = string(models.ModelArtsDeleted) | |||||
| } else { | |||||
| _, err := ManageNotebook2(temp.JobID, models.NotebookAction{Action: models.ActionStop}) | |||||
| if err != nil { | |||||
| log.Error("ManageNotebook2(%s) failed:%v", temp.JobName, err) | |||||
| break | |||||
| } | |||||
| temp.Status = string(models.ModelArtsStopping) | |||||
| } | |||||
| models.UpdateCloudbrainTemp(temp) | |||||
| } else { | |||||
| log.Error("can not find the record(%s) till now", temp.JobName) | |||||
| err = errors.New("not found") | |||||
| break | |||||
| } | |||||
| } else { | |||||
| log.Error("can not find the record(%s) till now", temp.JobName) | |||||
| err = errors.New("not found") | |||||
| break | |||||
| } | |||||
| break | |||||
| } | |||||
| if temp.QueryTimes >= setting.MaxTempQueryTimes && !isExist { | |||||
| log.Info("reach MaxTempQueryTimes, set the job failed") | |||||
| temp.Status = string(models.ModelArtsTrainJobFailed) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return err | |||||
| } | |||||
| func handleTrainJob(temp *models.CloudbrainTemp) error { | |||||
| if temp.Status == models.TempJobStatus { | |||||
| err := handleTempTrainJob(temp) | |||||
| if err != nil { | |||||
| log.Error("handleTempTrainJob failed:%v", err) | |||||
| return err | |||||
| } | |||||
| } else if temp.Status == string(models.ModelArtsTrainJobKilling) { | |||||
| res, err := GetTrainJob(temp.JobID, temp.VersionID) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJob failed:%v", err) | |||||
| return err | |||||
| } | |||||
| temp.Status = TransTrainJobStatus(res.IntStatus) | |||||
| if temp.Status == string(models.ModelArtsTrainJobKilled) { | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| return err | |||||
| } | |||||
| _, err := DelTrainJob(temp.JobID) | |||||
| if err != nil { | |||||
| log.Error("DelTrainJob failed:%v", err) | |||||
| return err | |||||
| } | |||||
| temp.Status = string(models.ModelArtsDeleted) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func handleTrainJobMultiVersion(temp *models.CloudbrainTemp) error { | |||||
| if temp.Status == models.TempJobStatus { | |||||
| err := handleTempTrainJobMultiVersion(temp) | |||||
| if err != nil { | |||||
| log.Error("handleTempTrainJobMultiVersion failed:%v", err) | |||||
| return err | |||||
| } | |||||
| } else if temp.Status == string(models.ModelArtsTrainJobKilling) { | |||||
| res, err := GetTrainJob(temp.JobID, temp.VersionID) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJob failed:%v", err) | |||||
| return err | |||||
| } | |||||
| temp.Status = TransTrainJobStatus(res.IntStatus) | |||||
| if temp.Status == string(models.ModelArtsTrainJobKilled) { | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| return err | |||||
| } | |||||
| _, err := DelTrainJobVersion(temp.JobID, temp.VersionID) | |||||
| if err != nil { | |||||
| log.Error("DelTrainJob failed:%v", err) | |||||
| return err | |||||
| } | |||||
| temp.Status = string(models.ModelArtsDeleted) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func handleTempTrainJobMultiVersion(temp *models.CloudbrainTemp) error { | |||||
| var err error | |||||
| var isExist bool | |||||
| for { | |||||
| result, err := GetTrainJobVersionList(1000, 1, temp.JobID) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobVersionList failed:%v", err) | |||||
| break | |||||
| } | |||||
| temp.QueryTimes++ | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| } | |||||
| if result != nil { | |||||
| count, _ := models.GetCloudbrainCountByJobName(temp.JobName, temp.JobType, temp.Type) | |||||
| if result.VersionCount == int64(count+1) { | |||||
| isExist = true | |||||
| temp.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) | |||||
| temp.VersionID = strconv.FormatInt(result.JobVersionList[0].VersionID, 10) | |||||
| log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status) | |||||
| _, err := StopTrainJob(temp.JobID, temp.VersionID) | |||||
| if err != nil { | |||||
| log.Error("StopTrainJob failed:%v", err) | |||||
| break | |||||
| } | |||||
| temp.Status = string(models.ModelArtsTrainJobKilling) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err) | |||||
| break | |||||
| } | |||||
| } else { | |||||
| log.Error("can not find the record(%s) till now", temp.JobName) | |||||
| err = errors.New("not found") | |||||
| break | |||||
| } | |||||
| } | |||||
| break | |||||
| } | |||||
| if temp.QueryTimes >= setting.MaxTempQueryTimes && !isExist { | |||||
| log.Info("reach MaxTempQueryTimes, set the job failed") | |||||
| temp.Status = string(models.ModelArtsTrainJobFailed) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return err | |||||
| } | |||||
| func handleTempTrainJob(temp *models.CloudbrainTemp) error { | |||||
| var err error | |||||
| var isExist bool | |||||
| for { | |||||
| result, err := GetTrainJobList(1000, 1, "create_time", "desc", temp.JobName) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobList failed:%v", err) | |||||
| break | |||||
| } | |||||
| temp.QueryTimes++ | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp failed:%v", err) | |||||
| } | |||||
| if result != nil { | |||||
| for _, job := range result.JobList { | |||||
| if temp.JobName == job.JobName && TransTrainJobStatus(job.IntStatus) != string(models.ModelArtsTrainJobFailed) { | |||||
| isExist = true | |||||
| temp.Status = TransTrainJobStatus(job.IntStatus) | |||||
| temp.JobID = strconv.FormatInt(job.JobID, 10) | |||||
| temp.VersionID = strconv.FormatInt(job.VersionID, 10) | |||||
| log.Info("find the record(%s), status(%s)", temp.JobName, temp.Status) | |||||
| _, err = StopTrainJob(temp.JobID, temp.VersionID) | |||||
| if err != nil { | |||||
| log.Error("StopTrainJob(%s) failed:%v", temp.JobName, err) | |||||
| break | |||||
| } | |||||
| temp.Status = string(models.ModelArtsTrainJobKilling) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err) | |||||
| break | |||||
| } | |||||
| } | |||||
| } | |||||
| if !isExist { | |||||
| log.Error("can not find the record(%s) till now", temp.JobName) | |||||
| err = errors.New("not found") | |||||
| break | |||||
| } | |||||
| } | |||||
| break | |||||
| } | |||||
| if temp.QueryTimes >= setting.MaxTempQueryTimes && !isExist { | |||||
| log.Info("reach MaxTempQueryTimes, set the job failed") | |||||
| temp.Status = string(models.ModelArtsTrainJobFailed) | |||||
| err = models.UpdateCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("UpdateCloudbrainTemp(%s) failed:%v", temp.JobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return err | |||||
| } | |||||
| @@ -37,6 +37,7 @@ const ( | |||||
| NotebookNotFound = "ModelArts.6404" | NotebookNotFound = "ModelArts.6404" | ||||
| NotebookNoPermission = "ModelArts.6407" | NotebookNoPermission = "ModelArts.6407" | ||||
| NotebookInvalid = "ModelArts.6400" | NotebookInvalid = "ModelArts.6400" | ||||
| UnknownErrorPrefix = "UNKNOWN:" | |||||
| ) | ) | ||||
| func getRestyClient() *resty.Client { | func getRestyClient() *resty.Client { | ||||
| @@ -298,6 +299,10 @@ sendjob: | |||||
| return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | ||||
| } | } | ||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| } | |||||
| if len(response.ErrorCode) != 0 { | if len(response.ErrorCode) != 0 { | ||||
| log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | ||||
| if response.ErrorCode == modelartsIllegalToken && retry < 1 { | if response.ErrorCode == modelartsIllegalToken && retry < 1 { | ||||
| @@ -506,23 +511,27 @@ sendjob: | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| } | } | ||||
| log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||||
| DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == BootFileErrorMsg { | |||||
| log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| log.Error("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||||
| dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == bootFileErrorMsg { | |||||
| log.Error("启动文件错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf("启动文件错误!") | return &result, fmt.Errorf("启动文件错误!") | ||||
| } | } | ||||
| if temp.ErrorMsg == DataSetErrorMsg { | |||||
| log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if temp.ErrorMsg == dataSetErrorMsg { | |||||
| log.Error("数据集错误!createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createTrainJobUserImage failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| log.Error("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| return &result, fmt.Errorf("createTrainJob failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| log.Error("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| return &result, fmt.Errorf("createTrainJobUserImage failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| } | } | ||||
| return &result, nil | return &result, nil | ||||
| @@ -547,9 +556,6 @@ sendjob: | |||||
| return nil, fmt.Errorf("resty create train-job: %s", err) | return nil, fmt.Errorf("resty create train-job: %s", err) | ||||
| } | } | ||||
| req, _ := json.Marshal(createJobParams) | |||||
| log.Info("%s", req) | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | ||||
| retry++ | retry++ | ||||
| _ = getToken() | _ = getToken() | ||||
| @@ -563,17 +569,21 @@ sendjob: | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| } | } | ||||
| log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||||
| DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == BootFileErrorMsg { | |||||
| bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||||
| dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == bootFileErrorMsg { | |||||
| log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("启动文件错误!") | return &result, fmt.Errorf("启动文件错误!") | ||||
| } | } | ||||
| if temp.ErrorMsg == DataSetErrorMsg { | |||||
| if temp.ErrorMsg == dataSetErrorMsg { | |||||
| log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| @@ -603,9 +613,6 @@ sendjob: | |||||
| return nil, fmt.Errorf("resty create train-job version: %s", err) | return nil, fmt.Errorf("resty create train-job version: %s", err) | ||||
| } | } | ||||
| req, _ := json.Marshal(createJobVersionParams) | |||||
| log.Info("%s", req) | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | ||||
| retry++ | retry++ | ||||
| _ = getToken() | _ = getToken() | ||||
| @@ -618,17 +625,23 @@ sendjob: | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| } | } | ||||
| BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'." | |||||
| DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == BootFileErrorMsg { | |||||
| log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'." | |||||
| dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == bootFileErrorMsg { | |||||
| log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("启动文件错误!") | return &result, fmt.Errorf("启动文件错误!") | ||||
| } | } | ||||
| if temp.ErrorMsg == DataSetErrorMsg { | |||||
| if temp.ErrorMsg == dataSetErrorMsg { | |||||
| log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| @@ -761,9 +774,6 @@ sendjob: | |||||
| goto sendjob | goto sendjob | ||||
| } | } | ||||
| //temp, _ := json.Marshal(req) | |||||
| //log.Info("%s", temp) | |||||
| if res.StatusCode() != http.StatusOK { | if res.StatusCode() != http.StatusOK { | ||||
| var temp models.ErrorResult | var temp models.ErrorResult | ||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | ||||
| @@ -1172,7 +1182,11 @@ sendjob: | |||||
| log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| @@ -1212,7 +1226,11 @@ sendjob: | |||||
| err = json.Unmarshal(res.Body(), &response) | err = json.Unmarshal(res.Body(), &response) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("json.Unmarshal failed: %s", err.Error()) | log.Error("json.Unmarshal failed: %s", err.Error()) | ||||
| return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error()) | |||||
| } | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| } | } | ||||
| if len(response.ErrorCode) != 0 { | if len(response.ErrorCode) != 0 { | ||||
| @@ -1271,3 +1289,139 @@ sendjob: | |||||
| return &result, nil | return &result, nil | ||||
| } | } | ||||
| func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetTrainJobListResult | |||||
| retry := 0 | |||||
| sendjob: | |||||
| res, err := client.R(). | |||||
| SetQueryParams(map[string]string{ | |||||
| "per_page": strconv.Itoa(perPage), | |||||
| "page": strconv.Itoa(page), | |||||
| "sortBy": sortBy, | |||||
| "order": order, | |||||
| "search_content": searchContent, | |||||
| }). | |||||
| SetAuthToken(TOKEN). | |||||
| SetResult(&result). | |||||
| Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob) | |||||
| if err != nil { | |||||
| return nil, fmt.Errorf("resty GetTrainJobList: %v", err) | |||||
| } | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||||
| retry++ | |||||
| _ = getToken() | |||||
| goto sendjob | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| var temp models.ErrorResult | |||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf(temp.ErrorMsg) | |||||
| } | |||||
| if !result.IsSuccess { | |||||
| log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| return &result, fmt.Errorf(result.ErrorMsg) | |||||
| } | |||||
| return &result, nil | |||||
| } | |||||
| func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetTrainJobVersionListResult | |||||
| retry := 0 | |||||
| sendjob: | |||||
| res, err := client.R(). | |||||
| SetQueryParams(map[string]string{ | |||||
| "per_page": strconv.Itoa(perPage), | |||||
| "page": strconv.Itoa(page), | |||||
| }). | |||||
| SetAuthToken(TOKEN). | |||||
| SetResult(&result). | |||||
| Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions") | |||||
| if err != nil { | |||||
| return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err) | |||||
| } | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||||
| retry++ | |||||
| _ = getToken() | |||||
| goto sendjob | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| var temp models.ErrorResult | |||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf(temp.ErrorMsg) | |||||
| } | |||||
| if !result.IsSuccess { | |||||
| log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| return &result, fmt.Errorf(result.ErrorMsg) | |||||
| } | |||||
| return &result, nil | |||||
| } | |||||
| func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetNotebookListResult | |||||
| retry := 0 | |||||
| sendjob: | |||||
| res, err := client.R(). | |||||
| SetQueryParams(map[string]string{ | |||||
| "limit": strconv.Itoa(limit), | |||||
| "offset": strconv.Itoa(offset), | |||||
| "name": searchContent, | |||||
| "sort_key": sortBy, | |||||
| "sort_dir": order, | |||||
| }). | |||||
| SetAuthToken(TOKEN). | |||||
| SetResult(&result). | |||||
| Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2) | |||||
| if err != nil { | |||||
| return nil, fmt.Errorf("resty GetNotebookList: %v", err) | |||||
| } | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||||
| retry++ | |||||
| _ = getToken() | |||||
| goto sendjob | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| var temp models.ErrorResult | |||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf(temp.ErrorMsg) | |||||
| } | |||||
| return &result, nil | |||||
| } | |||||
| @@ -539,6 +539,7 @@ var ( | |||||
| DebugHost string | DebugHost string | ||||
| ImageInfos string | ImageInfos string | ||||
| Capacity int | Capacity int | ||||
| MaxTempQueryTimes int | |||||
| //train-job | //train-job | ||||
| ResourcePools string | ResourcePools string | ||||
| Engines string | Engines string | ||||
| @@ -1424,6 +1425,7 @@ func NewContext() { | |||||
| Flavor = sec.Key("FLAVOR").MustString("") | Flavor = sec.Key("FLAVOR").MustString("") | ||||
| ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | ImageInfos = sec.Key("IMAGE_INFOS").MustString("") | ||||
| Capacity = sec.Key("IMAGE_INFOS").MustInt(100) | Capacity = sec.Key("IMAGE_INFOS").MustInt(100) | ||||
| MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30) | |||||
| ResourcePools = sec.Key("Resource_Pools").MustString("") | ResourcePools = sec.Key("Resource_Pools").MustString("") | ||||
| Engines = sec.Key("Engines").MustString("") | Engines = sec.Key("Engines").MustString("") | ||||
| EngineVersions = sec.Key("Engine_Versions").MustString("") | EngineVersions = sec.Key("Engine_Versions").MustString("") | ||||
| @@ -26,40 +26,6 @@ import ( | |||||
| routerRepo "code.gitea.io/gitea/routers/repo" | routerRepo "code.gitea.io/gitea/routers/repo" | ||||
| ) | ) | ||||
| func GetModelArtsNotebook(ctx *context.APIContext) { | |||||
| var ( | |||||
| err error | |||||
| ) | |||||
| jobID := ctx.Params(":jobid") | |||||
| repoID := ctx.Repo.Repository.ID | |||||
| job, err := models.GetRepoCloudBrainByJobID(repoID, jobID) | |||||
| if err != nil { | |||||
| ctx.NotFound(err) | |||||
| return | |||||
| } | |||||
| result, err := modelarts.GetJob(jobID) | |||||
| if err != nil { | |||||
| ctx.NotFound(err) | |||||
| return | |||||
| } | |||||
| oldStatus := job.Status | |||||
| job.Status = result.Status | |||||
| if oldStatus != result.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(job, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||||
| "JobID": jobID, | |||||
| "JobStatus": result.Status, | |||||
| }) | |||||
| } | |||||
| func GetModelArtsNotebook2(ctx *context.APIContext) { | func GetModelArtsNotebook2(ctx *context.APIContext) { | ||||
| var ( | var ( | ||||
| err error | err error | ||||
| @@ -71,33 +37,16 @@ func GetModelArtsNotebook2(ctx *context.APIContext) { | |||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| result, err := modelarts.GetNotebook2(job.JobID) | |||||
| err = modelarts.HandleNotebookInfo(job) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| if job.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
| job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
| } | |||||
| oldStatus := job.Status | |||||
| job.Status = result.Status | |||||
| if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) { | |||||
| job.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| job.CorrectCreateUnix() | |||||
| job.ComputeAndSetDuration() | |||||
| if oldStatus != result.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(job, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "ID": ID, | "ID": ID, | ||||
| "JobName": job.JobName, | "JobName": job.JobName, | ||||
| "JobStatus": result.Status, | |||||
| "JobStatus": job.Status, | |||||
| "JobDuration": job.TrainJobDuration, | "JobDuration": job.TrainJobDuration, | ||||
| }) | }) | ||||
| @@ -189,27 +138,11 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||||
| } | } | ||||
| } | } | ||||
| } else if job.Type == models.TypeCloudBrainTwo { | } else if job.Type == models.TypeCloudBrainTwo { | ||||
| result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) | |||||
| err := modelarts.HandleTrainJobInfo(job) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| if job.StartTime == 0 && result.StartTime > 0 { | |||||
| job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| job.Duration = result.Duration / 1000 | |||||
| job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) | |||||
| if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { | |||||
| job.EndTime = job.StartTime.Add(job.Duration) | |||||
| } | |||||
| job.CorrectCreateUnix() | |||||
| err = models.UpdateTrainJobVersion(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| } else if job.Type == models.TypeC2Net { | } else if job.Type == models.TypeC2Net { | ||||
| result, err := grampus.GetJob(jobID) | result, err := grampus.GetJob(jobID) | ||||
| if err != nil { | if err != nil { | ||||
| @@ -558,26 +491,11 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) { | |||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) | |||||
| err = modelarts.HandleTrainJobInfo(job) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| if job.StartTime == 0 && result.StartTime > 0 { | |||||
| job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| job.Duration = result.Duration / 1000 | |||||
| job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) | |||||
| if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { | |||||
| job.EndTime = job.StartTime.Add(job.Duration) | |||||
| } | |||||
| job.CorrectCreateUnix() | |||||
| err = models.UpdateInferenceJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | "JobID": jobID, | ||||
| @@ -1828,70 +1828,24 @@ func SyncCloudbrainStatus() { | |||||
| } | } | ||||
| } else if task.Type == models.TypeCloudBrainTwo { | } else if task.Type == models.TypeCloudBrainTwo { | ||||
| if task.JobType == string(models.JobTypeDebug) { | if task.JobType == string(models.JobTypeDebug) { | ||||
| //result, err := modelarts.GetJob(task.JobID) | |||||
| result, err := modelarts.GetNotebook2(task.JobID) | |||||
| err := modelarts.HandleNotebookInfo(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("HandleNotebookInfo(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| if result != nil { | |||||
| oldStatus := task.Status | |||||
| task.Status = result.Status | |||||
| if task.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
| } | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| task.ComputeAndSetDuration() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| continue | |||||
| } | |||||
| } | |||||
| } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | ||||
| result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| err := modelarts.HandleTrainJobInfo(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("HandleTrainJobInfo(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| if result != nil { | |||||
| oldStatus := task.Status | |||||
| task.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| task.Duration = result.Duration / 1000 | |||||
| task.TrainJobDuration = result.TrainJobDuration | |||||
| if task.StartTime == 0 && result.StartTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||||
| if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||||
| task.EndTime = task.StartTime.Add(task.Duration) | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| continue | |||||
| } | |||||
| } | |||||
| } else { | } else { | ||||
| log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType) | |||||
| log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType) | |||||
| } | } | ||||
| } else if task.Type == models.TypeC2Net { | } else if task.Type == models.TypeC2Net { | ||||
| result, err := grampus.GetJob(task.JobID) | result, err := grampus.GetJob(task.JobID) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| @@ -348,7 +348,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||||
| EngineName: image, | EngineName: image, | ||||
| DatasetName: attachment.Name, | DatasetName: attachment.Name, | ||||
| IsLatestVersion: modelarts.IsLatestVersion, | IsLatestVersion: modelarts.IsLatestVersion, | ||||
| VersionCount: modelarts.VersionCount, | |||||
| VersionCount: modelarts.VersionCountOne, | |||||
| WorkServerNumber: 1, | WorkServerNumber: 1, | ||||
| } | } | ||||
| @@ -398,7 +398,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||||
| branchName := form.BranchName | branchName := form.BranchName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| flavorName := form.FlavorName | flavorName := form.FlavorName | ||||
| versionCount := modelarts.VersionCount | |||||
| versionCount := modelarts.VersionCountOne | |||||
| engineName := form.EngineName | engineName := form.EngineName | ||||
| if !jobNamePattern.MatchString(displayJobName) { | if !jobNamePattern.MatchString(displayJobName) { | ||||
| @@ -15,9 +15,6 @@ import ( | |||||
| "time" | "time" | ||||
| "unicode/utf8" | "unicode/utf8" | ||||
| "code.gitea.io/gitea/modules/notification" | |||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| "code.gitea.io/gitea/models" | "code.gitea.io/gitea/models" | ||||
| "code.gitea.io/gitea/modules/auth" | "code.gitea.io/gitea/modules/auth" | ||||
| "code.gitea.io/gitea/modules/base" | "code.gitea.io/gitea/modules/base" | ||||
| @@ -26,9 +23,11 @@ import ( | |||||
| "code.gitea.io/gitea/modules/git" | "code.gitea.io/gitea/modules/git" | ||||
| "code.gitea.io/gitea/modules/log" | "code.gitea.io/gitea/modules/log" | ||||
| "code.gitea.io/gitea/modules/modelarts" | "code.gitea.io/gitea/modules/modelarts" | ||||
| "code.gitea.io/gitea/modules/notification" | |||||
| "code.gitea.io/gitea/modules/obs" | "code.gitea.io/gitea/modules/obs" | ||||
| "code.gitea.io/gitea/modules/setting" | "code.gitea.io/gitea/modules/setting" | ||||
| "code.gitea.io/gitea/modules/storage" | "code.gitea.io/gitea/modules/storage" | ||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| "code.gitea.io/gitea/modules/util" | "code.gitea.io/gitea/modules/util" | ||||
| ) | ) | ||||
| @@ -272,33 +271,15 @@ func NotebookShow(ctx *context.Context) { | |||||
| return | return | ||||
| } | } | ||||
| result, err := modelarts.GetNotebook2(task.JobID) | |||||
| if err != nil { | |||||
| log.Error("GET job error", err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||||
| return | |||||
| } | |||||
| if result != nil { | |||||
| if task.DeletedAt.IsZero() { //normal record | |||||
| if task.Status != result.Status { | |||||
| oldStatus := task.Status | |||||
| task.Status = result.Status | |||||
| models.ParseAndSetDurationFromModelArtsNotebook(result, task) | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("GET job error", err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { //deleted record | |||||
| } | |||||
| if task.FlavorCode == "" { | |||||
| task.FlavorCode = result.Flavor | |||||
| if task.DeletedAt.IsZero() { //normal record | |||||
| err := modelarts.HandleNotebookInfo(task) | |||||
| if err != nil { | |||||
| ctx.Data["error"] = err.Error() | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) | |||||
| return | |||||
| } | } | ||||
| } else { //deleted record | |||||
| } | } | ||||
| datasetDownload := make([]models.DatasetDownload, 0) | datasetDownload := make([]models.DatasetDownload, 0) | ||||
| @@ -435,82 +416,127 @@ func NotebookDebug2(ctx *context.Context) { | |||||
| ctx.Redirect(result.Url + "?token=" + result.Token) | ctx.Redirect(result.Url + "?token=" + result.Token) | ||||
| } | } | ||||
| func NotebookManage(ctx *context.Context) { | |||||
| var ID = ctx.Params(":id") | |||||
| var action = ctx.Params(":action") | |||||
| var resultCode = "0" | |||||
| func NotebookRestart(ctx *context.Context) { | |||||
| var id = ctx.Params(":id") | |||||
| var resultCode = "-1" | |||||
| var errorMsg = "" | var errorMsg = "" | ||||
| var status = "" | var status = "" | ||||
| task := ctx.Cloudbrain | |||||
| for { | for { | ||||
| task, err := models.GetCloudbrainByID(ID) | |||||
| if err != nil { | |||||
| log.Error("get task(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| ctx.CheckWechatBind() | |||||
| if ctx.Written() { | |||||
| return | |||||
| } | |||||
| if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { | |||||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||||
| errorMsg = "the job is not stopped" | |||||
| break | break | ||||
| } | } | ||||
| if action == models.ActionStop { | |||||
| if task.Status != string(models.ModelArtsRunning) { | |||||
| log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "the job is not running" | |||||
| break | |||||
| } | |||||
| if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) { | |||||
| log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "you have no right to stop the job" | |||||
| break | |||||
| } | |||||
| } else if action == models.ActionRestart { | |||||
| ctx.CheckWechatBind() | |||||
| if ctx.Written() { | |||||
| return | |||||
| } | |||||
| if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { | |||||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "the job is not stopped" | |||||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } else { | |||||
| if count >= 1 { | |||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||||
| errorMsg = "you have already a running or waiting task, can not create more" | |||||
| break | break | ||||
| } | } | ||||
| } | |||||
| if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin()) { | |||||
| log.Error("the user has no right ro restart the job", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "you have no right to restart the job" | |||||
| break | |||||
| } | |||||
| createTime := timeutil.TimeStampNow() | |||||
| param := models.NotebookAction{ | |||||
| Action: models.ActionStart, | |||||
| } | |||||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } else { | |||||
| if count >= 1 { | |||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "you have already a running or waiting task, can not create more" | |||||
| break | |||||
| res, err := modelarts.ManageNotebook2(task.JobID, param) | |||||
| if err != nil { | |||||
| log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"]) | |||||
| /* 暂不处理再次调试502的场景,详情见方案 | |||||
| if strings.HasPrefix(err.Error(), modelarts.UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", task.DisplayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| JobID: task.JobID, | |||||
| VersionID: models.TempVersionId, | |||||
| Status: models.TempJobStatus, | |||||
| Type: task.Type, | |||||
| JobName: task.JobName, | |||||
| JobType: task.JobType, | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| } | } | ||||
| } | } | ||||
| */ | |||||
| errorMsg = err.Error() | |||||
| break | |||||
| } | |||||
| action = models.ActionStart | |||||
| } else { | |||||
| log.Error("the action(%s) is illegal", action, ctx.Data["MsgID"]) | |||||
| newTask := &models.Cloudbrain{ | |||||
| Status: res.Status, | |||||
| UserID: task.UserID, | |||||
| RepoID: task.RepoID, | |||||
| JobID: task.JobID, | |||||
| JobName: task.JobName, | |||||
| DisplayJobName: task.DisplayJobName, | |||||
| JobType: task.JobType, | |||||
| Type: task.Type, | |||||
| Uuid: task.Uuid, | |||||
| Image: task.Image, | |||||
| ComputeResource: task.ComputeResource, | |||||
| Description: task.Description, | |||||
| CreatedUnix: createTime, | |||||
| UpdatedUnix: createTime, | |||||
| FlavorCode: task.FlavorCode, | |||||
| FlavorName: task.FlavorName, | |||||
| } | |||||
| err = models.RestartCloudbrain(task, newTask) | |||||
| if err != nil { | |||||
| log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | |||||
| status = res.Status | |||||
| resultCode = "0" | |||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, strconv.FormatInt(newTask.ID, 10), newTask.DisplayJobName, models.ActionCreateDebugNPUTask) | |||||
| break | |||||
| } | |||||
| ctx.JSON(200, map[string]string{ | |||||
| "result_code": resultCode, | |||||
| "error_msg": errorMsg, | |||||
| "status": status, | |||||
| "id": id, | |||||
| }) | |||||
| } | |||||
| func NotebookStop(ctx *context.Context) { | |||||
| var id = ctx.Params(":id") | |||||
| var resultCode = "0" | |||||
| var errorMsg = "" | |||||
| var status = "" | |||||
| task := ctx.Cloudbrain | |||||
| for { | |||||
| if task.Status != string(models.ModelArtsRunning) { | |||||
| log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | resultCode = "-1" | ||||
| errorMsg = "非法操作" | |||||
| errorMsg = "the job is not running" | |||||
| break | break | ||||
| } | } | ||||
| param := models.NotebookAction{ | param := models.NotebookAction{ | ||||
| Action: action, | |||||
| Action: models.ActionStop, | |||||
| } | } | ||||
| createTime := timeutil.TimeStampNow() | |||||
| res, err := modelarts.ManageNotebook2(task.JobID, param) | res, err := modelarts.ManageNotebook2(task.JobID, param) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | ||||
| @@ -523,52 +549,21 @@ func NotebookManage(ctx *context.Context) { | |||||
| } | } | ||||
| status = res.Status | status = res.Status | ||||
| if action == models.ActionStart { | |||||
| newTask := &models.Cloudbrain{ | |||||
| Status: status, | |||||
| UserID: task.UserID, | |||||
| RepoID: task.RepoID, | |||||
| JobID: task.JobID, | |||||
| JobName: task.JobName, | |||||
| DisplayJobName: task.DisplayJobName, | |||||
| JobType: task.JobType, | |||||
| Type: task.Type, | |||||
| Uuid: task.Uuid, | |||||
| Image: task.Image, | |||||
| ComputeResource: task.ComputeResource, | |||||
| Description: task.Description, | |||||
| CreatedUnix: createTime, | |||||
| UpdatedUnix: createTime, | |||||
| FlavorCode: task.FlavorCode, | |||||
| FlavorName: task.FlavorName, | |||||
| } | |||||
| err = models.RestartCloudbrain(task, newTask) | |||||
| if err != nil { | |||||
| log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | |||||
| ID = strconv.FormatInt(newTask.ID, 10) | |||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask) | |||||
| } else { | |||||
| oldStatus := task.Status | |||||
| task.Status = res.Status | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.ComputeAndSetDuration() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | |||||
| oldStatus := task.Status | |||||
| task.Status = res.Status | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.ComputeAndSetDuration() | |||||
| if oldStatus != task.Status { | |||||
| notification.NotifyChangeCloudbrainStatus(task, oldStatus) | |||||
| } | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | } | ||||
| break | break | ||||
| @@ -578,7 +573,7 @@ func NotebookManage(ctx *context.Context) { | |||||
| "result_code": resultCode, | "result_code": resultCode, | ||||
| "error_msg": errorMsg, | "error_msg": errorMsg, | ||||
| "status": status, | "status": status, | ||||
| "id": ID, | |||||
| "id": id, | |||||
| }) | }) | ||||
| } | } | ||||
| @@ -1092,7 +1087,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
| branchName := form.BranchName | branchName := form.BranchName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| FlavorName := form.FlavorName | FlavorName := form.FlavorName | ||||
| VersionCount := modelarts.VersionCount | |||||
| VersionCount := modelarts.VersionCountOne | |||||
| EngineName := form.EngineName | EngineName := form.EngineName | ||||
| count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | ||||
| @@ -1826,60 +1821,6 @@ func TrainJobShow(ctx *context.Context) { | |||||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | ||||
| } | } | ||||
| func TrainJobGetLog(ctx *context.Context) { | |||||
| ctx.Data["PageIsTrainJob"] = true | |||||
| var jobID = ctx.Params(":jobid") | |||||
| var logFileName = ctx.Query("file_name") | |||||
| var baseLine = ctx.Query("base_line") | |||||
| var order = ctx.Query("order") | |||||
| if order != modelarts.OrderDesc && order != modelarts.OrderAsc { | |||||
| log.Error("order(%s) check failed", order) | |||||
| ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow) | |||||
| return | |||||
| } | |||||
| task, err := models.GetCloudbrainByJobID(jobID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||||
| return | |||||
| } | |||||
| result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||||
| return | |||||
| } | |||||
| ctx.Data["log"] = result | |||||
| //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||||
| } | |||||
| func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) { | |||||
| task, err := models.GetCloudbrainByJobID(jobID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||||
| return nil, nil, err | |||||
| } | |||||
| resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error()) | |||||
| return nil, nil, err | |||||
| } | |||||
| result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) | |||||
| return nil, nil, err | |||||
| } | |||||
| return resultLogFile, result, err | |||||
| } | |||||
| func TrainJobDel(ctx *context.Context) { | func TrainJobDel(ctx *context.Context) { | ||||
| var jobID = ctx.Params(":jobid") | var jobID = ctx.Params(":jobid") | ||||
| var listType = ctx.Query("listType") | var listType = ctx.Query("listType") | ||||
| @@ -1946,15 +1887,6 @@ func TrainJobStop(ctx *context.Context) { | |||||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) | ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) | ||||
| } | } | ||||
| func canUserCreateTrainJob(uid int64) (bool, error) { | |||||
| org, err := models.GetOrgByName(setting.AllowedOrg) | |||||
| if err != nil { | |||||
| log.Error("get allowed org failed: ", setting.AllowedOrg) | |||||
| return false, err | |||||
| } | |||||
| return org.IsOrgMember(uid) | |||||
| } | |||||
| func canUserCreateTrainJobVersion(ctx *context.Context, userID int64) (bool, error) { | func canUserCreateTrainJobVersion(ctx *context.Context, userID int64) (bool, error) { | ||||
| if ctx == nil || ctx.User == nil { | if ctx == nil || ctx.User == nil { | ||||
| log.Error("user unlogin!") | log.Error("user unlogin!") | ||||
| @@ -2046,7 +1978,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||||
| EngineName := form.EngineName | EngineName := form.EngineName | ||||
| LabelName := form.LabelName | LabelName := form.LabelName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| VersionCount := modelarts.VersionCount | |||||
| VersionCount := modelarts.VersionCountOne | |||||
| trainUrl := form.TrainUrl | trainUrl := form.TrainUrl | ||||
| modelName := form.ModelName | modelName := form.ModelName | ||||
| modelVersion := form.ModelVersion | modelVersion := form.ModelVersion | ||||
| @@ -1183,7 +1183,8 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Group("/:id", func() { | m.Group("/:id", func() { | ||||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | ||||
| m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2) | m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2) | ||||
| m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage) | |||||
| m.Post("/restart", cloudbrain.AdminOrJobCreaterRight, repo.NotebookRestart) | |||||
| m.Post("/stop", cloudbrain.AdminOrJobCreaterRight, repo.NotebookStop) | |||||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | ||||
| }) | }) | ||||
| m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew) | m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew) | ||||
| @@ -200,7 +200,7 @@ | |||||
| <div class="six wide column"> | <div class="six wide column"> | ||||
| <img class="ui centered large image" src="/img/i-yunnao.svg"> | <img class="ui centered large image" src="/img/i-yunnao.svg"> | ||||
| </div> | </div> | ||||
| <div class="ten wide column am-pt-30"> | |||||
| <div class="ten wide column"> | |||||
| <h2 class="ui grey inverted header">{{.page_dev_yunlao}}</h2> | <h2 class="ui grey inverted header">{{.page_dev_yunlao}}</h2> | ||||
| <p class="am-lh-18 ui text grey"> | <p class="am-lh-18 ui text grey"> | ||||
| {{.page_dev_yunlao_desc1}}<br> | {{.page_dev_yunlao_desc1}}<br> | ||||
| @@ -211,12 +211,12 @@ | |||||
| </p> | </p> | ||||
| {{if .IsSigned}} | {{if .IsSigned}} | ||||
| <a class="ui blue basic button am-mt-20" href="{{AppSubUrl}}/dashboard">{{.page_use}}</a> | |||||
| <a class="ui blue basic button am-mt-10" href="{{AppSubUrl}}/dashboard">{{.page_use}}</a> | |||||
| {{else}} | {{else}} | ||||
| <a class="ui blue basic button am-mt-20" href="{{AppSubUrl}}/user/login">{{.page_use}}</a> | |||||
| <a class="ui blue basic button am-mt-10" href="{{AppSubUrl}}/user/login">{{.page_use}}</a> | |||||
| {{end}} | {{end}} | ||||
| <a class="ui grey basic button am-mt-20" href="mailto:aiforge@openi.org.cn">{{.page_dev_yunlao_apply}}</a> | |||||
| <a class="ui grey basic button am-mt-10" href="mailto:aiforge@openi.org.cn">{{.page_dev_yunlao_apply}}</a> | |||||
| </div> | </div> | ||||
| </div> | </div> | ||||
| </div> | </div> | ||||