diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 82c4c6b83..f407eefd4 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -22,6 +22,16 @@ const ( NPUResource = "NPU" GPUResource = "CPU/GPU" + //notebook storage category + EVSCategory = "EVS" + EFSCategory = "EFS" + + ManagedOwnership = "MANAGED" + DetectedOwnership = "DEDICATED" + + NotebookFeature = "NOTEBOOK" + DefaultFeature = "DEFAULT" + JobWaiting CloudbrainStatus = "WAITING" JobStopped CloudbrainStatus = "STOPPED" JobSucceeded CloudbrainStatus = "SUCCEEDED" @@ -520,6 +530,25 @@ type CloudBrainResult struct { Msg string `json:"msg"` } +type CreateNotebook2Params struct { + JobName string `json:"name"` + Description string `json:"description"` + Duration int64 `json:"duration"` //ms + Feature string `json:"feature"` + PoolID string `json:"pool_id"` + Flavor string `json:"flavor"` + ImageID string `json:"image_id"` + WorkspaceID string `json:"workspace_id"` + Volume VolumeReq `json:"volume"` +} + +type VolumeReq struct { + Capacity int `json:"capacity"` + Category string `json:"category"` + Ownership string `json:"ownership"` + Uri string `json:"uri"` +} + type CreateNotebookParams struct { JobName string `json:"name"` Description string `json:"description"` @@ -637,6 +666,41 @@ type GetNotebookResult struct { } `json:"spec"` } +type GetNotebook2Result struct { + ErrorCode string `json:"error_code"` + ErrorMsg string `json:"error_msg"` + FailReason string `json:"fail_reason"` + ID string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` + Status string `json:"status"` + Url string `json:"url"` //实例访问的URL + Flavor string `json:"flavor"` + CreateTime string + LatestUpdateTime string + Image struct { + Name string `json:"name"` + Status string `json:"status"` + QueuingNum int `json:"queuing_num"` + QueueLeftTime int `json:"queue_left_time"` //s + Duration int `json:"duration"` //auto_stop_time s + } `json:"image"` + Lease struct { + CreateTime int64 `json:"create_time"` //实例创建的时间,UTC毫秒 + BeginTime string + Duration int64 `json:"duration"` //实例运行时长,以创建时间为起点计算,即“创建时间+duration > 当前时刻”时,系统会自动停止实例 + UpdateTime int64 `json:"update_time"` //实例最后更新(不包括保活心跳)的时间,UTC毫秒 + EndTime string + } `json:"lease"` //实例自动停止的倒计时信息 + VolumeRes struct { + Capacity int `json:"capacity"` + Category string `json:"category"` + MountPath string `json:"mount_path"` + Ownership string `json:"ownership"` + Status string `json:"status"` + } `json:"volume"` +} + type GetTokenParams struct { Auth Auth `json:"auth"` } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 8af2a93e5..301c4cb0e 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -17,6 +17,7 @@ const ( //notebook storageTypeOBS = "obs" autoStopDuration = 4 * 60 * 60 + autoStopDurationMs = 4 * 60 * 60 * 1000 DataSetMountPath = "/home/ma-user/work" NotebookEnv = "Python3" @@ -262,6 +263,48 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin return nil } +func GenerateNotebook2(ctx *context.Context, jobName, uuid, description, flavor string) error { + if poolInfos == nil { + json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) + } + jobResult, err := createNotebook2(models.CreateNotebook2Params{ + JobName: jobName, + Description: description, + Flavor: flavor, + Duration: autoStopDurationMs, + ImageID: "59a6e9f5-93c0-44dd-85b0-82f390c5d53a", + PoolID: poolInfos.PoolInfo[0].PoolId, + Feature: models.NotebookFeature, + Volume: models.VolumeReq{ + Capacity: 100, + Category: models.EVSCategory, + Ownership: models.ManagedOwnership, + }, + WorkspaceID: "0", + }) + if err != nil { + log.Error("createNotebook2 failed: %v", err.Error()) + return err + } + err = models.CreateCloudbrain(&models.Cloudbrain{ + Status: string(models.JobWaiting), + UserID: ctx.User.ID, + RepoID: ctx.Repo.Repository.ID, + JobID: jobResult.ID, + JobName: jobName, + JobType: string(models.JobTypeDebug), + Type: models.TypeCloudBrainTwo, + Uuid: uuid, + ComputeResource: models.NPUResource, + }) + + if err != nil { + return err + } + + return nil +} + func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { jobResult, err := createTrainJob(models.CreateTrainJobParams{ JobName: req.JobName, diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go index d102dca71..3bd36ca82 100755 --- a/modules/modelarts/resty.go +++ b/modules/modelarts/resty.go @@ -28,6 +28,9 @@ const ( urlResourceSpecs = "/job/resource-specs" urlTrainJobConfig = "/training-job-configs" errorCodeExceedLimit = "ModelArts.0118" + + //notebook 2.0 + urlNotebook2 = "/notebooks" ) func getRestyClient() *resty.Client { @@ -174,6 +177,45 @@ sendjob: return &result, nil } +func GetNotebook2(jobID string) (*models.GetNotebook2Result, error) { + checkSetting() + client := getRestyClient() + var result models.GetNotebook2Result + + retry := 0 + +sendjob: + res, err := client.R(). + SetHeader("Content-Type", "application/json"). + SetAuthToken(TOKEN). + SetResult(&result). + Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID) + + if err != nil { + return nil, fmt.Errorf("resty GetJob: %v", err) + } + + if res.StatusCode() == http.StatusUnauthorized && retry < 1 { + retry++ + _ = getToken() + goto sendjob + } + + var response models.NotebookResult + err = json.Unmarshal(res.Body(), &response) + if err != nil { + log.Error("json.Unmarshal failed: %s", err.Error()) + return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) + } + + if len(response.ErrorCode) != 0 { + log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) + return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) + } + + return &result, nil +} + func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { checkSetting() client := getRestyClient() @@ -930,3 +972,46 @@ sendjob: return &result, nil } + +func createNotebook2(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) { + checkSetting() + client := getRestyClient() + var result models.CreateNotebookResult + + retry := 0 + +sendjob: + res, err := client.R(). + SetHeader("Content-Type", "application/json"). + SetAuthToken(TOKEN). + SetBody(createJobParams). + SetResult(&result). + Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2) + + if err != nil { + return nil, fmt.Errorf("resty create notebook2: %s", err) + } + + if res.StatusCode() == http.StatusUnauthorized && retry < 1 { + retry++ + _ = getToken() + goto sendjob + } + + var response models.NotebookResult + err = json.Unmarshal(res.Body(), &response) + if err != nil { + log.Error("json.Unmarshal failed: %s", err.Error()) + return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) + } + + if len(response.ErrorCode) != 0 { + log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) + if response.ErrorCode == errorCodeExceedLimit { + response.ErrorMsg = "所选规格使用数量已超过最大配额限制。" + } + return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) + } + + return &result, nil +} diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 21da7cf42..72a68a6d3 100755 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -883,7 +883,8 @@ func RegisterRoutes(m *macaron.Macaron) { }, reqRepoReader(models.UnitTypeCloudBrain)) m.Group("/modelarts", func() { m.Group("/notebook", func() { - m.Get("/:jobid", repo.GetModelArtsNotebook) + //m.Get("/:jobid", repo.GetModelArtsNotebook) + m.Get("/:jobid", repo.GetModelArtsNotebook2) }) m.Group("/train-job", func() { m.Group("/:jobid", func() { diff --git a/routers/api/v1/repo/modelarts.go b/routers/api/v1/repo/modelarts.go index c73a93c1a..c9f8761c9 100755 --- a/routers/api/v1/repo/modelarts.go +++ b/routers/api/v1/repo/modelarts.go @@ -51,6 +51,37 @@ func GetModelArtsNotebook(ctx *context.APIContext) { } +func GetModelArtsNotebook2(ctx *context.APIContext) { + var ( + err error + ) + + jobID := ctx.Params(":jobid") + repoID := ctx.Repo.Repository.ID + job, err := models.GetRepoCloudBrainByJobID(repoID, jobID) + if err != nil { + ctx.NotFound(err) + return + } + result, err := modelarts.GetNotebook2(jobID) + if err != nil { + ctx.NotFound(err) + return + } + + job.Status = result.Status + err = models.UpdateJob(job) + if err != nil { + log.Error("UpdateJob failed:", err) + } + + ctx.JSON(http.StatusOK, map[string]interface{}{ + "JobID": jobID, + "JobStatus": result.Status, + }) + +} + func GetModelArtsTrainJob(ctx *context.APIContext) { var ( err error diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index fb87241d3..0599fb03f 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -967,7 +967,8 @@ func SyncCloudbrainStatus() { } } else if task.Type == models.TypeCloudBrainTwo { if task.JobType == string(models.JobTypeDebug) { - result, err := modelarts.GetJob(task.JobID) + //result, err := modelarts.GetJob(task.JobID) + result, err := modelarts.GetNotebook2(task.JobID) if err != nil { log.Error("GetJob(%s) failed:%v", task.JobName, err) continue diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index cc4e0840b..60f7ce51f 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -173,6 +173,52 @@ func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") } +func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { + ctx.Data["PageIsNotebook"] = true + jobName := form.JobName + uuid := form.Attachment + description := form.Description + flavor := form.Flavor + + flavor = "modelarts.bm.910.arm.public.1" + + count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) + if err != nil { + log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) + return + } else { + if count >= 1 { + log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form) + return + } + } + _, err = models.GetCloudbrainByName(jobName) + if err == nil { + log.Error("the job name did already exist", ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form) + return + } else { + if !models.IsErrJobNotExist(err) { + log.Error("system error, %v", err, ctx.Data["MsgID"]) + cloudBrainNewDataPrepare(ctx) + ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) + return + } + } + + err = modelarts.GenerateNotebook2(ctx, jobName, uuid, description, flavor) + if err != nil { + ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form) + return + } + ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") +} + func NotebookShow(ctx *context.Context) { ctx.Data["PageIsCloudBrain"] = true @@ -184,7 +230,7 @@ func NotebookShow(ctx *context.Context) { return } - result, err := modelarts.GetJob(jobID) + result, err := modelarts.GetNotebook2(jobID) if err != nil { ctx.Data["error"] = err.Error() ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) @@ -200,12 +246,12 @@ func NotebookShow(ctx *context.Context) { return } - createTime, _ := com.StrTo(result.CreationTimestamp).Int64() + createTime, _ := com.StrTo(result.Lease.CreateTime).Int64() result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05") - endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64() + endTime, _ := com.StrTo(result.Lease.UpdateTime).Int64() result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05") - result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05") - result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05") + //result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05") + //result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05") } ctx.Data["task"] = task diff --git a/routers/routes/routes.go b/routers/routes/routes.go index 9690c115d..d61ae02bb 100755 --- a/routers/routes/routes.go +++ b/routers/routes/routes.go @@ -1029,6 +1029,7 @@ func RegisterRoutes(m *macaron.Macaron) { m.Group("/modelarts", func() { m.Group("/notebook", func() { + /* v1.0 m.Group("/:jobid", func() { m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug) @@ -1037,6 +1038,15 @@ func RegisterRoutes(m *macaron.Macaron) { }) m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate) + */ + m.Group("/:jobid", func() { + m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) + m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug) + m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage) + m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) + }) + m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) + m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.Notebook2Create) }) m.Group("/train-job", func() { diff --git a/templates/repo/modelarts/notebook/new.tmpl b/templates/repo/modelarts/notebook/new.tmpl index 4e32b5ef3..babd65e58 100755 --- a/templates/repo/modelarts/notebook/new.tmpl +++ b/templates/repo/modelarts/notebook/new.tmpl @@ -51,7 +51,7 @@ -
+
-
+
diff --git a/templates/repo/modelarts/notebook/show.tmpl b/templates/repo/modelarts/notebook/show.tmpl index aa769dce3..121f3e728 100755 --- a/templates/repo/modelarts/notebook/show.tmpl +++ b/templates/repo/modelarts/notebook/show.tmpl @@ -47,7 +47,7 @@ {{end}}
-
+