| @@ -22,6 +22,16 @@ const ( | |||
| NPUResource = "NPU" | |||
| GPUResource = "CPU/GPU" | |||
| //notebook storage category | |||
| EVSCategory = "EVS" | |||
| EFSCategory = "EFS" | |||
| ManagedOwnership = "MANAGED" | |||
| DetectedOwnership = "DEDICATED" | |||
| NotebookFeature = "NOTEBOOK" | |||
| DefaultFeature = "DEFAULT" | |||
| JobWaiting CloudbrainStatus = "WAITING" | |||
| JobStopped CloudbrainStatus = "STOPPED" | |||
| JobSucceeded CloudbrainStatus = "SUCCEEDED" | |||
| @@ -520,6 +530,25 @@ type CloudBrainResult struct { | |||
| Msg string `json:"msg"` | |||
| } | |||
| type CreateNotebook2Params struct { | |||
| JobName string `json:"name"` | |||
| Description string `json:"description"` | |||
| Duration int64 `json:"duration"` //ms | |||
| Feature string `json:"feature"` | |||
| PoolID string `json:"pool_id"` | |||
| Flavor string `json:"flavor"` | |||
| ImageID string `json:"image_id"` | |||
| WorkspaceID string `json:"workspace_id"` | |||
| Volume VolumeReq `json:"volume"` | |||
| } | |||
// VolumeReq is the storage volume section of a notebook creation request.
type VolumeReq struct {
	// Capacity is the requested size of the volume.
	Capacity int `json:"capacity"`

	// Category is the storage category, e.g. EVSCategory or EFSCategory.
	Category string `json:"category"`

	// Ownership is the ownership mode of the volume, e.g. ManagedOwnership.
	Ownership string `json:"ownership"`

	// Uri is sent as the "uri" key of the volume.
	Uri string `json:"uri"`
}
| type CreateNotebookParams struct { | |||
| JobName string `json:"name"` | |||
| Description string `json:"description"` | |||
| @@ -637,6 +666,41 @@ type GetNotebookResult struct { | |||
| } `json:"spec"` | |||
| } | |||
// GetNotebook2Result is the structure a notebook 2.0 "get notebook" response is
// decoded into.
type GetNotebook2Result struct {
	// Error details reported in the response.
	ErrorCode  string `json:"error_code"`
	ErrorMsg   string `json:"error_msg"`
	FailReason string `json:"fail_reason"`

	// Description of the notebook instance.
	ID          string `json:"id"`
	Name        string `json:"name"`
	Description string `json:"description"`
	Status      string `json:"status"`
	Url         string `json:"url"` // URL for accessing the instance
	Flavor      string `json:"flavor"`

	// CreateTime and LatestUpdateTime carry no JSON tag; NotebookShow assigns
	// them formatted timestamps for display.
	CreateTime       string
	LatestUpdateTime string

	// Image holds the "image" object of the response.
	Image struct {
		Name          string `json:"name"`
		Status        string `json:"status"`
		QueuingNum    int    `json:"queuing_num"`
		QueueLeftTime int    `json:"queue_left_time"` // seconds
		Duration      int    `json:"duration"`        // auto_stop_time, seconds
	} `json:"image"`

	// Lease holds the countdown information for the automatic stop of the
	// instance.
	Lease struct {
		// CreateTime is the instance creation time, in UTC milliseconds.
		CreateTime int64 `json:"create_time"`
		BeginTime  string
		// Duration is the run duration of the instance, measured from the
		// creation time: when "creation time + duration > current time" the
		// system automatically stops the instance.
		Duration int64 `json:"duration"`
		// UpdateTime is the time of the last update of the instance (not
		// counting keep-alive heartbeats), in UTC milliseconds.
		UpdateTime int64 `json:"update_time"`
		EndTime    string
	} `json:"lease"`

	// VolumeRes holds the "volume" object of the response.
	VolumeRes struct {
		Capacity  int    `json:"capacity"`
		Category  string `json:"category"`
		MountPath string `json:"mount_path"`
		Ownership string `json:"ownership"`
		Status    string `json:"status"`
	} `json:"volume"`
}
| type GetTokenParams struct { | |||
| Auth Auth `json:"auth"` | |||
| } | |||
| @@ -17,6 +17,7 @@ const ( | |||
| //notebook | |||
| storageTypeOBS = "obs" | |||
| autoStopDuration = 4 * 60 * 60 | |||
| autoStopDurationMs = 4 * 60 * 60 * 1000 | |||
| DataSetMountPath = "/home/ma-user/work" | |||
| NotebookEnv = "Python3" | |||
| @@ -262,6 +263,48 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin | |||
| return nil | |||
| } | |||
| func GenerateNotebook2(ctx *context.Context, jobName, uuid, description, flavor string) error { | |||
| if poolInfos == nil { | |||
| json.Unmarshal([]byte(setting.PoolInfos), &poolInfos) | |||
| } | |||
| jobResult, err := createNotebook2(models.CreateNotebook2Params{ | |||
| JobName: jobName, | |||
| Description: description, | |||
| Flavor: flavor, | |||
| Duration: autoStopDurationMs, | |||
| ImageID: "59a6e9f5-93c0-44dd-85b0-82f390c5d53a", | |||
| PoolID: poolInfos.PoolInfo[0].PoolId, | |||
| Feature: models.NotebookFeature, | |||
| Volume: models.VolumeReq{ | |||
| Capacity: 100, | |||
| Category: models.EVSCategory, | |||
| Ownership: models.ManagedOwnership, | |||
| }, | |||
| WorkspaceID: "0", | |||
| }) | |||
| if err != nil { | |||
| log.Error("createNotebook2 failed: %v", err.Error()) | |||
| return err | |||
| } | |||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
| Status: string(models.JobWaiting), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: jobResult.ID, | |||
| JobName: jobName, | |||
| JobType: string(models.JobTypeDebug), | |||
| Type: models.TypeCloudBrainTwo, | |||
| Uuid: uuid, | |||
| ComputeResource: models.NPUResource, | |||
| }) | |||
| if err != nil { | |||
| return err | |||
| } | |||
| return nil | |||
| } | |||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { | |||
| jobResult, err := createTrainJob(models.CreateTrainJobParams{ | |||
| JobName: req.JobName, | |||
| @@ -28,6 +28,9 @@ const ( | |||
| urlResourceSpecs = "/job/resource-specs" | |||
| urlTrainJobConfig = "/training-job-configs" | |||
| errorCodeExceedLimit = "ModelArts.0118" | |||
| //notebook 2.0 | |||
| urlNotebook2 = "/notebooks" | |||
| ) | |||
| func getRestyClient() *resty.Client { | |||
| @@ -174,6 +177,45 @@ sendjob: | |||
| return &result, nil | |||
| } | |||
| func GetNotebook2(jobID string) (*models.GetNotebook2Result, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| var result models.GetNotebook2Result | |||
| retry := 0 | |||
| sendjob: | |||
| res, err := client.R(). | |||
| SetHeader("Content-Type", "application/json"). | |||
| SetAuthToken(TOKEN). | |||
| SetResult(&result). | |||
| Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID) | |||
| if err != nil { | |||
| return nil, fmt.Errorf("resty GetJob: %v", err) | |||
| } | |||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
| retry++ | |||
| _ = getToken() | |||
| goto sendjob | |||
| } | |||
| var response models.NotebookResult | |||
| err = json.Unmarshal(res.Body(), &response) | |||
| if err != nil { | |||
| log.Error("json.Unmarshal failed: %s", err.Error()) | |||
| return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
| } | |||
| if len(response.ErrorCode) != 0 { | |||
| log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| } | |||
| return &result, nil | |||
| } | |||
| func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| @@ -930,3 +972,46 @@ sendjob: | |||
| return &result, nil | |||
| } | |||
| func createNotebook2(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) { | |||
| checkSetting() | |||
| client := getRestyClient() | |||
| var result models.CreateNotebookResult | |||
| retry := 0 | |||
| sendjob: | |||
| res, err := client.R(). | |||
| SetHeader("Content-Type", "application/json"). | |||
| SetAuthToken(TOKEN). | |||
| SetBody(createJobParams). | |||
| SetResult(&result). | |||
| Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2) | |||
| if err != nil { | |||
| return nil, fmt.Errorf("resty create notebook2: %s", err) | |||
| } | |||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
| retry++ | |||
| _ = getToken() | |||
| goto sendjob | |||
| } | |||
| var response models.NotebookResult | |||
| err = json.Unmarshal(res.Body(), &response) | |||
| if err != nil { | |||
| log.Error("json.Unmarshal failed: %s", err.Error()) | |||
| return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||
| } | |||
| if len(response.ErrorCode) != 0 { | |||
| log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| if response.ErrorCode == errorCodeExceedLimit { | |||
| response.ErrorMsg = "所选规格使用数量已超过最大配额限制。" | |||
| } | |||
| return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||
| } | |||
| return &result, nil | |||
| } | |||
| @@ -883,7 +883,8 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| }, reqRepoReader(models.UnitTypeCloudBrain)) | |||
| m.Group("/modelarts", func() { | |||
| m.Group("/notebook", func() { | |||
| m.Get("/:jobid", repo.GetModelArtsNotebook) | |||
| //m.Get("/:jobid", repo.GetModelArtsNotebook) | |||
| m.Get("/:jobid", repo.GetModelArtsNotebook2) | |||
| }) | |||
| m.Group("/train-job", func() { | |||
| m.Group("/:jobid", func() { | |||
| @@ -51,6 +51,37 @@ func GetModelArtsNotebook(ctx *context.APIContext) { | |||
| } | |||
| func GetModelArtsNotebook2(ctx *context.APIContext) { | |||
| var ( | |||
| err error | |||
| ) | |||
| jobID := ctx.Params(":jobid") | |||
| repoID := ctx.Repo.Repository.ID | |||
| job, err := models.GetRepoCloudBrainByJobID(repoID, jobID) | |||
| if err != nil { | |||
| ctx.NotFound(err) | |||
| return | |||
| } | |||
| result, err := modelarts.GetNotebook2(jobID) | |||
| if err != nil { | |||
| ctx.NotFound(err) | |||
| return | |||
| } | |||
| job.Status = result.Status | |||
| err = models.UpdateJob(job) | |||
| if err != nil { | |||
| log.Error("UpdateJob failed:", err) | |||
| } | |||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||
| "JobID": jobID, | |||
| "JobStatus": result.Status, | |||
| }) | |||
| } | |||
| func GetModelArtsTrainJob(ctx *context.APIContext) { | |||
| var ( | |||
| err error | |||
| @@ -967,7 +967,8 @@ func SyncCloudbrainStatus() { | |||
| } | |||
| } else if task.Type == models.TypeCloudBrainTwo { | |||
| if task.JobType == string(models.JobTypeDebug) { | |||
| result, err := modelarts.GetJob(task.JobID) | |||
| //result, err := modelarts.GetJob(task.JobID) | |||
| result, err := modelarts.GetNotebook2(task.JobID) | |||
| if err != nil { | |||
| log.Error("GetJob(%s) failed:%v", task.JobName, err) | |||
| continue | |||
| @@ -173,6 +173,52 @@ func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") | |||
| } | |||
| func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) { | |||
| ctx.Data["PageIsNotebook"] = true | |||
| jobName := form.JobName | |||
| uuid := form.Attachment | |||
| description := form.Description | |||
| flavor := form.Flavor | |||
| flavor = "modelarts.bm.910.arm.public.1" | |||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } else { | |||
| if count >= 1 { | |||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| } | |||
| _, err = models.GetCloudbrainByName(jobName) | |||
| if err == nil { | |||
| log.Error("the job name did already exist", ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } else { | |||
| if !models.IsErrJobNotExist(err) { | |||
| log.Error("system error, %v", err, ctx.Data["MsgID"]) | |||
| cloudBrainNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| } | |||
| err = modelarts.GenerateNotebook2(ctx, jobName, uuid, description, flavor) | |||
| if err != nil { | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form) | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") | |||
| } | |||
| func NotebookShow(ctx *context.Context) { | |||
| ctx.Data["PageIsCloudBrain"] = true | |||
| @@ -184,7 +230,7 @@ func NotebookShow(ctx *context.Context) { | |||
| return | |||
| } | |||
| result, err := modelarts.GetJob(jobID) | |||
| result, err := modelarts.GetNotebook2(jobID) | |||
| if err != nil { | |||
| ctx.Data["error"] = err.Error() | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) | |||
| @@ -200,12 +246,12 @@ func NotebookShow(ctx *context.Context) { | |||
| return | |||
| } | |||
| createTime, _ := com.StrTo(result.CreationTimestamp).Int64() | |||
| createTime, _ := com.StrTo(result.Lease.CreateTime).Int64() | |||
| result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05") | |||
| endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64() | |||
| endTime, _ := com.StrTo(result.Lease.UpdateTime).Int64() | |||
| result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05") | |||
| result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05") | |||
| result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05") | |||
| //result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05") | |||
| //result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05") | |||
| } | |||
| ctx.Data["task"] = task | |||
| @@ -1029,6 +1029,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| m.Group("/modelarts", func() { | |||
| m.Group("/notebook", func() { | |||
| /* v1.0 | |||
| m.Group("/:jobid", func() { | |||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | |||
| m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug) | |||
| @@ -1037,6 +1038,15 @@ func RegisterRoutes(m *macaron.Macaron) { | |||
| }) | |||
| m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) | |||
| m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate) | |||
| */ | |||
| m.Group("/:jobid", func() { | |||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | |||
| m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug) | |||
| m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage) | |||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | |||
| }) | |||
| m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) | |||
| m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.Notebook2Create) | |||
| }) | |||
| m.Group("/train-job", func() { | |||
| @@ -51,7 +51,7 @@ | |||
| <input name="job_name" id="cloudbrain_job_name" placeholder="任务名称" value="{{.job_name}}" tabindex="3" autofocus required maxlength="255" onkeyup="this.value=this.value.replace(/[, ]/g,'')"> | |||
| </div> | |||
| <div class="inline field"> | |||
| <!-- <div class="inline field"> | |||
| <label>数据集</label> | |||
| <input type="text" list="cloudbrain_dataset" placeholder="选择数据集" name="" id="answerInput" autofocus maxlength="36"> | |||
| <datalist id="cloudbrain_dataset" class="ui search" style='width:385px' name="attachment"> | |||
| @@ -69,7 +69,7 @@ | |||
| <div class="inline required field"> | |||
| <label>类型</label> | |||
| <input name="job_type" id="cloudbrain_job_type" value="{{.notebook_type}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly"> | |||
| </div> | |||
| </div> --> | |||
| <div class="inline required field"> | |||
| <label>规格</label> | |||
| <select id="cloudbrain_flavor" class="ui search dropdown" placeholder="选择规格" style='width:385px' name="flavor"> | |||
| @@ -79,10 +79,10 @@ | |||
| {{end}} | |||
| </select> | |||
| </div> | |||
| <div class="inline required field"> | |||
| <!--<div class="inline required field"> | |||
| <label>数据集存放路径</label> | |||
| <input name="dataset_path" id="cloudbrain_dataset_path" value="{{.dataset_path}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly"> | |||
| </div> | |||
| </div> --> | |||
| <div class="inline field"> | |||
| <label>描述</label> | |||
| <input name="description" id="cloudbrain_description" tabindex="3" autofocus maxlength="255"> | |||
| @@ -47,7 +47,7 @@ | |||
| </table> | |||
| {{end}} | |||
| </div> | |||
| <div class="ui blue segment"> | |||
| <!--<div class="ui blue segment"> | |||
| {{with .result}} | |||
| <table class="ui celled striped table"> | |||
| <thead> | |||
| @@ -125,7 +125,7 @@ | |||
| </tbody> | |||
| </table> | |||
| {{end}} | |||
| </div> | |||
| </div>--> | |||
| </div> | |||
| </div> | |||