Browse Source

notebook2.0

tags/v1.22.2.1^2
lewis 3 years ago
parent
commit
f0e438f9b0
10 changed files with 294 additions and 13 deletions
  1. +64
    -0
      models/cloudbrain.go
  2. +43
    -0
      modules/modelarts/modelarts.go
  3. +85
    -0
      modules/modelarts/resty.go
  4. +2
    -1
      routers/api/v1/api.go
  5. +31
    -0
      routers/api/v1/repo/modelarts.go
  6. +2
    -1
      routers/repo/cloudbrain.go
  7. +51
    -5
      routers/repo/modelarts.go
  8. +10
    -0
      routers/routes/routes.go
  9. +4
    -4
      templates/repo/modelarts/notebook/new.tmpl
  10. +2
    -2
      templates/repo/modelarts/notebook/show.tmpl

+ 64
- 0
models/cloudbrain.go View File

@@ -22,6 +22,16 @@ const (
NPUResource = "NPU" NPUResource = "NPU"
GPUResource = "CPU/GPU" GPUResource = "CPU/GPU"


//notebook storage category
EVSCategory = "EVS"
EFSCategory = "EFS"

ManagedOwnership = "MANAGED"
DetectedOwnership = "DEDICATED"

NotebookFeature = "NOTEBOOK"
DefaultFeature = "DEFAULT"

JobWaiting CloudbrainStatus = "WAITING" JobWaiting CloudbrainStatus = "WAITING"
JobStopped CloudbrainStatus = "STOPPED" JobStopped CloudbrainStatus = "STOPPED"
JobSucceeded CloudbrainStatus = "SUCCEEDED" JobSucceeded CloudbrainStatus = "SUCCEEDED"
@@ -520,6 +530,25 @@ type CloudBrainResult struct {
Msg string `json:"msg"` Msg string `json:"msg"`
} }


// CreateNotebook2Params is the JSON request body used to create a
// notebook 2.0 instance.
type CreateNotebook2Params struct {
	// JobName is serialized as "name".
	JobName     string `json:"name"`
	Description string `json:"description"`
	// Duration is expressed in milliseconds.
	Duration    int64     `json:"duration"`
	Feature     string    `json:"feature"`
	PoolID      string    `json:"pool_id"`
	Flavor      string    `json:"flavor"`
	ImageID     string    `json:"image_id"`
	WorkspaceID string    `json:"workspace_id"`
	// Volume describes the storage requested for the instance.
	Volume      VolumeReq `json:"volume"`
}

// VolumeReq is the storage section of a notebook 2.0 create request.
type VolumeReq struct {
	Capacity  int    `json:"capacity"`
	// Category is the storage category, e.g. one of the *Category constants.
	Category  string `json:"category"`
	// Ownership is the storage ownership, e.g. one of the *Ownership constants.
	Ownership string `json:"ownership"`
	Uri       string `json:"uri"`
}

type CreateNotebookParams struct { type CreateNotebookParams struct {
JobName string `json:"name"` JobName string `json:"name"`
Description string `json:"description"` Description string `json:"description"`
@@ -637,6 +666,41 @@ type GetNotebookResult struct {
} `json:"spec"` } `json:"spec"`
} }


// GetNotebook2Result is the decoded response of a notebook 2.0 detail query.
//
// Fields without a json tag (CreateTime, LatestUpdateTime, Lease.BeginTime,
// Lease.EndTime) are not named in the tagged API schema; they hold formatted
// strings for display.
type GetNotebook2Result struct {
	ErrorCode   string `json:"error_code"`
	ErrorMsg    string `json:"error_msg"`
	FailReason  string `json:"fail_reason"`
	ID          string `json:"id"`
	Name        string `json:"name"`
	Description string `json:"description"`
	Status      string `json:"status"`
	// Url is the URL used to access the instance.
	Url    string `json:"url"`
	Flavor string `json:"flavor"`

	// Human-readable timestamps.
	CreateTime       string
	LatestUpdateTime string

	Image struct {
		Name       string `json:"name"`
		Status     string `json:"status"`
		QueuingNum int    `json:"queuing_num"`
		// QueueLeftTime is in seconds.
		QueueLeftTime int `json:"queue_left_time"`
		// Duration is the auto-stop time in seconds.
		Duration int `json:"duration"`
	} `json:"image"`

	// Lease carries the countdown information for the automatic stop of the
	// instance.
	Lease struct {
		// CreateTime is the instance creation time, UTC milliseconds.
		CreateTime int64 `json:"create_time"`
		BeginTime  string
		// Duration is the instance run time counted from the creation time;
		// per the upstream description, the system stops the instance
		// automatically once "creation time + duration > current time".
		Duration int64 `json:"duration"`
		// UpdateTime is the time of the last update (keep-alive heartbeats
		// excluded), UTC milliseconds.
		UpdateTime int64 `json:"update_time"`
		EndTime    string
	} `json:"lease"`

	VolumeRes struct {
		Capacity  int    `json:"capacity"`
		Category  string `json:"category"`
		MountPath string `json:"mount_path"`
		Ownership string `json:"ownership"`
		Status    string `json:"status"`
	} `json:"volume"`
}

type GetTokenParams struct { type GetTokenParams struct {
Auth Auth `json:"auth"` Auth Auth `json:"auth"`
} }


+ 43
- 0
modules/modelarts/modelarts.go View File

@@ -17,6 +17,7 @@ const (
//notebook //notebook
storageTypeOBS = "obs" storageTypeOBS = "obs"
autoStopDuration = 4 * 60 * 60 autoStopDuration = 4 * 60 * 60
autoStopDurationMs = 4 * 60 * 60 * 1000


DataSetMountPath = "/home/ma-user/work" DataSetMountPath = "/home/ma-user/work"
NotebookEnv = "Python3" NotebookEnv = "Python3"
@@ -262,6 +263,48 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description, flavor strin
return nil return nil
} }


// GenerateNotebook2 creates a notebook 2.0 instance through createNotebook2
// and then stores a matching Cloudbrain record (debug job, NPU resource,
// waiting status) for the current user and repository.
//
// jobName, description and flavor are forwarded in the create request; uuid
// is only stored on the Cloudbrain record. The first error encountered is
// returned.
func GenerateNotebook2(ctx *context.Context, jobName, uuid, description, flavor string) error {
	// poolInfos is parsed from setting.PoolInfos the first time it is needed.
	if poolInfos == nil {
		if err := json.Unmarshal([]byte(setting.PoolInfos), &poolInfos); err != nil {
			// Discard anything decoded before the failure so the next call
			// parses the setting again instead of using incomplete pool data.
			poolInfos = nil
			log.Error("json.Unmarshal PoolInfos failed: %v", err)
			return err
		}
	}

	// NOTE(review): this still panics when the configured pool list is empty
	// (or the setting decodes to null); an explicit guard needs an error
	// constructor that is not known to be imported in this file.
	jobResult, err := createNotebook2(models.CreateNotebook2Params{
		JobName:     jobName,
		Description: description,
		Flavor:      flavor,
		Duration:    autoStopDurationMs,
		// NOTE(review): hard-coded image ID — presumably should come from
		// configuration; confirm.
		ImageID: "59a6e9f5-93c0-44dd-85b0-82f390c5d53a",
		PoolID:  poolInfos.PoolInfo[0].PoolId,
		Feature: models.NotebookFeature,
		Volume: models.VolumeReq{
			Capacity:  100,
			Category:  models.EVSCategory,
			Ownership: models.ManagedOwnership,
		},
		WorkspaceID: "0",
	})
	if err != nil {
		log.Error("createNotebook2 failed: %v", err)
		return err
	}

	return models.CreateCloudbrain(&models.Cloudbrain{
		Status:          string(models.JobWaiting),
		UserID:          ctx.User.ID,
		RepoID:          ctx.Repo.Repository.ID,
		JobID:           jobResult.ID,
		JobName:         jobName,
		JobType:         string(models.JobTypeDebug),
		Type:            models.TypeCloudBrainTwo,
		Uuid:            uuid,
		ComputeResource: models.NPUResource,
	})
}

func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
jobResult, err := createTrainJob(models.CreateTrainJobParams{ jobResult, err := createTrainJob(models.CreateTrainJobParams{
JobName: req.JobName, JobName: req.JobName,


+ 85
- 0
modules/modelarts/resty.go View File

@@ -28,6 +28,9 @@ const (
urlResourceSpecs = "/job/resource-specs" urlResourceSpecs = "/job/resource-specs"
urlTrainJobConfig = "/training-job-configs" urlTrainJobConfig = "/training-job-configs"
errorCodeExceedLimit = "ModelArts.0118" errorCodeExceedLimit = "ModelArts.0118"

//notebook 2.0
urlNotebook2 = "/notebooks"
) )


func getRestyClient() *resty.Client { func getRestyClient() *resty.Client {
@@ -174,6 +177,45 @@ sendjob:
return &result, nil return &result, nil
} }


// GetNotebook2 queries the notebook 2.0 instance identified by jobID.
//
// When the service answers 401 Unauthorized, the token is refreshed once via
// getToken and the request is sent a second time. The response body is then
// decoded as a models.NotebookResult to detect an API-level error code.
//
// It returns (nil, err) when the HTTP request itself fails, and (&result, err)
// when the body cannot be decoded or carries an error code; the result is only
// meaningful when err is nil.
func GetNotebook2(jobID string) (*models.GetNotebook2Result, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GetNotebook2Result

	retry := 0

sendjob:
	res, err := client.R().
		SetHeader("Content-Type", "application/json").
		SetAuthToken(TOKEN).
		SetResult(&result).
		Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)

	if err != nil {
		return nil, fmt.Errorf("resty GetNotebook2: %v", err)
	}

	// Retry at most once after refreshing the token; a failed refresh simply
	// results in a second 401 that is reported through the error code below.
	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		_ = getToken()
		goto sendjob
	}

	var response models.NotebookResult
	if err = json.Unmarshal(res.Body(), &response); err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}

	if len(response.ErrorCode) != 0 {
		log.Error("GetNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
		return &result, fmt.Errorf("GetNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
	}

	return &result, nil
}

func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) { func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
checkSetting() checkSetting()
client := getRestyClient() client := getRestyClient()
@@ -930,3 +972,46 @@ sendjob:


return &result, nil return &result, nil
} }

// createNotebook2 posts createJobParams to the notebook 2.0 endpoint.
//
// When the service answers 401 Unauthorized, the token is refreshed once via
// getToken and the request is sent a second time. The response body is then
// decoded as a models.NotebookResult to detect an API-level error code.
//
// It returns (nil, err) when the HTTP request itself fails, and (&result, err)
// when the body cannot be decoded or carries an error code; the result is only
// meaningful when err is nil.
func createNotebook2(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) {
	checkSetting()
	client := getRestyClient()
	var result models.CreateNotebookResult

	retry := 0

sendjob:
	res, err := client.R().
		SetHeader("Content-Type", "application/json").
		SetAuthToken(TOKEN).
		SetBody(createJobParams).
		SetResult(&result).
		Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2)

	if err != nil {
		return nil, fmt.Errorf("resty create notebook2: %s", err)
	}

	// Retry at most once after refreshing the token; a failed refresh simply
	// results in a second 401 that is reported through the error code below.
	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		_ = getToken()
		goto sendjob
	}

	var response models.NotebookResult
	if err = json.Unmarshal(res.Body(), &response); err != nil {
		log.Error("json.Unmarshal failed: %s", err.Error())
		return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
	}

	if len(response.ErrorCode) != 0 {
		log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
		if response.ErrorCode == errorCodeExceedLimit {
			// User-facing message: the number of instances of the selected
			// flavor has exceeded the maximum quota.
			response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
		}
		return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
	}

	return &result, nil
}

+ 2
- 1
routers/api/v1/api.go View File

@@ -883,7 +883,8 @@ func RegisterRoutes(m *macaron.Macaron) {
}, reqRepoReader(models.UnitTypeCloudBrain)) }, reqRepoReader(models.UnitTypeCloudBrain))
m.Group("/modelarts", func() { m.Group("/modelarts", func() {
m.Group("/notebook", func() { m.Group("/notebook", func() {
m.Get("/:jobid", repo.GetModelArtsNotebook)
//m.Get("/:jobid", repo.GetModelArtsNotebook)
m.Get("/:jobid", repo.GetModelArtsNotebook2)
}) })
m.Group("/train-job", func() { m.Group("/train-job", func() {
m.Group("/:jobid", func() { m.Group("/:jobid", func() {


+ 31
- 0
routers/api/v1/repo/modelarts.go View File

@@ -51,6 +51,37 @@ func GetModelArtsNotebook(ctx *context.APIContext) {


} }


// GetModelArtsNotebook2 reports the current status of a notebook 2.0 job.
//
// The job is looked up by the ":jobid" route parameter within the current
// repository, its status is fetched with modelarts.GetNotebook2, and the
// stored record is updated with that status. Either lookup failing yields a
// not-found response; otherwise the job ID and status are returned as JSON.
func GetModelArtsNotebook2(ctx *context.APIContext) {
	jobID := ctx.Params(":jobid")
	repoID := ctx.Repo.Repository.ID

	job, err := models.GetRepoCloudBrainByJobID(repoID, jobID)
	if err != nil {
		ctx.NotFound(err)
		return
	}

	result, err := modelarts.GetNotebook2(jobID)
	if err != nil {
		ctx.NotFound(err)
		return
	}

	job.Status = result.Status
	if err = models.UpdateJob(job); err != nil {
		// Persisting the status is best effort: the freshly fetched status is
		// still returned to the caller.
		log.Error("UpdateJob failed: %v", err)
	}

	ctx.JSON(http.StatusOK, map[string]interface{}{
		"JobID":     jobID,
		"JobStatus": result.Status,
	})
}

func GetModelArtsTrainJob(ctx *context.APIContext) { func GetModelArtsTrainJob(ctx *context.APIContext) {
var ( var (
err error err error


+ 2
- 1
routers/repo/cloudbrain.go View File

@@ -967,7 +967,8 @@ func SyncCloudbrainStatus() {
} }
} else if task.Type == models.TypeCloudBrainTwo { } else if task.Type == models.TypeCloudBrainTwo {
if task.JobType == string(models.JobTypeDebug) { if task.JobType == string(models.JobTypeDebug) {
result, err := modelarts.GetJob(task.JobID)
//result, err := modelarts.GetJob(task.JobID)
result, err := modelarts.GetNotebook2(task.JobID)
if err != nil { if err != nil {
log.Error("GetJob(%s) failed:%v", task.JobName, err) log.Error("GetJob(%s) failed:%v", task.JobName, err)
continue continue


+ 51
- 5
routers/repo/modelarts.go View File

@@ -173,6 +173,52 @@ func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm)
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all") ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
} }


// Notebook2Create handles submission of the notebook creation form for
// notebook 2.0.
//
// The request is refused when GetCloudbrainNotebookCountByUserID reports at
// least one task for the user, or when a job with the same name already
// exists. Otherwise the instance is created via modelarts.GenerateNotebook2.
// Every failure re-renders the creation page with an error message; success
// redirects to the debug job list.
func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
	ctx.Data["PageIsNotebook"] = true
	jobName := form.JobName
	uuid := form.Attachment
	description := form.Description

	// NOTE(review): the flavor submitted in the form (form.Flavor) is ignored
	// and this fixed flavor is always used — confirm this is intended.
	flavor := "modelarts.bm.910.arm.public.1"

	count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
	if err != nil {
		log.Error("GetCloudbrainNotebookCountByUserID failed:%v, MsgID:%v", err, ctx.Data["MsgID"])
		cloudBrainNewDataPrepare(ctx)
		ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form)
		return
	}
	if count >= 1 {
		log.Error("the user already has running or waiting task, MsgID:%v", ctx.Data["MsgID"])
		cloudBrainNewDataPrepare(ctx)
		ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form)
		return
	}

	// A nil error means a job with this name was found.
	_, err = models.GetCloudbrainByName(jobName)
	if err == nil {
		log.Error("the job name did already exist, MsgID:%v", ctx.Data["MsgID"])
		cloudBrainNewDataPrepare(ctx)
		ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form)
		return
	}
	if !models.IsErrJobNotExist(err) {
		log.Error("system error, %v, MsgID:%v", err, ctx.Data["MsgID"])
		cloudBrainNewDataPrepare(ctx)
		ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form)
		return
	}

	err = modelarts.GenerateNotebook2(ctx, jobName, uuid, description, flavor)
	if err != nil {
		// Prepare the page data like the other error branches do, so the
		// re-rendered form is populated.
		cloudBrainNewDataPrepare(ctx)
		ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
		return
	}
	ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
}

func NotebookShow(ctx *context.Context) { func NotebookShow(ctx *context.Context) {
ctx.Data["PageIsCloudBrain"] = true ctx.Data["PageIsCloudBrain"] = true


@@ -184,7 +230,7 @@ func NotebookShow(ctx *context.Context) {
return return
} }


result, err := modelarts.GetJob(jobID)
result, err := modelarts.GetNotebook2(jobID)
if err != nil { if err != nil {
ctx.Data["error"] = err.Error() ctx.Data["error"] = err.Error()
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
@@ -200,12 +246,12 @@ func NotebookShow(ctx *context.Context) {
return return
} }


createTime, _ := com.StrTo(result.CreationTimestamp).Int64()
createTime, _ := com.StrTo(result.Lease.CreateTime).Int64()
result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05") result.CreateTime = time.Unix(int64(createTime/1000), 0).Format("2006-01-02 15:04:05")
endTime, _ := com.StrTo(result.LatestUpdateTimestamp).Int64()
endTime, _ := com.StrTo(result.Lease.UpdateTime).Int64()
result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05") result.LatestUpdateTime = time.Unix(int64(endTime/1000), 0).Format("2006-01-02 15:04:05")
result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
//result.QueuingInfo.BeginTime = time.Unix(int64(result.QueuingInfo.BeginTimestamp/1000), 0).Format("2006-01-02 15:04:05")
//result.QueuingInfo.EndTime = time.Unix(int64(result.QueuingInfo.EndTimestamp/1000), 0).Format("2006-01-02 15:04:05")
} }


ctx.Data["task"] = task ctx.Data["task"] = task


+ 10
- 0
routers/routes/routes.go View File

@@ -1029,6 +1029,7 @@ func RegisterRoutes(m *macaron.Macaron) {


m.Group("/modelarts", func() { m.Group("/modelarts", func() {
m.Group("/notebook", func() { m.Group("/notebook", func() {
/* v1.0
m.Group("/:jobid", func() { m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug) m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug)
@@ -1037,6 +1038,15 @@ func RegisterRoutes(m *macaron.Macaron) {
}) })
m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew) m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew)
m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate) m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.NotebookCreate)
*/
m.Group("/:jobid", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug)
m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
})
m.Get("/create", reqRepoCloudBrainWriter, repo.NotebookNew)
m.Post("/create", reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsNotebookForm{}), repo.Notebook2Create)
}) })


m.Group("/train-job", func() { m.Group("/train-job", func() {


+ 4
- 4
templates/repo/modelarts/notebook/new.tmpl View File

@@ -51,7 +51,7 @@
<input name="job_name" id="cloudbrain_job_name" placeholder="任务名称" value="{{.job_name}}" tabindex="3" autofocus required maxlength="255" onkeyup="this.value=this.value.replace(/[, ]/g,'')"> <input name="job_name" id="cloudbrain_job_name" placeholder="任务名称" value="{{.job_name}}" tabindex="3" autofocus required maxlength="255" onkeyup="this.value=this.value.replace(/[, ]/g,'')">
</div> </div>


<div class="inline field">
<!-- <div class="inline field">
<label>数据集</label> <label>数据集</label>
<input type="text" list="cloudbrain_dataset" placeholder="选择数据集" name="" id="answerInput" autofocus maxlength="36"> <input type="text" list="cloudbrain_dataset" placeholder="选择数据集" name="" id="answerInput" autofocus maxlength="36">
<datalist id="cloudbrain_dataset" class="ui search" style='width:385px' name="attachment"> <datalist id="cloudbrain_dataset" class="ui search" style='width:385px' name="attachment">
@@ -69,7 +69,7 @@
<div class="inline required field"> <div class="inline required field">
<label>类型</label> <label>类型</label>
<input name="job_type" id="cloudbrain_job_type" value="{{.notebook_type}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly"> <input name="job_type" id="cloudbrain_job_type" value="{{.notebook_type}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly">
</div>
</div> -->
<div class="inline required field"> <div class="inline required field">
<label>规格</label> <label>规格</label>
<select id="cloudbrain_flavor" class="ui search dropdown" placeholder="选择规格" style='width:385px' name="flavor"> <select id="cloudbrain_flavor" class="ui search dropdown" placeholder="选择规格" style='width:385px' name="flavor">
@@ -79,10 +79,10 @@
{{end}} {{end}}
</select> </select>
</div> </div>
<div class="inline required field">
<!--<div class="inline required field">
<label>数据集存放路径</label> <label>数据集存放路径</label>
<input name="dataset_path" id="cloudbrain_dataset_path" value="{{.dataset_path}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly"> <input name="dataset_path" id="cloudbrain_dataset_path" value="{{.dataset_path}}" tabindex="3" disabled autofocus required maxlength="255" readonly="readonly">
</div>
</div> -->
<div class="inline field"> <div class="inline field">
<label>描述</label> <label>描述</label>
<input name="description" id="cloudbrain_description" tabindex="3" autofocus maxlength="255"> <input name="description" id="cloudbrain_description" tabindex="3" autofocus maxlength="255">


+ 2
- 2
templates/repo/modelarts/notebook/show.tmpl View File

@@ -47,7 +47,7 @@
</table> </table>
{{end}} {{end}}
</div> </div>
<div class="ui blue segment">
<!--<div class="ui blue segment">
{{with .result}} {{with .result}}
<table class="ui celled striped table"> <table class="ui celled striped table">
<thead> <thead>
@@ -125,7 +125,7 @@
</tbody> </tbody>
</table> </table>
{{end}} {{end}}
</div>
</div>-->
</div> </div>


</div> </div>


Loading…
Cancel
Save