Browse Source

cd notebook

tags/v1.22.9.1^2
lewis 3 years ago
parent
commit
e92d948822
8 changed files with 266 additions and 317 deletions
  1. +2
    -0
      go.sum
  2. +26
    -39
      models/cloudbrain.go
  3. +2
    -2
      modules/grampus/grampus.go
  4. +9
    -9
      modules/modelarts/modelarts.go
  5. +9
    -22
      modules/modelarts_cd/modelarts.go
  6. +90
    -116
      modules/modelarts_cd/resty.go
  7. +69
    -19
      modules/setting/setting.go
  8. +59
    -110
      routers/repo/modelarts.go

+ 2
- 0
go.sum View File

@@ -713,12 +713,14 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1
github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
github.com/smartystreets/assertions v1.0.1 h1:voD4ITNjPL5jjBfgR/r8fPIIBrliWrWHeiJApdr3r4w=
github.com/smartystreets/assertions v1.0.1/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM=
github.com/smartystreets/assertions v1.1.0 h1:MkTeG1DMwsrdH7QtLXy5W+fUxWq+vmb6cLmyJ7aRtF0=
github.com/smartystreets/assertions v1.1.0/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
github.com/smartystreets/goconvey v0.0.0-20181108003508-044398e4856c/go.mod h1:XDJAKZRPZ1CvBcN2aX5YOUTYGHki24fSF0Iv48Ibg0s=
github.com/smartystreets/goconvey v0.0.0-20190330032615-68dc04aab96a/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337 h1:WN9BUFbdyOsSH/XohnWpXOlq9NBD5sGAB2FciQMUEe8=
github.com/smartystreets/goconvey v0.0.0-20190731233626-505e41936337/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE=


+ 26
- 39
models/cloudbrain.go View File

@@ -24,15 +24,16 @@ type ModelArtsJobStatus string
const (
TypeCloudBrainOne int = iota
TypeCloudBrainTwo
TypeC2Net //智算网络
TypeC2Net //智算网络
TypeCDCenter //成都智算中心

TypeCloudBrainAll = -1
)

const (
NPUResource = "NPU"
GPUResource = "CPU/GPU"
AllResource = "all"
NPUResource = "NPU"
GPUResource = "CPU/GPU"
AllResource = "all"

//notebook storage category
EVSCategory = "EVS"
@@ -584,37 +585,17 @@ type ResourceSpec struct {
ShareMemMiB int `json:"shareMemMiB"`
}

type FlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

type SpecialPools struct {
Pools []*SpecialPool `json:"pools"`
}
type SpecialPool struct {
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
Flavor []*FlavorInfo `json:"flavor"`
}

type ImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}

type ImageInfoModelArts struct {
Id string `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
Org string `json:"org"`
Type string `json:"type"`
IsExclusive bool `json:"isExclusive"`
Pool []*GpuInfo `json:"pool"`
JobType []string `json:"jobType"`
ResourceSpec []*ResourceSpec `json:"resourceSpecs"`
Flavor []*setting.FlavorInfo `json:"flavor"`
}

type PoolInfos struct {
@@ -720,6 +701,17 @@ type CreateNotebook2Params struct {
Volume VolumeReq `json:"volume"`
}

type CreateNotebookWithoutPoolParams struct {
JobName string `json:"name"`
Description string `json:"description"`
Duration int64 `json:"duration"` //ms
Feature string `json:"feature"`
Flavor string `json:"flavor"`
ImageID string `json:"image_id"`
WorkspaceID string `json:"workspace_id"`
Volume VolumeReq `json:"volume"`
}

type VolumeReq struct {
Capacity int `json:"capacity"`
Category string `json:"category"`
@@ -943,6 +935,7 @@ type NotebookGetJobTokenResult struct {
}

type NotebookDelResult struct {
NotebookResult
InstanceID string `json:"instance_id"`
}

@@ -1434,12 +1427,6 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
)
}

if len(opts.ComputeResource) > 0 {
cond = cond.And(
builder.Eq{"cloudbrain.compute_resource": opts.ComputeResource},
)
}

if len(opts.JobTypes) > 0 {
if opts.JobTypeNot {
cond = cond.And(
@@ -1904,9 +1891,9 @@ func GetWaitingCloudbrainCount(cloudbrainType int, computeResource string, jobTy
return sess.Count(new(Cloudbrain))
}

func GetCloudbrainNotebookCountByUserID(userID int64) (int, error) {
func GetCloudbrainNotebookCountByUserID(userID int64, typeCloudbrain int) (int, error) {
count, err := x.In("status", ModelArtsCreateQueue, ModelArtsCreating, ModelArtsStarting, ModelArtsReadyToStart, ModelArtsResizing, ModelArtsStartQueuing, ModelArtsRunning, ModelArtsRestarting).
And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, TypeCloudBrainTwo).Count(new(Cloudbrain))
And("job_type = ? and user_id = ? and type = ?", JobTypeDebug, userID, typeCloudbrain).Count(new(Cloudbrain))
return int(count), err
}



+ 2
- 2
modules/grampus/grampus.go View File

@@ -30,8 +30,8 @@ const (

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
FlavorInfos *setting.StFlavorInfos
ImageInfos *setting.StImageInfosModelArts

SpecialPools *models.SpecialPools
)


+ 9
- 9
modules/modelarts/modelarts.go View File

@@ -1,6 +1,7 @@
package modelarts

import (
"code.gitea.io/gitea/modules/modelarts_cd"
"encoding/json"
"errors"
"fmt"
@@ -68,8 +69,6 @@ const (

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
TrainFlavorInfos *Flavor
SpecialPools *models.SpecialPools
)
@@ -747,11 +746,7 @@ func GetNotebookImageName(imageId string) (string, error) {
var validImage = false
var imageName = ""

if ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
}

for _, imageInfo := range ImageInfos.ImageInfo {
for _, imageInfo := range setting.StImageInfos.ImageInfo {
if imageInfo.Id == imageId {
validImage = true
imageName = imageInfo.Value
@@ -808,8 +803,13 @@ func HandleTrainJobInfo(task *models.Cloudbrain) error {
}

func HandleNotebookInfo(task *models.Cloudbrain) error {

result, err := GetNotebook2(task.JobID)
var result *models.GetNotebook2Result
var err error
if task.Type == models.TypeCloudBrainTwo {
result, err = GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}
if err != nil {
log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
return err


+ 9
- 22
modules/modelarts_cd/modelarts.go View File

@@ -1,7 +1,6 @@
package modelarts_cd

import (
"encoding/json"
"errors"
"strconv"
"strings"
@@ -51,13 +50,7 @@ const (
TotalVersionCount = 1
)

var (
poolInfos *models.PoolInfos
FlavorInfos *models.FlavorInfos
ImageInfos *models.ImageInfosModelArts
TrainFlavorInfos *Flavor
SpecialPools *models.SpecialPools
)
var ()

type VersionInfo struct {
Version []struct {
@@ -96,23 +89,18 @@ type Parameters struct {
}

func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, description, flavor, imageId string) error {
if poolInfos == nil {
json.Unmarshal([]byte(setting.PoolInfos), &poolInfos)
}

imageName, err := GetNotebookImageName(imageId)
if err != nil {
log.Error("GetNotebookImageName failed: %v", err.Error())
return err
}
createTime := timeutil.TimeStampNow()
jobResult, err := createNotebook(models.CreateNotebook2Params{
jobResult, err := createNotebook(models.CreateNotebookWithoutPoolParams{
JobName: jobName,
Description: description,
Flavor: flavor,
Duration: autoStopDurationMs,
ImageID: imageId,
PoolID: poolInfos.PoolInfo[0].PoolId,
Feature: models.NotebookFeature,
Volume: models.VolumeReq{
Capacity: setting.Capacity,
@@ -122,14 +110,14 @@ func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, descr
WorkspaceID: "0",
})
if err != nil {
log.Error("createNotebook2 failed: %v", err.Error())
log.Error("createNotebook failed: %v", err.Error())
if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", displayJobName)
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
JobID: models.TempJobId,
VersionID: models.TempVersionId,
Status: models.TempJobStatus,
Type: models.TypeCloudBrainTwo,
Type: models.TypeCDCenter,
JobName: jobName,
JobType: string(models.JobTypeDebug),
})
@@ -149,7 +137,7 @@ func GenerateNotebook(ctx *context.Context, displayJobName, jobName, uuid, descr
FlavorCode: flavor,
DisplayJobName: displayJobName,
JobType: string(models.JobTypeDebug),
Type: models.TypeCloudBrainTwo,
Type: models.TypeCDCenter,
Uuid: uuid,
ComputeResource: models.NPUResource,
Image: imageName,
@@ -172,11 +160,7 @@ func GetNotebookImageName(imageId string) (string, error) {
var validImage = false
var imageName = ""

if ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &ImageInfos)
}

for _, imageInfo := range ImageInfos.ImageInfo {
for _, imageInfo := range setting.StImageInfos.ImageInfo {
if imageInfo.Id == imageId {
validImage = true
imageName = imageInfo.Value
@@ -191,6 +175,7 @@ func GetNotebookImageName(imageId string) (string, error) {
return imageName, nil
}

/*
func HandleNotebookInfo(task *models.Cloudbrain) error {

result, err := GetNotebook(task.JobID)
@@ -225,3 +210,5 @@ func HandleNotebookInfo(task *models.Cloudbrain) error {

return nil
}

*/

+ 90
- 116
modules/modelarts_cd/resty.go View File

@@ -23,9 +23,6 @@ var (
)

const (
methodPassword = "password"

urlGetToken = "/v3/auth/tokens"
errorCodeExceedLimit = "ModelArts.0118"

//notebook 2.0
@@ -50,196 +47,173 @@ func getHttpClient() *http.Client {
}

func GetNotebook(jobID string) (*models.GetNotebook2Result, error) {
client := getHttpClient()
var result models.GetNotebook2Result

retry := 0
client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}
r, _ := http.NewRequest(http.MethodGet,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID,
nil)

sendjob:
res, err := client.R().
SetHeader("Content-Type", "application/json").
SetAuthToken(TOKEN).
SetResult(&result).
Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)
r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
return nil, fmt.Errorf("resty GetJob: %v", err)
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
goto sendjob
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

var response models.NotebookResult
err = json.Unmarshal(res.Body(), &response)
err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if len(response.ErrorCode) != 0 {
log.Error("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
retry++
_ = getToken()
goto sendjob
}
return &result, fmt.Errorf("GetJob failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if len(result.ErrorCode) != 0 {
log.Error("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("GetNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

func ManageNotebook(jobID string, param models.NotebookAction) (*models.NotebookActionResult, error) {
client := getHttpClient()
var result models.NotebookActionResult

retry := 0
client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}
r, _ := http.NewRequest(http.MethodPost,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID+"/"+param.Action+"?duration="+strconv.Itoa(autoStopDurationMs),
nil)

sendjob:
res, err := client.R().
SetHeader("Content-Type", "application/json").
SetAuthToken(TOKEN).
SetResult(&result).
Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID + "/" + param.Action + "?duration=" + strconv.Itoa(autoStopDurationMs))
r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
return &result, fmt.Errorf("resty ManageNotebook2: %v", err)
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
goto sendjob
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

var response models.NotebookResult
err = json.Unmarshal(res.Body(), &response)
err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}

if len(response.ErrorCode) != 0 {
log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
retry++
_ = getToken()
goto sendjob
}
return &result, fmt.Errorf("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if len(result.ErrorCode) != 0 {
log.Error("ManageNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("ManageNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

func DelNotebook(jobID string) (*models.NotebookDelResult, error) {
client := getHttpClient()
var result models.NotebookDelResult

retry := 0
client := getHttpClient()
s := core.Signer{
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}

sendjob:
res, err := client.R().
SetHeader("Content-Type", "application/json").
SetAuthToken(TOKEN).
SetResult(&result).
Delete(HOST + "/v1/" + setting.ProjectID + urlNotebook2 + "/" + jobID)
r, _ := http.NewRequest(http.MethodDelete,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2+"/"+jobID,
nil)
r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := client.Do(r)
if err != nil {
return &result, fmt.Errorf("resty DelJob: %v", err)
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
goto sendjob
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

var response models.NotebookResult
err = json.Unmarshal(res.Body(), &response)
err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if len(response.ErrorCode) != 0 {
log.Error("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
retry++
_ = getToken()
goto sendjob
}
return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if len(result.ErrorCode) != 0 {
log.Error("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("DelNotebook2 failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil
}

func createNotebook(createJobParams models.CreateNotebook2Params) (*models.CreateNotebookResult, error) {
client := getHttpClient()
func createNotebook(createJobParams models.CreateNotebookWithoutPoolParams) (*models.CreateNotebookResult, error) {
var result models.CreateNotebookResult

retry := 0

client := getHttpClient()
s := core.Signer{
Key: "",
Secret: "",
Key: setting.ModelartsCD.AccessKey,
Secret: setting.ModelartsCD.SecretKey,
}

r, _ := http.NewRequest(http.MethodPost, "", ioutil.NopCloser(bytes.NewBuffer([]byte(""))))
req, _ := json.Marshal(createJobParams)
r, _ := http.NewRequest(http.MethodPost,
setting.ModelartsCD.EndPoint+"/v1/"+setting.ModelartsCD.ProjectID+urlNotebook2,
ioutil.NopCloser(bytes.NewBuffer(req)))

r.Header.Add("content-type", "application/json")
s.Sign(r)

resp, err := http.DefaultClient.Do(r)
body, err := ioutil.ReadAll(resp.Body)

sendjob:
res, err := client.
SetHeader("Content-Type", "application/json").
SetAuthToken(TOKEN).
SetBody(createJobParams).
SetResult(&result).
Post(HOST + "/v1/" + setting.ProjectID + urlNotebook2)

resp, err := client.Do(r)
if err != nil {
return nil, fmt.Errorf("resty create notebook2: %s", err)
log.Error("client.Do failed: %s", err.Error())
return &result, fmt.Errorf("client.Do failed: %s", err.Error())
}

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
goto sendjob
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error("ioutil.ReadAll failed: %s", err.Error())
return &result, fmt.Errorf("ioutil.ReadAll failed: %s", err.Error())
}

var response models.NotebookResult
err = json.Unmarshal(res.Body(), &response)
err = json.Unmarshal(body, &result)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
}

if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}

if len(response.ErrorCode) != 0 {
log.Error("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == errorCodeExceedLimit {
response.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
}
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
retry++
_ = getToken()
goto sendjob
if len(result.ErrorCode) != 0 {
log.Error("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
if result.ErrorCode == errorCodeExceedLimit {
result.ErrorMsg = "所选规格使用数量已超过最大配额限制。"
}
return &result, fmt.Errorf("createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
return &result, fmt.Errorf("createNotebook failed(%s): %s", result.ErrorCode, result.ErrorMsg)
}

return &result, nil


+ 69
- 19
modules/setting/setting.go View File

@@ -75,6 +75,26 @@ type C2NetSqInfos struct {
C2NetSqInfo []*C2NetSequenceInfo `json:"sequence"`
}

type StFlavorInfos struct {
FlavorInfo []*FlavorInfo `json:"flavor_info"`
}

type FlavorInfo struct {
Id int `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

type StImageInfosModelArts struct {
ImageInfo []*ImageInfoModelArts `json:"image_info"`
}

type ImageInfoModelArts struct {
Id string `json:"id"`
Value string `json:"value"`
Desc string `json:"desc"`
}

var (
// AppVer settings
AppVer string
@@ -535,33 +555,29 @@ var (
AllowedOrg string
ProfileID string
PoolInfos string
Flavor string
FlavorInfos string
DebugHost string
ImageInfos string
Capacity int
MaxTempQueryTimes int
StFlavorInfo *StFlavorInfos
StImageInfos *StImageInfosModelArts
//train-job
ResourcePools string
Engines string
EngineVersions string
FlavorInfos string
TrainJobFLAVORINFOS string
ModelArtsSpecialPools string

// modelarts-cd config
ModelartsCD = struct {
ModelArtsHost string
IamHost string
ProjectID string
ProjectName string
ModelArtsUsername string
ModelArtsPassword string
ModelArtsDomain string
AllowedOrg string
ProfileID string
PoolInfos string
Flavor string
DebugHost string
Enabled bool
EndPoint string
ProjectID string
AccessKey string
SecretKey string
ImageInfos string
FlavorInfos string
}{}

//grampus config
@@ -1438,9 +1454,8 @@ func NewContext() {
AllowedOrg = sec.Key("ORGANIZATION").MustString("")
ProfileID = sec.Key("PROFILE_ID").MustString("")
PoolInfos = sec.Key("POOL_INFOS").MustString("")
Flavor = sec.Key("FLAVOR").MustString("")
ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
Capacity = sec.Key("IMAGE_INFOS").MustInt(100)
Capacity = sec.Key("CAPACITY").MustInt(100)
MaxTempQueryTimes = sec.Key("MAX_TEMP_QUERY_TIMES").MustInt(30)
ResourcePools = sec.Key("Resource_Pools").MustString("")
Engines = sec.Key("Engines").MustString("")
@@ -1488,8 +1503,8 @@ func NewContext() {
Course.OrgName = sec.Key("org_name").MustString("")
Course.TeamName = sec.Key("team_name").MustString("")

GetGrampusConfig()
getGrampusConfig()
getModelartsCDConfig()
getModelConvertConfig()
}

@@ -1512,7 +1527,22 @@ func getModelConvertConfig() {
ModelConvert.NPU_TENSORFLOW_IMAGE_ID = sec.Key("NPU_TENSORFLOW_IMAGE_ID").MustInt(35)
}

func GetGrampusConfig() {
func getModelartsCDConfig() {
sec := Cfg.Section("modelarts-cd")

ModelartsCD.Enabled = sec.Key("ENABLED").MustBool(false)
ModelartsCD.EndPoint = sec.Key("ENDPOINT").MustString("https://modelarts.cn-southwest-228.cdzs.cn")
ModelartsCD.ProjectID = sec.Key("PROJECT_ID").MustString("")
ModelartsCD.AccessKey = sec.Key("ACCESS_KEY").MustString("")
ModelartsCD.SecretKey = sec.Key("SECRET_KEY").MustString("")
ModelartsCD.ImageInfos = sec.Key("IMAGE_INFOS").MustString("")
ModelartsCD.FlavorInfos = sec.Key("FLAVOR_INFOS").MustString("")

getNotebookImageInfos()
getNotebookFlavorInfos()
}

func getGrampusConfig() {
sec := Cfg.Section("grampus")

Grampus.Env = sec.Key("ENV").MustString("TEST")
@@ -1646,6 +1676,26 @@ func ensureLFSDirectory() {
}
}

func getNotebookImageInfos() {
if StImageInfos == nil {
if ModelartsCD.Enabled {
json.Unmarshal([]byte(ModelartsCD.ImageInfos), &StImageInfos)
} else {
json.Unmarshal([]byte(ImageInfos), &StImageInfos)
}
}
}

func getNotebookFlavorInfos() {
if StFlavorInfo == nil {
if ModelartsCD.Enabled {
json.Unmarshal([]byte(ModelartsCD.FlavorInfos), &StFlavorInfo)
} else {
json.Unmarshal([]byte(FlavorInfos), &StFlavorInfo)
}
}
}

// NewServices initializes the services
func NewServices() {
InitDBConfig()


+ 59
- 110
routers/repo/modelarts.go View File

@@ -2,6 +2,7 @@ package repo

import (
"archive/zip"
"code.gitea.io/gitea/modules/modelarts_cd"
"encoding/json"
"errors"
"fmt"
@@ -60,18 +61,11 @@ func DebugJobIndex(ctx *context.Context) {
if page <= 0 {
page = 1
}
typeCloudBrain := models.TypeCloudBrainAll
jobTypeNot := false
if listType == models.GPUResource {
typeCloudBrain = models.TypeCloudBrainOne
} else if listType == models.NPUResource {
typeCloudBrain = models.TypeCloudBrainTwo
} else if listType == models.AllResource {
typeCloudBrain = models.TypeCloudBrainAll
} else {
log.Error("listType(%s) error", listType)
ctx.ServerError("listType error", errors.New("listType error"))
return
var computeResource string
if listType != models.AllResource {
computeResource = listType
}

var jobTypes []string
@@ -81,10 +75,11 @@ func DebugJobIndex(ctx *context.Context) {
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: typeCloudBrain,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
RepoID: repo.ID,
ComputeResource: computeResource,
Type: models.TypeCloudBrainAll,
JobTypeNot: jobTypeNot,
JobTypes: jobTypes,
})
if err != nil {
ctx.ServerError("Get debugjob faild:", err)
@@ -134,16 +129,8 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return err
}
ctx.Data["attachments"] = attachs

if modelarts.ImageInfos == nil {
json.Unmarshal([]byte(setting.ImageInfos), &modelarts.ImageInfos)
}
ctx.Data["images"] = modelarts.ImageInfos.ImageInfo

if modelarts.FlavorInfos == nil {
json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
}
ctx.Data["flavors"] = modelarts.FlavorInfos.FlavorInfo
ctx.Data["images"] = setting.StImageInfos.ImageInfo
ctx.Data["flavors"] = setting.StFlavorInfo.FlavorInfo
setSpecBySpecialPoolConfig(ctx, string(models.JobTypeDebug))

ctx.Data["datasetType"] = models.TypeCloudBrainTwo
@@ -154,50 +141,6 @@ func notebookNewDataPrepare(ctx *context.Context) error {
return nil
}

func NotebookCreate(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
ctx.Data["PageIsNotebook"] = true
jobName := form.JobName
uuid := form.Attachment
description := form.Description
flavor := form.Flavor

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form)
return
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("you have already a running or waiting task, can not create more", tplModelArtsNotebookNew, &form)
return
}
}
_, err = models.GetCloudbrainByName(jobName)
if err == nil {
log.Error("the job name did already exist", ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("the job name did already exist", tplModelArtsNotebookNew, &form)
return
} else {
if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"])
cloudBrainNewDataPrepare(ctx)
ctx.RenderWithErr("system error", tplModelArtsNotebookNew, &form)
return
}
}

err = modelarts.GenerateTask(ctx, jobName, uuid, description, flavor)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookNew, &form)
return
}
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/debugjob?debugListType=all")
}

func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm) {
ctx.Data["PageIsNotebook"] = true
displayJobName := form.DisplayJobName
@@ -208,7 +151,12 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
imageId := form.ImageId
repo := ctx.Repo.Repository

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
typeCloudbrain := models.TypeCloudBrainTwo
if setting.ModelartsCD.Enabled {
typeCloudbrain = models.TypeCDCenter
}

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID, typeCloudbrain)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
@@ -247,7 +195,12 @@ func Notebook2Create(ctx *context.Context, form auth.CreateModelArtsNotebookForm
return
}

err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
if setting.ModelartsCD.Enabled {
err = modelarts_cd.GenerateNotebook(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
} else {
err = modelarts.GenerateNotebook2(ctx, displayJobName, jobName, uuid, description, flavor, imageId)
}

if err != nil {
log.Error("GenerateNotebook2 failed, %v", err, ctx.Data["MsgID"])
notebookNewDataPrepare(ctx)
@@ -292,14 +245,11 @@ func NotebookShow(ctx *context.Context) {
if err == nil {
task.User = user
}
if modelarts.FlavorInfos == nil {
json.Unmarshal([]byte(setting.FlavorInfos), &modelarts.FlavorInfos)
}

findSpec := false
if modelarts.FlavorInfos != nil {
ctx.Data["resource_spec"] = modelarts.FlavorInfos.FlavorInfo[0].Desc
for _, f := range modelarts.FlavorInfos.FlavorInfo {
if setting.StFlavorInfo != nil {
ctx.Data["resource_spec"] = setting.StFlavorInfo.FlavorInfo[0].Desc
for _, f := range setting.StFlavorInfo.FlavorInfo {
if fmt.Sprint(f.Value) == task.FlavorCode {
ctx.Data["resource_spec"] = f.Desc
findSpec = true
@@ -378,36 +328,16 @@ func setShowSpecBySpecialPoolConfig(ctx *context.Context, findSpec bool, task *m
}
}

func NotebookDebug(ctx *context.Context) {
var jobID = ctx.Params(":jobid")

result, err := modelarts.GetJob(jobID)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
}

res, err := modelarts.GetJobToken(jobID)
if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
}

urls := strings.Split(result.Spec.Annotations.Url, "/")
urlPrefix := result.Spec.Annotations.TargetDomain
for i, url := range urls {
if i > 2 {
urlPrefix += "/" + url
}
}

debugUrl := urlPrefix + "?token=" + res.Token
ctx.Redirect(debugUrl)
}

func NotebookDebug2(ctx *context.Context) {
var err error
var result *models.GetNotebook2Result
task := ctx.Cloudbrain
result, err := modelarts.GetNotebook2(task.JobID)
if task.Type == models.TypeCloudBrainTwo {
result, err = modelarts.GetNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
result, err = modelarts_cd.GetNotebook(task.JobID)
}

if err != nil {
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookIndex, nil)
return
@@ -435,7 +365,7 @@ func NotebookRestart(ctx *context.Context) {
break
}

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID, task.Type)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
errorMsg = "system error"
@@ -453,7 +383,13 @@ func NotebookRestart(ctx *context.Context) {
Action: models.ActionStart,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}

if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"])
/* 暂不处理再次调试502的场景,详情见方案
@@ -537,7 +473,14 @@ func NotebookStop(ctx *context.Context) {
Action: models.ActionStop,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
var err error
var res *models.NotebookActionResult
if task.Type == models.TypeCloudBrainTwo {
res, err = modelarts.ManageNotebook2(task.JobID, param)
} else if task.Type == models.TypeCDCenter {
res, err = modelarts_cd.ManageNotebook(task.JobID, param)
}

if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
@@ -587,7 +530,13 @@ func NotebookDel(ctx *context.Context) {
return
}

_, err := modelarts.DelNotebook2(task.JobID)
var err error
if task.Type == models.TypeCloudBrainTwo {
_, err = modelarts.DelNotebook2(task.JobID)
} else if task.Type == models.TypeCDCenter {
_, err = modelarts_cd.DelNotebook(task.JobID)
}

if err != nil {
log.Error("DelNotebook2(%s) failed:%v", task.JobName, err.Error())
if strings.Contains(err.Error(), modelarts.NotebookNotFound) || strings.Contains(err.Error(), modelarts.NotebookNoPermission) || strings.Contains(err.Error(), modelarts.NotebookInvalid) {
@@ -2187,7 +2136,7 @@ func checkModelArtsSpecialPool(ctx *context.Context, flavorCode string, jobType
if !isMatchPool {
isMatchSpec := false
if jobType == string(models.JobTypeDebug) {
for _, flavor := range modelarts.FlavorInfos.FlavorInfo {
for _, flavor := range setting.StFlavorInfo.FlavorInfo {
if flavor.Value == flavorCode {
isMatchSpec = true
break


Loading…
Cancel
Save