Browse Source

sync modelarts

opt-modelarts
lewis 3 years ago
parent
commit
79cc83e9c9
10 changed files with 921 additions and 633 deletions
  1. +66
    -6
      models/cloudbrain.go
  2. +57
    -0
      models/cloudbrain_temp.go
  3. +4
    -4
      modules/cloudbrain/cloudbrain.go
  4. +476
    -240
      modules/modelarts/modelarts.go
  5. +171
    -21
      modules/modelarts/resty.go
  6. +4
    -123
      routers/api/v1/repo/modelarts.go
  7. +6
    -52
      routers/repo/cloudbrain.go
  8. +2
    -2
      routers/repo/grampus.go
  9. +133
    -184
      routers/repo/modelarts.go
  10. +2
    -1
      routers/routes/routes.go

+ 66
- 6
models/cloudbrain.go View File

@@ -31,9 +31,11 @@ const (
)

const (
NPUResource = "NPU"
GPUResource = "CPU/GPU"
AllResource = "all"
TempJobIdPrefix = "TEMP"
JobStatusTemp = "TEMP"
NPUResource = "NPU"
GPUResource = "CPU/GPU"
AllResource = "all"

//notebook storage category
EVSCategory = "EVS"
@@ -353,6 +355,7 @@ type CloudbrainsOptions struct {
RepoID int64 // include all repos if empty
UserID int64
JobID string
JobName string
SortType string
CloudbrainIDs []int64
JobStatus []string
@@ -1256,6 +1259,52 @@ type LogFile struct {
Name string
}

// JobList is one element of the "jobs" array carried by GetTrainJobListResult.
// Every field is decoded from the JSON key named in its tag.
type JobList struct {
JobName string `json:"job_name"`
JobID int64 `json:"job_id"`
VersionID int64 `json:"version_id"`
VersionCount int64 `json:"version_count"`
Description string `json:"job_desc"`
// IntStatus is the numeric status code; HandleTrainJobInfo converts it with TransTrainJobStatus.
IntStatus int `json:"status"`
}

// GetTrainJobListResult is the JSON payload of a train-job list query.
// It embeds ErrorResult, so the error fields of that type are decoded from the same document.
type GetTrainJobListResult struct {
ErrorResult
JobTotalCount int `json:"job_total_count"` // total number of user-created jobs matched by the query
JobCountLimit int `json:"job_count_limit"` // number of training jobs the user may still create
Quotas int `json:"quotas"` // upper limit on the number of running training jobs
JobList []JobList `json:"jobs"`
}

// JobVersionList is one element of the "versions" array carried by
// GetTrainJobVersionListResult.
type JobVersionList struct {
VersionName string `json:"version_name"`
VersionID int64 `json:"version_id"`
// IntStatus is the numeric status code; HandleTrainJobInfo converts it with TransTrainJobStatus.
IntStatus int `json:"status"`
}

// GetTrainJobVersionListResult is the JSON payload of a version-list query for
// a single train job. It embeds ErrorResult, so the error fields of that type
// are decoded from the same document.
type GetTrainJobVersionListResult struct {
ErrorResult
JobID int64 `json:"job_id"`
JobName string `json:"job_name"`
JobDesc string `json:"job_desc"`
VersionCount int64 `json:"version_count"`
JobVersionList []JobVersionList `json:"versions"`
}

// NotebookList is one element of the "data" array carried by GetNotebookListResult.
// Unlike JobList, both the ID and the status are strings here.
type NotebookList struct {
JobName string `json:"name"`
JobID string `json:"id"`
Status string `json:"status"`
}

// GetNotebookListResult is the paginated JSON payload of a notebook list query.
type GetNotebookListResult struct {
TotalCount int64 `json:"total"` // total number of records
CurrentPage int `json:"current"` // current page number
TotalPages int `json:"pages"` // total number of pages
Size int `json:"size"` // number of records per page
NotebookList []NotebookList `json:"data"`
}

//Grampus
type GrampusResult struct {
ErrorCode int `json:"errorCode"`
@@ -1568,6 +1617,12 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e
)
}

if (opts.JobName) != "" {
cond = cond.And(
builder.Eq{"cloudbrain.job_name": opts.JobName},
)
}

if len(opts.JobTypes) > 0 {
cond = cond.And(
builder.In("cloudbrain.job_type", opts.JobTypes),
@@ -1701,9 +1756,9 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train
return
}

func SetVersionCountAndLatestVersion(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) {
cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount}
_, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb)
func SetVersionCountAndLatestVersion(jobName string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) {
cb := &Cloudbrain{JobName: jobName, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount}
_, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_name=? AND cloudbrain.version_name=?", jobName, versionName).Update(cb)
return
}

@@ -2123,3 +2178,8 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) {
In("id", ids).
Find(&cloudbrains)
}

// GetCloudbrainCountByJobName returns how many Cloudbrain rows have the given
// job_name and job_type. The count is narrowed from int64 to int; err is
// whatever the underlying Count call reported.
func GetCloudbrainCountByJobName(jobName, jobType string) (int, error) {
	total, err := x.
		Where("job_name = ? and job_type= ?", jobName, jobType).
		Count(&Cloudbrain{})
	return int(total), err
}

+ 57
- 0
models/cloudbrain_temp.go View File

@@ -0,0 +1,57 @@
package models

import (
"time"

"code.gitea.io/gitea/modules/timeutil"
)

const (
//TempJobIdPrefix = "TEMP"

)

// CloudbrainTemp is the xorm model for a Cloudbrain task whose remote creation
// ended with an "unknown" error: the callers in modules/modelarts insert one
// row per such task and delete it once the task is matched to a remote record.
type CloudbrainTemp struct {
// CloudbrainID is the primary key; callers set it to the ID of the Cloudbrain row.
CloudbrainID int64 `xorm:"pk"`
JobName string
Type int
JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
Status string `xorm:"INDEX NOT NULL DEFAULT 'TEMP'"`
VersionCount int `xorm:"NOT NULL DEFAULT 0"`
// NOTE(review): presumably counts how often the remote side was polled for this task; not written anywhere in view.
QueryTimes int `xorm:"INDEX NOT NULL DEFAULT 0"`
CreatedUnix timeutil.TimeStamp `xorm:"INDEX"`
// UpdatedUnix carries xorm's "updated" tag.
UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
// DeletedAt carries xorm's "deleted" tag (soft-delete marker column).
DeletedAt time.Time `xorm:"deleted"`
}

// InsertCloudbrainTemp stores temp as a new row through the package engine x
// and returns the insert error, or nil on success.
func InsertCloudbrainTemp(temp *CloudbrainTemp) (err error) {
	_, err = x.Insert(temp)
	return err
}

// getCloudBrainTemp loads a single row with x.Get, using temp as the query
// bean. It returns the same pointer on success, the engine error on failure,
// and ErrJobNotExist{} when no row matched.
func getCloudBrainTemp(temp *CloudbrainTemp) (*CloudbrainTemp, error) {
	found, err := x.Get(temp)
	switch {
	case err != nil:
		return nil, err
	case !found:
		return nil, ErrJobNotExist{}
	}
	return temp, nil
}

// GetCloudbrainTempByCloudbrainID fetches the temp record whose CloudbrainID
// equals id; see getCloudBrainTemp for the error cases.
func GetCloudbrainTempByCloudbrainID(id int64) (*CloudbrainTemp, error) {
	return getCloudBrainTemp(&CloudbrainTemp{CloudbrainID: id})
}

// DeleteCloudbrainTemp deletes the row identified by temp.CloudbrainID using
// the package-level engine x; the work is done by deleteCloudbrainTemp.
func DeleteCloudbrainTemp(temp *CloudbrainTemp) error {
return deleteCloudbrainTemp(x, temp)
}

// deleteCloudbrainTemp issues a Delete on engine e restricted to
// cloudbrain_id = temp.CloudbrainID. The affected-row count is discarded;
// only the error is reported.
func deleteCloudbrainTemp(e Engine, temp *CloudbrainTemp) error {
	if _, err := e.Where("cloudbrain_id = ?", temp.CloudbrainID).Delete(temp); err != nil {
		return err
	}
	return nil
}

+ 4
- 4
modules/cloudbrain/cloudbrain.go View File

@@ -142,8 +142,8 @@ func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error)

func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) {

var ID = ctx.Params(":id")
job, err := models.GetCloudbrainByID(ID)
var id = ctx.Params(":id")
job, err := models.GetCloudbrainByID(id)
if err != nil {
log.Error("GetCloudbrainByID failed:%v", err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
@@ -158,8 +158,8 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) {

func AdminOrJobCreaterRight(ctx *context.Context) {

var ID = ctx.Params(":id")
job, err := models.GetCloudbrainByID(ID)
var id = ctx.Params(":id")
job, err := models.GetCloudbrainByID(id)
if err != nil {
log.Error("GetCloudbrainByID failed:%v", err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)


+ 476
- 240
modules/modelarts/modelarts.go View File

@@ -4,8 +4,11 @@ import (
"encoding/json"
"errors"
"fmt"
"math/rand"
"path"
"strconv"
"strings"
"time"

"code.gitea.io/gitea/modules/timeutil"

@@ -59,7 +62,7 @@ const (
PerPage = 10
IsLatestVersion = "1"
NotLatestVersion = "0"
VersionCount = 1
VersionCountOne = 1

SortByCreateTime = "create_time"
ConfigTypeCustom = "custom"
@@ -264,31 +267,13 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
log.Error("GetNotebookImageName failed: %v", err.Error())
return err
}

createTime := timeutil.TimeStampNow()
jobResult, err := createNotebook2(models.CreateNotebook2Params{
JobName: jobName,
Description: description,
Flavor: flavor,
Duration: autoStopDurationMs,
ImageID: imageId,
PoolID: poolInfos.PoolInfo[0].PoolId,
Feature: models.NotebookFeature,
Volume: models.VolumeReq{
Capacity: setting.Capacity,
Category: models.EVSCategory,
Ownership: models.ManagedOwnership,
},
WorkspaceID: "0",
})
if err != nil {
log.Error("createNotebook2 failed: %v", err.Error())
return err
}
err = models.CreateCloudbrain(&models.Cloudbrain{
Status: jobResult.Status,
task := &models.Cloudbrain{
Status: string(models.ModelArtsTrainJobWaiting),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: jobResult.ID,
JobID: models.TempJobIdPrefix + jobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))),
JobName: jobName,
FlavorCode: flavor,
DisplayJobName: displayJobName,
@@ -300,16 +285,66 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc
Description: description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
})
}

err = models.CreateCloudbrain(task)
if err != nil {
log.Error("CreateCloudbrain(%s) failed:%v", displayJobName, err.Error())
return err
}
task, err := models.GetCloudbrainByName(jobName)

jobResult, err := createNotebook2(models.CreateNotebook2Params{
JobName: jobName,
Description: description,
Flavor: flavor,
Duration: autoStopDurationMs,
ImageID: imageId,
PoolID: poolInfos.PoolInfo[0].PoolId,
Feature: models.NotebookFeature,
Volume: models.VolumeReq{
Capacity: setting.Capacity,
Category: models.EVSCategory,
Ownership: models.ManagedOwnership,
},
WorkspaceID: "0",
})
if err != nil {
log.Error("GetCloudbrainByName failed: %v", err.Error())
return err
log.Error("createNotebook2 failed: %v", err.Error())
if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", displayJobName)
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
CloudbrainID: task.ID,
Status: models.JobStatusTemp,
Type: task.Type,
JobName: task.JobName,
JobType: task.JobType,
})
if errTemp != nil {
log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
return errTemp
}
} else {
task.Status = string(models.ModelArtsCreateFailed)
errTemp := models.UpdateJob(task)
if errTemp != nil {
log.Error("UpdateJob failed: %v", errTemp.Error())
}
errTemp = models.DeleteJob(task)
if errTemp != nil {
log.Error("DeleteJob failed: %v", errTemp.Error())
}
return err
}
} else {
task.Status = jobResult.Status
task.JobID = jobResult.ID
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed: %v", err.Error())
return err
}
}

stringId := strconv.FormatInt(task.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask)
return nil
@@ -317,66 +352,15 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc

func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) {
createTime := timeutil.TimeStampNow()
var jobResult *models.CreateTrainJobResult
var createErr error
if req.EngineID < 0 {
jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.UserImageConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
UserImageUrl: req.UserImageUrl,
UserCommand: req.UserCommand,
},
})
} else {
jobResult, createErr = createTrainJob(models.CreateTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.Config{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
},
})
}
if createErr != nil {
log.Error("CreateJob failed: %v", createErr.Error())
return createErr
}
jobId := strconv.FormatInt(jobResult.JobID, 10)
createErr = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
task := &models.Cloudbrain{
Status: string(models.ModelArtsTrainJobWaiting),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: jobId,
JobID: models.TempJobIdPrefix + req.JobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))),
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeTrain),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
Uuid: req.Uuid,
DatasetName: req.DatasetName,
CommitID: req.CommitID,
@@ -398,49 +382,21 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
TotalVersionCount: req.TotalVersionCount,
CreatedUnix: createTime,
UpdatedUnix: createTime,
})

if createErr != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, createErr.Error())
return createErr
}
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask)
return nil
}

func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) {

return createTrainJobUserImage(models.CreateUserImageTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.UserImageConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
UserImageUrl: req.UserImageUrl,
UserCommand: req.UserCommand,
},
})
}
err = models.CreateCloudbrain(task)
if err != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error())
return err
}

func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
createTime := timeutil.TimeStampNow()
var jobResult *models.CreateTrainJobResult
var createErr error
log.Info(" req.EngineID =" + fmt.Sprint(req.EngineID))

if req.EngineID < 0 {
jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{
jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.TrainJobVersionUserImageConfig{
Config: models.UserImageConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
@@ -448,19 +404,20 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
PreVersionId: req.PreVersionId,
UserImageUrl: req.UserImageUrl,
UserCommand: req.UserCommand,
},
}, jobId)
})
} else {
jobResult, createErr = createTrainJobVersion(models.CreateTrainJobVersionParams{
jobResult, createErr = createTrainJob(models.CreateTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.TrainJobVersionConfig{
Config: models.Config{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
@@ -469,87 +426,60 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
PreVersionId: req.PreVersionId,
Parameter: req.Parameters,
},
}, jobId)
}
if createErr != nil {
log.Error("CreateJob failed: %v", createErr.Error())
return createErr
}

var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
repo := ctx.Repo.Repository
VersionTaskList, VersionListCount, createErr := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobTypes: jobTypes,
JobID: strconv.FormatInt(jobResult.JobID, 10),
})
if createErr != nil {
ctx.ServerError("Cloudbrain", createErr)
return createErr
}
//将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount

createErr = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: strconv.FormatInt(jobResult.JobID, 10),
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeTrain),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
Uuid: req.Uuid,
DatasetName: req.DatasetName,
CommitID: req.CommitID,
IsLatestVersion: req.IsLatestVersion,
PreVersionName: req.PreVersionName,
ComputeResource: models.NPUResource,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
BranchName: req.BranchName,
Parameters: req.Params,
BootFile: req.BootFile,
DataUrl: req.DataUrl,
LogUrl: req.LogUrl,
PreVersionId: req.PreVersionId,
FlavorCode: req.FlavorCode,
Description: req.Description,
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
EngineName: req.EngineName,
TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1,
VersionCount: VersionListCount + 1,
CreatedUnix: createTime,
UpdatedUnix: createTime,
})
if createErr != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error())
return createErr
})
}

//将训练任务的上一版本的isLatestVersion设置为"0"
createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
if createErr != nil {
ctx.ServerError("Update IsLatestVersion failed", createErr)
return createErr
log.Error("createTrainJob failed: %v", createErr.Error())
if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
CloudbrainID: task.ID,
Status: models.JobStatusTemp,
Type: task.Type,
JobName: task.JobName,
JobType: task.JobType,
})
if errTemp != nil {
log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
return errTemp
}
} else {
task.Status = string(models.ModelArtsTrainJobFailed)
errTemp := models.UpdateJob(task)
if errTemp != nil {
log.Error("UpdateJob failed: %v", errTemp.Error())
}
errTemp = models.DeleteJob(task)
if errTemp != nil {
log.Error("DeleteJob failed: %v", errTemp.Error())
}
return createErr
}
} else {
task.Status = TransTrainJobStatus(jobResult.Status)
task.JobID = strconv.FormatInt(jobResult.JobID, 10)
task.VersionID = jobResult.VersionID
task.VersionName = jobResult.VersionName
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed: %v", err.Error())
return err
}
}

return createErr
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, task.JobID, req.DisplayJobName, models.ActionCreateTrainTask)
return nil
}

func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
createTime := timeutil.TimeStampNow()
jobResult, err := createTrainJobUserImage(models.CreateUserImageTrainJobParams{
func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) {

return createTrainJobUserImage(models.CreateUserImageTrainJobParams{
JobName: req.JobName,
Description: req.Description,
Config: models.UserImageConfig{
@@ -569,11 +499,9 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain
UserCommand: req.UserCommand,
},
})
if err != nil {
log.Error("CreateJob failed: %v", err.Error())
return err
}
}

func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
var jobTypes []string
jobTypes = append(jobTypes, string(models.JobTypeTrain))
repo := ctx.Repo.Repository
@@ -581,7 +509,7 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobTypes: jobTypes,
JobID: strconv.FormatInt(jobResult.JobID, 10),
JobID: jobId,
})
if err != nil {
ctx.ServerError("Cloudbrain", err)
@@ -589,25 +517,23 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain
}
//将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount

err = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
createTime := timeutil.TimeStampNow()
task := &models.Cloudbrain{
Status: models.JobStatusTemp,
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: strconv.FormatInt(jobResult.JobID, 10),
JobID: jobId,
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeTrain),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
Uuid: req.Uuid,
DatasetName: req.DatasetName,
CommitID: req.CommitID,
IsLatestVersion: req.IsLatestVersion,
PreVersionName: req.PreVersionName,
ComputeResource: models.NPUResource,
EngineID: MORDELART_USER_IMAGE_ENGINE_ID,
Image: req.UserImageUrl,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
BranchName: req.BranchName,
Parameters: req.Params,
@@ -624,20 +550,103 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain
VersionCount: VersionListCount + 1,
CreatedUnix: createTime,
UpdatedUnix: createTime,
})
}
err = models.CreateCloudbrain(task)
if err != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
return err
}

//将训练任务的上一版本的isLatestVersion设置为"0"
err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
err = models.SetVersionCountAndLatestVersion(req.JobName, VersionTaskList[0].VersionName, VersionListCount, NotLatestVersion, VersionTaskList[0].TotalVersionCount)
if err != nil {
ctx.ServerError("Update IsLatestVersion failed", err)
return err
}

return err
var jobResult *models.CreateTrainJobResult
var createErr error

if req.EngineID < 0 {
jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{
Description: req.Description,
Config: models.TrainJobVersionUserImageConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
PreVersionId: req.PreVersionId,
UserImageUrl: req.UserImageUrl,
UserCommand: req.UserCommand,
},
}, jobId)
} else {
jobResult, createErr = createTrainJobVersion(models.CreateTrainJobVersionParams{
Description: req.Description,
Config: models.TrainJobVersionConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
PreVersionId: req.PreVersionId,
},
}, jobId)
}
if createErr != nil {
log.Error("createTrainJobVersion failed: %v", err.Error())
if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
CloudbrainID: task.ID,
Status: models.JobStatusTemp,
Type: task.Type,
JobName: task.JobName,
JobType: task.JobType,
})
if errTemp != nil {
log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
return errTemp
}
} else {
task.Status = string(models.ModelArtsTrainJobFailed)
errTemp := models.UpdateJob(task)
if errTemp != nil {
log.Error("UpdateJob failed: %v", errTemp.Error())
}
errTemp = models.DeleteJob(task)
if errTemp != nil {
log.Error("DeleteJob failed: %v", errTemp.Error())
}
return createErr
}
} else {
task.Status = TransTrainJobStatus(jobResult.Status)
task.JobID = strconv.FormatInt(jobResult.JobID, 10)
task.VersionID = jobResult.VersionID
task.VersionName = jobResult.VersionName
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed: %v", err.Error())
return err
}
}

return nil
}

func TransTrainJobStatus(status int) string {
@@ -700,47 +709,22 @@ func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) {

func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) {
createTime := timeutil.TimeStampNow()
jobResult, err := createInferenceJob(models.CreateInferenceJobParams{
JobName: req.JobName,
Description: req.Description,
InfConfig: models.InfConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
EngineID: req.EngineID,
// TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
},
})
if err != nil {
log.Error("CreateJob failed: %v", err.Error())
return err
}

attach, err := models.GetAttachmentByUUID(req.Uuid)
if err != nil {
log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error())
log.Error("GetAttachmentByUUID(%s) failed:%v", req.DisplayJobName, err.Error())
return err
}
jobID := strconv.FormatInt(jobResult.JobID, 10)
err = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),

task := &models.Cloudbrain{
Status: string(models.ModelArtsTrainJobWaiting),
UserID: ctx.User.ID,
RepoID: ctx.Repo.Repository.ID,
JobID: jobID,
JobID: models.TempJobIdPrefix + req.JobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))),
JobName: req.JobName,
DisplayJobName: req.DisplayJobName,
JobType: string(models.JobTypeInference),
Type: models.TypeCloudBrainTwo,
VersionID: jobResult.VersionID,
VersionName: jobResult.VersionName,
Uuid: req.Uuid,
DatasetName: attach.Name,
CommitID: req.CommitID,
@@ -767,13 +751,74 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e
ResultUrl: req.ResultUrl,
CreatedUnix: createTime,
UpdatedUnix: createTime,
})
}

err = models.CreateCloudbrain(task)
if err != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
return err
}
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateInferenceTask)

jobResult, err := createInferenceJob(models.CreateInferenceJobParams{
JobName: req.JobName,
Description: req.Description,
InfConfig: models.InfConfig{
WorkServerNum: req.WorkServerNumber,
AppUrl: req.CodeObsPath,
BootFileUrl: req.BootFileUrl,
DataUrl: req.DataUrl,
EngineID: req.EngineID,
// TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
CreateVersion: true,
Flavor: models.Flavor{
Code: req.FlavorCode,
},
Parameter: req.Parameters,
},
})
if err != nil {
log.Error("createTrainJob failed: %v", err.Error())
if strings.HasPrefix(err.Error(), UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", req.DisplayJobName)
err = models.InsertCloudbrainTemp(&models.CloudbrainTemp{
CloudbrainID: task.ID,
Status: models.JobStatusTemp,
Type: task.Type,
JobName: task.JobName,
JobType: task.JobType,
})
if err != nil {
log.Error("InsertCloudbrainTemp failed: %v", err.Error())
return err
}
} else {
task.Status = string(models.ModelArtsTrainJobFailed)
errTemp := models.UpdateJob(task)
if errTemp != nil {
log.Error("UpdateJob failed: %v", errTemp.Error())
}
errTemp = models.DeleteJob(task)
if errTemp != nil {
log.Error("DeleteJob failed: %v", errTemp.Error())
}
return err
}
} else {
task.Status = TransTrainJobStatus(jobResult.Status)
task.JobID = strconv.FormatInt(jobResult.JobID, 10)
task.VersionID = jobResult.VersionID
task.VersionName = jobResult.VersionName
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob failed: %v", err.Error())
return err
}
}

notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, task.JobID, req.DisplayJobName, models.ActionCreateInferenceTask)

return nil
}

@@ -799,3 +844,194 @@ func GetNotebookImageName(imageId string) (string, error) {

return imageName, nil
}

// HandleTrainJobInfo refreshes task from the remote ModelArts service and
// persists the result with models.UpdateJob.
//
// Three cases are distinguished:
//   - regular task (isTempJob is false): the job is fetched by JobID/VersionID
//     and status, duration, start and end time are updated;
//   - temp task with more than one version: the job's version list is fetched
//     and, when job id, name and version count all match, the first listed
//     version supplies status, version name and version id;
//   - temp task with a single version (also used for inference): the job list
//     is searched by job name and the first match supplies status and job id.
//
// When a temp task is matched, its CloudbrainTemp row is removed. A temp task
// that cannot be matched yet is not an error: the function logs (where the
// original did) and returns nil so the caller can retry later.
func HandleTrainJobInfo(task *models.Cloudbrain) error {
	if !isTempJob(task.JobID, task.Status) {
		//normal
		result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
		if err != nil {
			log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
			return err
		}
		if result == nil {
			return nil
		}

		task.Status = TransTrainJobStatus(result.IntStatus)
		// presumably the remote duration and start time are in milliseconds — confirm
		task.Duration = result.Duration / 1000
		if task.StartTime == 0 && result.StartTime > 0 {
			task.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
		}
		// The displayed duration is always derived from task.Duration; the
		// remote TrainJobDuration string was previously assigned and then
		// immediately overwritten, so that dead store is dropped.
		task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
		if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
			task.EndTime = task.StartTime.Add(task.Duration)
		}
		task.CorrectCreateUnix()
		if err = models.UpdateJob(task); err != nil {
			log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
			return err
		}
		return nil
	}

	if task.VersionCount > VersionCountOne {
		//multi version
		result, err := GetTrainJobVersionList(1000, 1, strings.TrimPrefix(task.JobID, models.TempJobIdPrefix))
		if err != nil {
			log.Error("GetTrainJobVersionList failed:%v", err)
			return err
		}
		if result == nil {
			return nil
		}

		// The len check guards the [0] access below: a response whose version
		// list is empty used to panic with an index-out-of-range.
		matched := strconv.FormatInt(result.JobID, 10) == task.JobID &&
			result.JobName == task.JobName &&
			result.VersionCount == int64(task.VersionCount) &&
			len(result.JobVersionList) > 0
		if !matched {
			log.Error("can not find the record(%s) until now", task.DisplayJobName)
			return nil
		}

		log.Info("find the record(%s)", task.DisplayJobName)
		// NOTE(review): assumes the first listed version is the one just created — confirm API ordering.
		version := result.JobVersionList[0]
		task.Status = TransTrainJobStatus(version.IntStatus)
		task.VersionName = version.VersionName
		task.VersionID = version.VersionID

		if err = models.UpdateJob(task); err != nil {
			log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
			return err
		}

		// Temp-row cleanup is best effort in this branch: failures are logged
		// but do not fail the call (unlike the single-version branch below).
		temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID)
		if err != nil {
			log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error())
			return nil
		}
		if err = models.DeleteCloudbrainTemp(temp); err != nil {
			log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err)
		}
		return nil
	}

	//inference or one version
	result, err := GetTrainJobList(1000, 1, "create_time", "desc", task.JobName)
	if err != nil {
		log.Error("GetTrainJobList failed:%v", err)
		return err
	}
	if result == nil {
		return nil
	}

	for _, job := range result.JobList {
		if task.JobName != job.JobName {
			continue
		}

		log.Info("find the record(%s)", task.DisplayJobName)
		task.Status = TransTrainJobStatus(job.IntStatus)
		task.JobID = strconv.FormatInt(job.JobID, 10)

		if err = models.UpdateJob(task); err != nil {
			log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
			return err
		}
		temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID)
		if err != nil {
			log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error())
			return err
		}
		if err = models.DeleteCloudbrainTemp(temp); err != nil {
			log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err)
			return err
		}
		return nil
	}

	return nil
}

// HandleNotebookInfo refreshes a notebook (debug) task from the remote
// ModelArts service and persists the result with models.UpdateJob.
//
// Regular task (isTempJob is false): the notebook is fetched by JobID and
// status, start time, end time and duration are updated.
//
// Temp task: the notebook list is queried by job name. The task is considered
// found only when the list length equals the number of local Cloudbrain rows
// with the same job name and type, and the first listed notebook carries the
// task's job name; then status and JobID are copied from it and the
// CloudbrainTemp row is deleted. Otherwise the miss is logged and nil is
// returned so the caller can retry later.
func HandleNotebookInfo(task *models.Cloudbrain) error {
	if !isTempJob(task.JobID, task.Status) {
		//normal
		result, err := GetNotebook2(task.JobID)
		if err != nil {
			log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err)
			return err
		}
		if result == nil {
			return nil
		}

		task.Status = result.Status
		if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
			// presumably Lease.UpdateTime is in milliseconds — confirm
			task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
		}
		if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
			task.EndTime = timeutil.TimeStampNow()
		}
		task.CorrectCreateUnix()
		task.ComputeAndSetDuration()
		if err = models.UpdateJob(task); err != nil {
			log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err)
			return err
		}
		return nil
	}

	result, err := GetNotebookList(1000, 0, "createTime", "DESC", task.JobName)
	if err != nil {
		log.Error("GetNotebookList failed:%v", err)
		return err
	}
	if result == nil {
		log.Error("can not find the record(%s) until now", task.DisplayJobName)
		return nil
	}

	count, err := models.GetCloudbrainCountByJobName(task.JobName, task.JobType)
	if err != nil {
		log.Error("GetCloudbrainCountByJobName failed:%v", err)
		return err
	}

	notebooks := result.NotebookList
	// The explicit emptiness check guards the [0] access: with an empty remote
	// list and a local count of 0 the lengths "match" and the original code
	// panicked with an index-out-of-range.
	if len(notebooks) == 0 || len(notebooks) != count || notebooks[0].JobName != task.JobName {
		log.Error("can not find the record(%s) until now", task.DisplayJobName)
		return nil
	}

	log.Info("find the record(%s)", task.DisplayJobName)
	task.Status = notebooks[0].Status
	task.JobID = notebooks[0].JobID

	if err = models.UpdateJob(task); err != nil {
		log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
		return err
	}
	temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID)
	if err != nil {
		log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error())
		return err
	}
	if err = models.DeleteCloudbrainTemp(temp); err != nil {
		log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err)
		return err
	}
	return nil
}

// isTempJob reports whether a task is a temporary placeholder: either its
// status is models.JobStatusTemp, or it is in the train-job waiting status
// while its job ID still carries models.TempJobIdPrefix.
func isTempJob(jobID, status string) bool {
	if status == models.JobStatusTemp {
		return true
	}
	waiting := status == string(models.ModelArtsTrainJobWaiting)
	return waiting && strings.HasPrefix(jobID, models.TempJobIdPrefix)
}

+ 171
- 21
modules/modelarts/resty.go View File

@@ -37,6 +37,7 @@ const (
NotebookNotFound = "ModelArts.6404"
NotebookNoPermission = "ModelArts.6407"
NotebookInvalid = "ModelArts.6400"
UnknownErrorPrefix = "UNKNOWN:"
)

func getRestyClient() *resty.Client {
@@ -298,6 +299,10 @@ sendjob:
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
}

if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}

if len(response.ErrorCode) != 0 {
log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
if response.ErrorCode == modelartsIllegalToken && retry < 1 {
@@ -547,9 +552,6 @@ sendjob:
return nil, fmt.Errorf("resty create train-job: %s", err)
}

req, _ := json.Marshal(createJobParams)
log.Info("%s", req)

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
@@ -563,17 +565,21 @@ sendjob:
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
}
log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
if temp.ErrorMsg == BootFileErrorMsg {
bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'."
dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'."
if temp.ErrorMsg == bootFileErrorMsg {
log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("启动文件错误!")
}
if temp.ErrorMsg == DataSetErrorMsg {
if temp.ErrorMsg == dataSetErrorMsg {
log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
} else {
return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
}
}

if !result.IsSuccess {
@@ -603,9 +609,6 @@ sendjob:
return nil, fmt.Errorf("resty create train-job version: %s", err)
}

req, _ := json.Marshal(createJobVersionParams)
log.Info("%s", req)

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
_ = getToken()
@@ -618,17 +621,23 @@ sendjob:
log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
}
BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
if temp.ErrorMsg == BootFileErrorMsg {

log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'."
dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'."
if temp.ErrorMsg == bootFileErrorMsg {
log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("启动文件错误!")
}
if temp.ErrorMsg == DataSetErrorMsg {
if temp.ErrorMsg == dataSetErrorMsg {
log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
} else {
return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
}
}

if !result.IsSuccess {
@@ -761,9 +770,6 @@ sendjob:
goto sendjob
}

//temp, _ := json.Marshal(req)
//log.Info("%s", temp)

if res.StatusCode() != http.StatusOK {
var temp models.ErrorResult
if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
@@ -1172,7 +1178,11 @@ sendjob:
log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
return &result, fmt.Errorf("数据集错误!")
}
return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
} else {
return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
}
}

if !result.IsSuccess {
@@ -1212,7 +1222,11 @@ sendjob:
err = json.Unmarshal(res.Body(), &response)
if err != nil {
log.Error("json.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error())
return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error())
}

if res.StatusCode() == http.StatusBadGateway {
return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg)
}

if len(response.ErrorCode) != 0 {
@@ -1271,3 +1285,139 @@ sendjob:

return &result, nil
}

// GetTrainJobList issues a GET against the train-job endpoint of the
// configured project and decodes the response into a
// models.GetTrainJobListResult.
//
// perPage and page are sent as the "per_page" and "page" query parameters,
// sortBy and order as "sortBy" and "order", and searchContent as
// "search_content".
//
// On HTTP 401 the token is refreshed and the request is retried once. A
// transport error yields a nil result; a non-200 status or an unsuccessful
// result yields the (possibly partially filled) result together with an error
// carrying the message reported by the service.
func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GetTrainJobListResult

	retry := 0

sendjob:
	res, err := client.R().
		SetQueryParams(map[string]string{
			"per_page":       strconv.Itoa(perPage),
			"page":           strconv.Itoa(page),
			"sortBy":         sortBy,
			"order":          order,
			"search_content": searchContent,
		}).
		SetAuthToken(TOKEN).
		SetResult(&result).
		Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob)

	if err != nil {
		return nil, fmt.Errorf("resty GetTrainJobList: %v", err)
	}

	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		// Best effort: if the refresh fails, the retried request fails with
		// 401 again and is reported through the non-200 path below.
		_ = getToken()
		goto sendjob
	}

	if res.StatusCode() != http.StatusOK {
		var temp models.ErrorResult
		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
		}
		log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
		// The service message is data, not a format string: a '%' in it must
		// not be interpreted by fmt.
		return &result, fmt.Errorf("%s", temp.ErrorMsg)
	}

	if !result.IsSuccess {
		log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("%s", result.ErrorMsg)
	}

	return &result, nil
}

// GetTrainJobVersionList issues a GET against the "/versions" sub-resource of
// the train job identified by jobID and decodes the response into a
// models.GetTrainJobVersionListResult.
//
// perPage and page are sent as the "per_page" and "page" query parameters.
//
// On HTTP 401 the token is refreshed and the request is retried once. A
// transport error yields a nil result; a non-200 status or an unsuccessful
// result yields the (possibly partially filled) result together with an error
// carrying the message reported by the service.
func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GetTrainJobVersionListResult

	retry := 0

sendjob:
	res, err := client.R().
		SetQueryParams(map[string]string{
			"per_page": strconv.Itoa(perPage),
			"page":     strconv.Itoa(page),
		}).
		SetAuthToken(TOKEN).
		SetResult(&result).
		Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions")

	if err != nil {
		return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err)
	}

	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		// Best effort: if the refresh fails, the retried request fails with
		// 401 again and is reported through the non-200 path below.
		_ = getToken()
		goto sendjob
	}

	if res.StatusCode() != http.StatusOK {
		var temp models.ErrorResult
		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
		}
		log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
		// The service message is data, not a format string: a '%' in it must
		// not be interpreted by fmt.
		return &result, fmt.Errorf("%s", temp.ErrorMsg)
	}

	if !result.IsSuccess {
		log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg)
		return &result, fmt.Errorf("%s", result.ErrorMsg)
	}

	return &result, nil
}

// GetNotebookList issues a GET against the notebook endpoint of the
// configured project and decodes the response into a
// models.GetNotebookListResult.
//
// limit and offset are sent as the "limit" and "offset" query parameters,
// searchContent as "name", sortBy as "sort_key" and order as "sort_dir".
//
// On HTTP 401 the token is refreshed and the request is retried once. A
// transport error yields a nil result; a non-200 status yields the (possibly
// partially filled) result together with an error carrying the message
// reported by the service.
func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) {
	checkSetting()
	client := getRestyClient()
	var result models.GetNotebookListResult

	retry := 0

sendjob:
	res, err := client.R().
		SetQueryParams(map[string]string{
			"limit":    strconv.Itoa(limit),
			"offset":   strconv.Itoa(offset),
			"name":     searchContent,
			"sort_key": sortBy,
			"sort_dir": order,
		}).
		SetAuthToken(TOKEN).
		SetResult(&result).
		Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2)

	if err != nil {
		return nil, fmt.Errorf("resty GetNotebookList: %v", err)
	}

	if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
		retry++
		// Best effort: if the refresh fails, the retried request fails with
		// 401 again and is reported through the non-200 path below.
		_ = getToken()
		goto sendjob
	}

	if res.StatusCode() != http.StatusOK {
		var temp models.ErrorResult
		if err = json.Unmarshal([]byte(res.String()), &temp); err != nil {
			log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error())
			return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error())
		}
		log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg)
		// The service message is data, not a format string: a '%' in it must
		// not be interpreted by fmt.
		return &result, fmt.Errorf("%s", temp.ErrorMsg)
	}

	return &result, nil
}

+ 4
- 123
routers/api/v1/repo/modelarts.go View File

@@ -26,40 +26,6 @@ import (
routerRepo "code.gitea.io/gitea/routers/repo"
)

// GetModelArtsNotebook looks up the cloudbrain record for the ":jobid" route
// parameter in the current repository, fetches the job from ModelArts, stores
// the reported status on the record and replies with the job ID and status as
// JSON.
//
// A status-change notification is sent when the stored status differs from
// the reported one. A failure to persist the record is only logged; the
// response is still sent. A missing record or a failed remote query is
// answered with NotFound.
func GetModelArtsNotebook(ctx *context.APIContext) {
	jobID := ctx.Params(":jobid")
	repoID := ctx.Repo.Repository.ID

	job, err := models.GetRepoCloudBrainByJobID(repoID, jobID)
	if err != nil {
		ctx.NotFound(err)
		return
	}

	result, err := modelarts.GetJob(jobID)
	if err != nil {
		ctx.NotFound(err)
		return
	}

	oldStatus := job.Status
	job.Status = result.Status
	if oldStatus != result.Status {
		notification.NotifyChangeCloudbrainStatus(job, oldStatus)
	}

	if err = models.UpdateJob(job); err != nil {
		// The format verb is required; without it the error value is not
		// rendered as part of the message.
		log.Error("UpdateJob failed:%v", err)
	}

	ctx.JSON(http.StatusOK, map[string]interface{}{
		"JobID":     jobID,
		"JobStatus": result.Status,
	})
}

func GetModelArtsNotebook2(ctx *context.APIContext) {
var (
err error
@@ -71,70 +37,16 @@ func GetModelArtsNotebook2(ctx *context.APIContext) {
ctx.NotFound(err)
return
}
result, err := modelarts.GetNotebook2(job.JobID)
err = modelarts.HandleNotebookInfo(job)
if err != nil {
ctx.NotFound(err)
return
}
if job.StartTime == 0 && result.Lease.UpdateTime > 0 {
job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
}
oldStatus := job.Status
job.Status = result.Status
if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) {
job.EndTime = timeutil.TimeStampNow()
}
job.CorrectCreateUnix()
job.ComputeAndSetDuration()
if oldStatus != result.Status {
notification.NotifyChangeCloudbrainStatus(job, oldStatus)
}
err = models.UpdateJob(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}

ctx.JSON(http.StatusOK, map[string]interface{}{
"ID": ID,
"JobName": job.JobName,
"JobStatus": result.Status,
})

}

func GetModelArtsTrainJob(ctx *context.APIContext) {
var (
err error
)

jobID := ctx.Params(":jobid")
repoID := ctx.Repo.Repository.ID
job, err := models.GetRepoCloudBrainByJobID(repoID, jobID)
if err != nil {
ctx.NotFound(err)
return
}
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
if err != nil {
ctx.NotFound(err)
return
}
oldStatus := job.Status
job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
job.Duration = result.Duration
job.TrainJobDuration = result.TrainJobDuration
if oldStatus != job.Status {
notification.NotifyChangeCloudbrainStatus(job, oldStatus)
}
err = models.UpdateJob(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}

ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,
"JobStatus": job.Status,
"JobDuration": job.Duration,
"JobStatus": job.Status,
})

}
@@ -188,27 +100,11 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
}
}
} else if job.Type == models.TypeCloudBrainTwo {
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
err := modelarts.HandleTrainJobInfo(job)
if err != nil {
ctx.NotFound(err)
return
}

if job.StartTime == 0 && result.StartTime > 0 {
job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
}
job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
job.Duration = result.Duration / 1000
job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)

if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
job.EndTime = job.StartTime.Add(job.Duration)
}
job.CorrectCreateUnix()
err = models.UpdateTrainJobVersion(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}
} else if job.Type == models.TypeC2Net {
result, err := grampus.GetJob(jobID)
if err != nil {
@@ -557,26 +453,11 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) {
ctx.NotFound(err)
return
}
result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10))
err = modelarts.HandleTrainJobInfo(job)
if err != nil {
ctx.NotFound(err)
return
}
if job.StartTime == 0 && result.StartTime > 0 {
job.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
}
job.Status = modelarts.TransTrainJobStatus(result.IntStatus)
job.Duration = result.Duration / 1000
job.TrainJobDuration = models.ConvertDurationToStr(job.Duration)

if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 {
job.EndTime = job.StartTime.Add(job.Duration)
}
job.CorrectCreateUnix()
err = models.UpdateInferenceJob(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}

ctx.JSON(http.StatusOK, map[string]interface{}{
"JobID": jobID,


+ 6
- 52
routers/repo/cloudbrain.go View File

@@ -1784,70 +1784,24 @@ func SyncCloudbrainStatus() {
}
} else if task.Type == models.TypeCloudBrainTwo {
if task.JobType == string(models.JobTypeDebug) {
//result, err := modelarts.GetJob(task.JobID)
result, err := modelarts.GetNotebook2(task.JobID)
err := modelarts.HandleNotebookInfo(task)
if err != nil {
log.Error("GetJob(%s) failed:%v", task.JobName, err)
log.Error("HandleNotebookInfo(%s) failed:%v", task.DisplayJobName, err)
continue
}

if result != nil {
oldStatus := task.Status
task.Status = result.Status
if task.StartTime == 0 && result.Lease.UpdateTime > 0 {
task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000)
}
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.CorrectCreateUnix()
task.ComputeAndSetDuration()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
continue
}
}
} else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) {
result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10))
err := modelarts.HandleTrainJobInfo(task)
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
log.Error("HandleTrainJobInfo(%s) failed:%v", task.DisplayJobName, err)
continue
}

if result != nil {
oldStatus := task.Status
task.Status = modelarts.TransTrainJobStatus(result.IntStatus)
task.Duration = result.Duration / 1000
task.TrainJobDuration = result.TrainJobDuration

if task.StartTime == 0 && result.StartTime > 0 {
task.StartTime = timeutil.TimeStamp(result.StartTime / 1000)
}
task.TrainJobDuration = models.ConvertDurationToStr(task.Duration)
if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 {
task.EndTime = task.StartTime.Add(task.Duration)
}
task.CorrectCreateUnix()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err)
continue
}
}
} else {
log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType)
log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType)
}
} else if task.Type == models.TypeC2Net {
result, err := grampus.GetJob(task.JobID)
if err != nil {
log.Error("GetTrainJob(%s) failed:%v", task.JobName, err)
log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err)
continue
}



+ 2
- 2
routers/repo/grampus.go View File

@@ -337,7 +337,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
EngineName: image,
DatasetName: attachment.Name,
IsLatestVersion: modelarts.IsLatestVersion,
VersionCount: modelarts.VersionCount,
VersionCount: modelarts.VersionCountOne,
WorkServerNumber: 1,
}

@@ -387,7 +387,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain
branchName := form.BranchName
isLatestVersion := modelarts.IsLatestVersion
flavorName := form.FlavorName
versionCount := modelarts.VersionCount
versionCount := modelarts.VersionCountOne
engineName := form.EngineName

if !jobNamePattern.MatchString(displayJobName) {


+ 133
- 184
routers/repo/modelarts.go View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"io/ioutil"
"math/rand"
"net/http"
"os"
"path"
@@ -262,30 +263,15 @@ func NotebookShow(ctx *context.Context) {
return
}

result, err := modelarts.GetNotebook2(task.JobID)
if err != nil {
log.Error("GET job error", err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}

if result != nil {
if task.DeletedAt.IsZero() { //normal record
if task.Status != result.Status {
oldStatus := task.Status
task.Status = result.Status
models.ParseAndSetDurationFromModelArtsNotebook(result, task)
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
err = models.UpdateJob(task)
if err != nil {
log.Error("GET job error", err.Error())
ctx.NotFound(ctx.Req.URL.RequestURI(), nil)
return
}
}
} else { //deleted record

if task.DeletedAt.IsZero() { //normal record
err := modelarts.HandleNotebookInfo(task)
if err != nil {
ctx.Data["error"] = err.Error()
ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil)
return
}
} else { //deleted record

}

datasetDownload := make([]models.DatasetDownload, 0)
@@ -396,82 +382,141 @@ func NotebookDebug2(ctx *context.Context) {
ctx.Redirect(result.Url + "?token=" + result.Token)
}

func NotebookManage(ctx *context.Context) {
func NotebookRestart(ctx *context.Context) {
var ID = ctx.Params(":id")
var action = ctx.Params(":action")
var resultCode = "0"
var resultCode = "-1"
var errorMsg = ""
var status = ""

task := ctx.Cloudbrain

for {
task, err := models.GetCloudbrainByID(ID)
if err != nil {
log.Error("get task(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
ctx.CheckWechatBind()
if ctx.Written() {
return
}
if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
errorMsg = "the job is not stopped"
break
}

if action == models.ActionStop {
if task.Status != string(models.ModelArtsRunning) {
log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "the job is not running"
count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
errorMsg = "system error"
break
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
errorMsg = "you have already a running or waiting task, can not create more"
break
}
}

if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) {
log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have no right to stop the job"
break
}
} else if action == models.ActionRestart {
ctx.CheckWechatBind()
if ctx.Written() {
return
}
if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) {
log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "the job is not stopped"
break
}
createTime := timeutil.TimeStampNow()
newTask := &models.Cloudbrain{
Status: string(models.ModelArtsTrainJobWaiting),
UserID: task.UserID,
RepoID: task.RepoID,
JobID: models.TempJobIdPrefix + task.JobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))),
JobName: task.JobName,
DisplayJobName: task.DisplayJobName,
JobType: task.JobType,
Type: task.Type,
Uuid: task.Uuid,
Image: task.Image,
ComputeResource: task.ComputeResource,
Description: task.Description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
}

if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin()) {
log.Error("the user has no right ro restart the job", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have no right to restart the job"
err = models.RestartCloudbrain(task, newTask)
if err != nil {
log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
errorMsg = "system error"
break
}

param := models.NotebookAction{
Action: models.ActionStart,
}

res, err := modelarts.ManageNotebook2(task.JobID, param)
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"])
if strings.HasPrefix(err.Error(), modelarts.UnknownErrorPrefix) {
log.Info("(%s)unknown error, set temp status", newTask.DisplayJobName)
errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{
CloudbrainID: newTask.ID,
Status: models.JobStatusTemp,
Type: newTask.Type,
JobName: newTask.JobName,
JobType: newTask.JobType,
})
if errTemp != nil {
log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error())
}
} else {
newTask.Status = string(models.ModelArtsTrainJobFailed)
errTemp := models.UpdateJob(newTask)
if errTemp != nil {
log.Error("UpdateJob failed: %v", errTemp.Error())
}
errTemp = models.DeleteJob(newTask)
if errTemp != nil {
log.Error("DeleteJob failed: %v", errTemp.Error())
}
errorMsg = err.Error()
break
}

count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID)
} else {
newTask.Status = res.Status
newTask.JobID = task.JobID
err = models.UpdateJob(newTask)
if err != nil {
log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
log.Error("UpdateJob failed: %v", err.Error())
errorMsg = err.Error()
break
} else {
if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "you have already a running or waiting task, can not create more"
break
}
}
}

action = models.ActionStart
} else {
log.Error("the action(%s) is illegal", action, ctx.Data["MsgID"])
status = res.Status
resultCode = "0"
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, strconv.FormatInt(newTask.ID, 10), newTask.DisplayJobName, models.ActionCreateDebugNPUTask)

break
}

ctx.JSON(200, map[string]string{
"result_code": resultCode,
"error_msg": errorMsg,
"status": status,
"id": ID,
})
}

func NotebookStop(ctx *context.Context) {
var ID = ctx.Params(":id")
var resultCode = "0"
var errorMsg = ""
var status = ""

task := ctx.Cloudbrain

for {
if task.Status != string(models.ModelArtsRunning) {
log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "非法操作"
errorMsg = "the job is not running"
break
}

param := models.NotebookAction{
Action: action,
Action: models.ActionStop,
}
createTime := timeutil.TimeStampNow()
res, err := modelarts.ManageNotebook2(task.JobID, param)
if err != nil {
log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
@@ -484,50 +529,17 @@ func NotebookManage(ctx *context.Context) {
}

status = res.Status
if action == models.ActionStart {
newTask := &models.Cloudbrain{
Status: status,
UserID: task.UserID,
RepoID: task.RepoID,
JobID: task.JobID,
JobName: task.JobName,
DisplayJobName: task.DisplayJobName,
JobType: task.JobType,
Type: task.Type,
Uuid: task.Uuid,
Image: task.Image,
ComputeResource: task.ComputeResource,
Description: task.Description,
CreatedUnix: createTime,
UpdatedUnix: createTime,
}

err = models.RestartCloudbrain(task, newTask)
if err != nil {
log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}
ID = strconv.FormatInt(newTask.ID, 10)
notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask)
} else {
oldStatus := task.Status
task.Status = res.Status
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.ComputeAndSetDuration()
if oldStatus != task.Status {
notification.NotifyChangeCloudbrainStatus(task, oldStatus)
}
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}
task.Status = res.Status
if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) {
task.EndTime = timeutil.TimeStampNow()
}
task.ComputeAndSetDuration()
err = models.UpdateJob(task)
if err != nil {
log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"])
resultCode = "-1"
errorMsg = "system error"
break
}

break
@@ -1000,7 +1012,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
branch_name := form.BranchName
isLatestVersion := modelarts.IsLatestVersion
FlavorName := form.FlavorName
VersionCount := modelarts.VersionCount
VersionCount := modelarts.VersionCountOne
EngineName := form.EngineName

count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID)
@@ -1702,60 +1714,6 @@ func TrainJobShow(ctx *context.Context) {
ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow)
}

// TrainJobGetLog fetches a slice of a train job's log from ModelArts and
// stores it in ctx.Data["log"].
//
// The job is taken from the ":jobid" route parameter; "file_name",
// "base_line" and "order" are read from the query string. An order other than
// modelarts.OrderDesc or modelarts.OrderAsc is answered with 400. Lookup and
// remote failures are rendered as errors on the train-job show template. On
// success no template is rendered here.
func TrainJobGetLog(ctx *context.Context) {
	ctx.Data["PageIsTrainJob"] = true

	jobID := ctx.Params(":jobid")
	logFileName := ctx.Query("file_name")
	baseLine := ctx.Query("base_line")
	order := ctx.Query("order")

	validOrder := order == modelarts.OrderDesc || order == modelarts.OrderAsc
	if !validOrder {
		log.Error("order(%s) check failed", order)
		ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow)
		return
	}

	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	versionID := strconv.FormatInt(task.VersionID, 10)
	result, err := modelarts.GetTrainJobLog(jobID, versionID, baseLine, logFileName, order, modelarts.Lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
		ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil)
		return
	}

	ctx.Data["log"] = result
}

// trainJobGetLog returns the list of log file names of the train job jobID
// together with the log content of the first listed file, requested in
// descending order and limited to modelarts.Lines lines.
//
// An error is returned when the local record cannot be found, when either
// remote query fails, or when the job has no log file at all.
func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) {
	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	versionID := strconv.FormatInt(task.VersionID, 10)

	resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, versionID)
	if err != nil {
		log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	// Guard the index below: a job without any log file would otherwise
	// cause an index-out-of-range panic.
	if resultLogFile == nil || len(resultLogFile.LogFileList) == 0 {
		log.Error("GetTrainJobLogFileNames(%s) returned no log file", jobID)
		return nil, nil, fmt.Errorf("no log file found for train job(%s)", jobID)
	}

	result, err := modelarts.GetTrainJobLog(jobID, versionID, "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines)
	if err != nil {
		log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error())
		return nil, nil, err
	}

	return resultLogFile, result, nil
}

func TrainJobDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
var listType = ctx.Query("listType")
@@ -1822,15 +1780,6 @@ func TrainJobStop(ctx *context.Context) {
ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType)
}

// canUserCreateTrainJob reports whether the user uid is a member of the
// organization named by setting.AllowedOrg. An error is returned when that
// organization cannot be loaded or the membership check itself fails.
func canUserCreateTrainJob(uid int64) (bool, error) {
	org, err := models.GetOrgByName(setting.AllowedOrg)
	if err != nil {
		// Format verbs are required for the arguments to appear in the log
		// line; the underlying error is included as well.
		log.Error("get allowed org(%s) failed: %v", setting.AllowedOrg, err)
		return false, err
	}

	return org.IsOrgMember(uid)
}
func canUserCreateTrainJobVersion(ctx *context.Context, userID int64) (bool, error) {
if ctx == nil || ctx.User == nil {
log.Error("user unlogin!")
@@ -1922,7 +1871,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference
EngineName := form.EngineName
LabelName := form.LabelName
isLatestVersion := modelarts.IsLatestVersion
VersionCount := modelarts.VersionCount
VersionCount := modelarts.VersionCountOne
trainUrl := form.TrainUrl
modelName := form.ModelName
modelVersion := form.ModelVersion


+ 2
- 1
routers/routes/routes.go View File

@@ -1183,7 +1183,8 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/:id", func() {
m.Get("", reqRepoCloudBrainReader, repo.NotebookShow)
m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2)
m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage)
m.Post("/restart", cloudbrain.AdminOrJobCreaterRight, repo.NotebookRestart)
m.Post("/stop", cloudbrain.AdminOrJobCreaterRight, repo.NotebookStop)
m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel)
})
m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew)


Loading…
Cancel
Save