Browse Source

update

tags/v1.21.12.1
liuzx 3 years ago
parent
commit
b475be05d6
5 changed files with 69 additions and 163 deletions
  1. +58
    -152
      models/cloudbrain.go
  2. +0
    -1
      models/models.go
  3. +2
    -2
      modules/modelarts/modelarts.go
  4. +1
    -2
      routers/api/v1/repo/modelarts.go
  5. +8
    -6
      routers/repo/modelarts.go

+ 58
- 152
models/cloudbrain.go View File

@@ -52,32 +52,32 @@ type Cloudbrain struct {
ID int64 `xorm:"pk autoincr"`
JobID string `xorm:"INDEX NOT NULL"`
JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"`
JobName string `xorm:"INDEX"`
Status string `xorm:"INDEX"`
UserID int64 `xorm:"INDEX"`
RepoID int64 `xorm:"INDEX"`
SubTaskName string `xorm:"INDEX"`
JobName string
Status string
UserID int64
RepoID int64
SubTaskName string
ContainerID string
ContainerIp string
CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
Duration int64 `xorm:"INDEX duration"`
TrainJobDuration string `xorm:"INDEX DEFAULT '00:00:00'"`
DeletedAt time.Time `xorm:"deleted"`
CanDebug bool `xorm:"-"`
CanDel bool `xorm:"-"`
Type int `xorm:"INDEX DEFAULT 0"`
VersionID int64 `xorm:"INDEX DEFAULT 0"`
VersionName string `xorm:"INDEX"`
Uuid string //数据集id
DatasetName string
VersionCount int `xorm:"INDEX DEFAULT 1"` //任务的当前版本数量,不包括删除的
IsLatestVersion string //是否是最新版本,1是,0否
CommitID string //提交的仓库代码id
FatherVersionName string //父版本名称
ComputeResource string //计算资源,例如npu
EngineID int64 //引擎id
CreatedUnix timeutil.TimeStamp
UpdatedUnix timeutil.TimeStamp
Duration int64
TrainJobDuration string
DeletedAt time.Time `xorm:"deleted"`
CanDebug bool `xorm:"-"`
CanDel bool `xorm:"-"`
Type int
VersionID int64 //版本id
VersionName string //当前版本
Uuid string //数据集id
DatasetName string
VersionCount int //任务的当前版本数量,不包括删除的
IsLatestVersion string //是否是最新版本,1是,0否
CommitID string //提交的仓库代码id
PreVersionName string //父版本名称
ComputeResource string //计算资源,例如npu
EngineID int64 //引擎id

TrainUrl string //输出的obs路径
BranchName string //分支名称
@@ -87,7 +87,7 @@ type Cloudbrain struct {
LogUrl string //日志输出的obs路径
PreVersionId int64 //父版本的版本id
FlavorCode string //modelarts上的规格id
Description string
Description string //描述
WorkServerNumber int //节点数
FlavorName string //规格名称
EngineName string //引擎名称
@@ -97,25 +97,6 @@ type Cloudbrain struct {
Repo *Repository `xorm:"-"`
}

// TrainjobConfigDetail is an xorm-mapped struct; judging by its name and
// fields it stores one saved train-job configuration per row.
// NOTE(review): every persisted field except ID carries an INDEX tag,
// including free-form ones such as Params and TrainUrl — confirm that
// indexing all of them is intended.
type TrainjobConfigDetail struct {
// ID is the auto-incrementing primary key (see the xorm tag).
ID int64 `xorm:"pk autoincr"`
// Job identity.
JobID string `xorm:"INDEX"`
JobName string `xorm:"INDEX"`
// Resource / engine / flavor selection. Note EngineVersions is an int,
// while ResourcePools and FlavorInfos are strings.
ResourcePools string `xorm:"INDEX"`
EngineVersions int `xorm:"INDEX"`
FlavorInfos string `xorm:"INDEX"`
// Presumably the training output location and entry file — TODO confirm.
TrainUrl string `xorm:"INDEX"`
BootFile string `xorm:"INDEX"`
// Presumably the dataset identifier and its display name — TODO confirm.
Uuid string `xorm:"INDEX"`
DatasetName string `xorm:"INDEX"`
// Params is stored as a single string; its encoding is not visible here.
Params string `xorm:"INDEX"`
BranchName string `xorm:"INDEX"`
VersionName string `xorm:"INDEX"`

// User and Repo are tagged `xorm:"-"`, i.e. they are not mapped to
// table columns and must be populated by the caller.
User *User `xorm:"-"`
Repo *Repository `xorm:"-"`
}

type CloudbrainInfo struct {
Cloudbrain `xorm:"extends"`
User `xorm:"extends"`
@@ -621,20 +602,14 @@ type Config struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
EngineID int64 `json:"engine_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
CreateVersion bool `json:"create_version"`
//Volumes []Volumes `json:"volumes"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
CreateVersion bool `json:"create_version"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
}

type CreateTrainJobVersionParams struct {
@@ -648,20 +623,12 @@ type TrainJobVersionConfig struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
//Volumes []Volumes `json:"volumes"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
PreVersionId int64 `json:"pre_version_id"`
EngineID int64 `json:"engine_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
PreVersionId int64 `json:"pre_version_id"`
}

type CreateConfigParams struct {
@@ -672,20 +639,11 @@ type CreateConfigParams struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
//CreateVersion bool `json:"create_version"`
//Volumes []Volumes `json:"volumes"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
EngineID int64 `json:"engine_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
}

type Parameter struct {
@@ -799,18 +757,10 @@ type GetConfigResult struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
//CreateVersion bool `json:"create_version"`
//Volumes []Volumes `json:"volumes"`
EngineID int64 `json:"engine_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`

Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
}
@@ -841,26 +791,18 @@ type GetTrainJobResult struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"`
EngineName string `json:"engine_name"`
EngineVersion string `json:"engine_version"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
//Volumes []Volumes `json:"volumes"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
PoolName string `json:"pool_name"`
NasMountPath string `json:"nas_mount_path"`
NasShareAddr string `json:"nas_share_addr"`
DatasetName string
ModelMetricList string `json:"model_metric_list"` //列表里包含f1_score,recall,precision,accuracy,若有的话
EngineID int64 `json:"engine_id"`
EngineName string `json:"engine_name"`
EngineVersion string `json:"engine_version"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
PoolName string `json:"pool_name"`
NasMountPath string `json:"nas_mount_path"`
NasShareAddr string `json:"nas_share_addr"`
DatasetName string
ModelMetricList string `json:"model_metric_list"` //列表里包含f1_score,recall,precision,accuracy,若有的话
}

type GetTrainJobLogResult struct {
@@ -931,17 +873,6 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
)
}

// switch opts.JobStatus {
// case JobWaiting:
// cond.And(builder.Eq{"cloudbrain.status": int(JobWaiting)})
// case JobFailed:
// cond.And(builder.Eq{"cloudbrain.status": int(JobFailed)})
// case JobStopped:
// cond.And(builder.Eq{"cloudbrain.status": int(JobStopped)})
// case JobSucceeded:
// cond.And(builder.Eq{"cloudbrain.status": int(JobSucceeded)})
// }

if len(opts.CloudbrainIDs) > 0 {
cond = cond.And(builder.In("cloudbrain.id", opts.CloudbrainIDs))
}
@@ -968,7 +899,6 @@ func Cloudbrains(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int64, error) {
Find(&cloudbrains); err != nil {
return nil, 0, fmt.Errorf("Find: %v", err)
}
sess.Close()

return cloudbrains, count, nil
}
@@ -1034,7 +964,6 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e
Find(&cloudbrains); err != nil {
return nil, 0, fmt.Errorf("Find: %v", err)
}
sess.Close()

return cloudbrains, int(count), nil
}
@@ -1046,13 +975,6 @@ func CreateCloudbrain(cloudbrain *Cloudbrain) (err error) {
return nil
}

// CreateTrainjobConfigDetail inserts trainjobConfigDetail through the
// package-level engine x. It returns the error reported by Insert, or nil
// when the insert succeeds.
func CreateTrainjobConfigDetail(trainjobConfigDetail *TrainjobConfigDetail) (err error) {
	// The affected-row count is not needed; only the error is propagated.
	_, err = x.Insert(trainjobConfigDetail)
	return err
}

func getRepoCloudBrain(cb *Cloudbrain) (*Cloudbrain, error) {
has, err := x.Get(cb)
if err != nil {
@@ -1068,11 +990,6 @@ func GetRepoCloudBrainByJobID(repoID int64, jobID string) (*Cloudbrain, error) {
return getRepoCloudBrain(cb)
}

// GetRepoCloudBrainByJobIDAndVersionName looks up a Cloudbrain record by
// handing getRepoCloudBrain a template whose RepoID, JobID and VersionName
// are set from the arguments.
// NOTE(review): presumably the populated fields act as the match conditions
// of the underlying x.Get call — confirm against xorm's Get semantics.
func GetRepoCloudBrainByJobIDAndVersionName(repoID int64, jobID string, versionName string) (*Cloudbrain, error) {
	return getRepoCloudBrain(&Cloudbrain{
		RepoID:      repoID,
		JobID:       jobID,
		VersionName: versionName,
	})
}

func GetCloudbrainByJobID(jobID string) (*Cloudbrain, error) {
cb := &Cloudbrain{JobID: jobID}
return getRepoCloudBrain(cb)
@@ -1129,17 +1046,6 @@ func updateJob(e Engine, job *Cloudbrain) error {
return err
}

// func UpdateTrainJob(job *CloudbrainInfo) error {
// return updateTrainJob(x, job)
// }

// func updateTrainJob(e Engine, job *CloudbrainInfo) error {
// var sess *xorm.Session
// sess = e.Where("job_id = ?", job.Cloudbrain.JobID)
// _, err := sess.Cols("status", "container_id", "container_ip").Update(job)
// return err
// }

// DeleteJob is the exported entry point for removing a Cloudbrain job: it
// forwards job to deleteJob together with the package-level engine x and
// returns whatever error deleteJob reports.
func DeleteJob(job *Cloudbrain) error {
	err := deleteJob(x, job)
	return err
}


+ 0
- 1
models/models.go View File

@@ -133,7 +133,6 @@ func init() {
new(FileChunk),
new(BlockChain),
new(RecommendOrg),
new(TrainjobConfigDetail),
)

tablesStatistic = append(tablesStatistic,


+ 2
- 2
modules/modelarts/modelarts.go View File

@@ -79,7 +79,7 @@ type GenerateTrainJobReq struct {
IsLatestVersion string
Params string
BranchName string
FatherVersionName string
PreVersionName string
FlavorName string
VersionCount int
EngineName string
@@ -107,7 +107,7 @@ type GenerateTrainJobVersionReq struct {
BranchName string
FlavorName string
EngineName string
FatherVersionName string
PreVersionName string
TotalVersionCount int
}



+ 1
- 2
routers/api/v1/repo/modelarts.go View File

@@ -90,8 +90,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {

jobID := ctx.Params(":jobid")
versionName := ctx.Query("version_name")
repoID := ctx.Repo.Repository.ID
job, err := models.GetRepoCloudBrainByJobIDAndVersionName(repoID, jobID, versionName)
job, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName)
if err != nil {
ctx.NotFound(err)
return


+ 8
- 6
routers/repo/modelarts.go View File

@@ -3,6 +3,7 @@ package repo
import (
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
@@ -491,7 +492,8 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {

func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
ctx.Data["PageIsTrainJob"] = true
VersionOutputPath := "V" + strconv.Itoa(modelarts.TotalVersionCount)
StringTotalVersionCount := fmt.Sprintf("%04d", modelarts.TotalVersionCount)
VersionOutputPath := "V" + StringTotalVersionCount
jobName := form.JobName
uuid := form.Attachment
description := form.Description
@@ -691,8 +693,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err)
return
}
VersionOutputPath := "V" + strconv.Itoa(latestTask.TotalVersionCount+1)
StringTotalVersionCount := fmt.Sprintf("%04d", latestTask.TotalVersionCount+1)
VersionOutputPath := "V" + StringTotalVersionCount

jobName := form.JobName
uuid := form.Attachment
@@ -711,7 +713,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/"
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/"
branch_name := form.BranchName
fatherVersionName := form.VersionName
PreVersionName := form.VersionName
FlavorName := form.FlavorName
EngineName := form.EngineName

@@ -845,7 +847,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
return
}

task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, fatherVersionName)
task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, PreVersionName)
if err != nil {
log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error())
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
@@ -872,7 +874,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
BranchName: branch_name,
FlavorName: FlavorName,
EngineName: EngineName,
FatherVersionName: fatherVersionName,
PreVersionName: PreVersionName,
TotalVersionCount: latestTask.TotalVersionCount + 1,
}
err = modelarts.GenerateTrainJobVersion(ctx, req, jobID)


Loading…
Cancel
Save