Browse Source

update

tags/v1.21.12.1
liuzx 3 years ago
parent
commit
d79754e682
4 changed files with 49 additions and 120 deletions
  1. +12
    -1
      models/cloudbrain.go
  2. +20
    -36
      modules/modelarts/modelarts.go
  3. +11
    -24
      routers/api/v1/repo/modelarts.go
  4. +6
    -59
      routers/repo/modelarts.go

+ 12
- 1
models/cloudbrain.go View File

@@ -1040,9 +1040,20 @@ func UpdateJob(job *Cloudbrain) error {
}

// updateJob writes job's status, container ID and container IP to the
// cloudbrain row(s) whose job_id equals job.JobID, using the given engine.
// The error from the underlying Update call, if any, is returned unchanged.
func updateJob(e Engine, job *Cloudbrain) error {
	// Select the target rows first; the column list below is what gets sent in the UPDATE.
	var query *xorm.Session = e.Where("job_id = ?", job.JobID)
	if _, err := query.Cols("status", "container_id", "container_ip").Update(job); err != nil {
		return err
	}
	return nil
}

// UpdateTrainJobVersion persists the given training-job version record by
// delegating to updateJobTrainVersion with x (an engine value declared
// elsewhere in this package). It returns that helper's error unchanged.
func UpdateTrainJobVersion(job *Cloudbrain) error {
return updateJobTrainVersion(x, job)
}

// updateJobTrainVersion updates a single version of a training job, selected
// by both job.JobID and job.VersionName, using the given engine.
//
// Only the "status" and "train_job_duration" columns are written. The block
// previously carried two consecutive `_, err := ...Update(job)` statements
// (a leftover of the old column list next to the new one), which does not
// compile because the second `:=` declares no new variable; only the current
// column list is kept.
//
// The error from the underlying Update call, if any, is returned unchanged.
func updateJobTrainVersion(e Engine, job *Cloudbrain) error {
	var sess *xorm.Session
	sess = e.Where("job_id = ? AND version_name=?", job.JobID, job.VersionName)
	_, err := sess.Cols("status", "train_job_duration").Update(job)
	return err
}



+ 20
- 36
modules/modelarts/modelarts.go View File

@@ -80,6 +80,7 @@ type GenerateTrainJobReq struct {
IsLatestVersion string
Params string
BranchName string
PreVersionId int64
PreVersionName string
FlavorName string
VersionCount int
@@ -297,7 +298,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error
return nil
}

func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string) (err error) {
func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) {
jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{
Description: req.Description,
Config: models.TrainJobVersionConfig{
@@ -327,6 +328,19 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR
return err
}

repo := ctx.Repo.Repository
VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobType: string(models.JobTypeTrain),
JobID: strconv.FormatInt(jobResult.JobID, 10),
})
if err != nil {
ctx.ServerError("Cloudbrain", err)
return err
}
//将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount

err = models.CreateCloudbrain(&models.Cloudbrain{
Status: TransTrainJobStatus(jobResult.Status),
UserID: ctx.User.ID,
@@ -340,6 +354,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR
Uuid: req.Uuid,
DatasetName: attach.Name,
CommitID: req.CommitID,
IsLatestVersion: req.IsLatestVersion,
PreVersionName: req.PreVersionName,
ComputeResource: ComputeResource,
EngineID: req.EngineID,
@@ -355,49 +370,18 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR
WorkServerNumber: req.WorkServerNumber,
FlavorName: req.FlavorName,
EngineName: req.EngineName,
TotalVersionCount: req.TotalVersionCount,
TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1,
VersionCount: VersionListCount + 1,
})
if err != nil {
log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error())
return err
}

repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}
_, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobType: string(models.JobTypeTrain),
JobID: strconv.FormatInt(jobResult.JobID, 10),
})
if err != nil {
ctx.ServerError("Cloudbrain", err)
return err
}

//将训练任务的上一版本的isLatestVersion设置为"0"
latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(strconv.FormatInt(jobResult.JobID, 10), IsLatestVersion)
if err != nil {
ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err)
return err
}
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion, req.TotalVersionCount)
if err != nil {
ctx.ServerError("UpdateJobVersionCount failed", err)
return err
}

//将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion, req.TotalVersionCount)
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount)
if err != nil {
ctx.ServerError("UpdateJobVersionCount failed", err)
ctx.ServerError("Update IsLatestVersion failed", err)
return err
}



+ 11
- 24
routers/api/v1/repo/modelarts.go View File

@@ -14,7 +14,6 @@ import (
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/modelarts"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
)

@@ -112,7 +111,7 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) {
job.TrainJobDuration = "00:00:00"
}

err = models.UpdateJob(job)
err = models.UpdateTrainJobVersion(job)
if err != nil {
log.Error("UpdateJob failed:", err)
}
@@ -213,31 +212,24 @@ func DelTrainJobVersion(ctx *context.APIContext) {
return
}

_, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10))
//删除数据库记录
err = models.DeleteJobVersion(task)
if err != nil {
log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error())
ctx.ServerError("DeleteJobVersion failed", err)
ctx.NotFound(err)
return
}
err = models.DeleteJobVersion(task)
//删除modelarts上的记录
_, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10))
if err != nil {
ctx.ServerError("DeleteJobVersion failed", err)
log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error())
ctx.NotFound(err)
return
}

//获取删除后的版本数量
repo := ctx.Repo.Repository
page := ctx.QueryInt("page")
if page <= 0 {
page = 1
}
VersionListTasks, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
VersionTaskList, VersionListCount, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobType: string(models.JobTypeTrain),
@@ -248,20 +240,15 @@ func DelTrainJobVersion(ctx *context.APIContext) {
return
}

//判断当前的任务是否是最新版本,若是,将排序后的第一个版本设置为最新版本,若不是,最新版本不变,更改最新版本的版本数。
// 判断当前删掉的任务是否是最新版本,若是,将排序后的TotalVersionCount置为删掉的最新版本的TotalVersionCount,若不是,按时间排序后的版本列表的第一个版本设置为最新版本,TotalVersionCount不变
if task.IsLatestVersion == modelarts.IsLatestVersion {
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionListTasks[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount)
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionTaskList[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, task.TotalVersionCount)
if err != nil {
ctx.ServerError("UpdateJobVersionCount failed", err)
return
}
} else {
latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(jobID, modelarts.IsLatestVersion)
if err != nil {
ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err)
return
}
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, latestTask.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount)
err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionTaskList[0].VersionName, VersionListCount, modelarts.IsLatestVersion, VersionTaskList[0].Cloudbrain.TotalVersionCount)
if err != nil {
ctx.ServerError("UpdateJobVersionCount failed", err)
return


+ 6
- 59
routers/repo/modelarts.go View File

@@ -34,7 +34,6 @@ const (
tplModelArtsTrainJobIndex base.TplName = "repo/modelarts/trainjob/index"
tplModelArtsTrainJobNew base.TplName = "repo/modelarts/trainjob/new"
tplModelArtsTrainJobShow base.TplName = "repo/modelarts/trainjob/show"
tplModelArtsTrainJobShowModels base.TplName = "repo/modelarts/trainjob/models/index"
tplModelArtsTrainJobVersionNew base.TplName = "repo/modelarts/trainjob/version_new"
)

@@ -481,8 +480,6 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error {
func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) {
ctx.Data["PageIsTrainJob"] = true
VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(modelarts.TotalVersionCount)
// StringTotalVersionCount := fmt.Sprintf("%04d", modelarts.TotalVersionCount)
// VersionOutputPath := "V" + StringTotalVersionCount
jobName := form.JobName
uuid := form.Attachment
description := form.Description
@@ -539,15 +536,12 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
ctx.Data["params"] = form.Params
ctx.Data["branch_name"] = branch_name
trainJobNewDataPrepare(ctx)
// ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobNew, &form)
// ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form)
return
}

//todo: upload code (send to file_server todo this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil {
// if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
trainJobNewDataPrepare(ctx)
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
@@ -681,8 +675,6 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err)
return
}
// StringTotalVersionCount := fmt.Sprintf("%04d", latestTask.TotalVersionCount+1)
// VersionOutputPath := "V" + StringTotalVersionCount
VersionOutputPath := modelarts.GetVersionOutputPathByTotalVersionCount(latestTask.TotalVersionCount + 1)

jobName := form.JobName
@@ -705,6 +697,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
PreVersionName := form.VersionName
FlavorName := form.FlavorName
EngineName := form.EngineName
isLatestVersion := modelarts.IsLatestVersion

if err := paramCheckCreateTrainJob(form); err != nil {
log.Error("paramCheckCreateTrainJob failed:(%v)", err)
@@ -738,9 +731,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.Data["datasetName"] = attach.Name
ctx.Data["params"] = form.Params
ctx.Data["branch_name"] = branch_name
// ctx.RenderWithErr("Failed to clone repository", tplModelArtsTrainJobNew, &form)
ctx.RenderWithErr("创建任务失败,任务名称已存在!", tplModelArtsTrainJobVersionNew, &form)
// ctx.RenderWithErr(err, tplModelArtsTrainJobNew, &form)
return
}

@@ -842,7 +833,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form)
return
}
req := &modelarts.GenerateTrainJobVersionReq{
req := &modelarts.GenerateTrainJobReq{
JobName: task.JobName,
DataUrl: dataPath,
Description: description,
@@ -852,6 +843,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
TrainUrl: outputObsPath,
FlavorCode: flavorCode,
WorkServerNumber: workServerNumber,
IsLatestVersion: isLatestVersion,
EngineID: int64(engineID),
LogUrl: logObsPath,
PoolID: poolID,
@@ -866,6 +858,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ
PreVersionName: PreVersionName,
TotalVersionCount: latestTask.TotalVersionCount + 1,
}

err = modelarts.GenerateTrainJobVersion(ctx, req, jobID)
if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error())
@@ -1088,16 +1081,8 @@ func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *model
func TrainJobDel(ctx *context.Context) {
var jobID = ctx.Params(":jobid")
repo := ctx.Repo.Repository
page := ctx.QueryInt("page")

if page <= 0 {
page = 1
}
VersionListTasks, _, err := models.CloudbrainsVersionList(&models.CloudbrainsOptions{
ListOptions: models.ListOptions{
Page: page,
PageSize: setting.UI.IssuePagingNum,
},
RepoID: repo.ID,
Type: models.TypeCloudBrainTwo,
JobType: string(models.JobTypeTrain),
@@ -1107,6 +1092,7 @@ func TrainJobDel(ctx *context.Context) {
ctx.ServerError("get VersionListTasks failed", err)
return
}
//删除数据库Cloudbrain表的记录
for _, task := range VersionListTasks {
err = models.DeleteJobVersion(&task.Cloudbrain)
if err != nil {
@@ -1114,6 +1100,7 @@ func TrainJobDel(ctx *context.Context) {
return
}
}
//删除modelarts上的任务记录
_, err = modelarts.DelTrainJob(jobID)
if err != nil {
log.Error("DelTrainJob(%s) failed:%v", jobID, err.Error())
@@ -1206,43 +1193,3 @@ func getConfigList(perPage, page int, sortBy, order, searchContent, configType s

return list, nil
}

// TrainJobShowModels renders the model-file listing page of a training job.
//
// The job ID comes from the ":jobid" route parameter and the directory to list
// from the "parentDir" query value. The job is looked up with
// models.GetCloudbrainByJobID and the directory contents are fetched with
// storage.GetObsListObject, keyed by the job's name. On either failure the
// error is logged and the request is answered through ctx.ServerError;
// otherwise tplModelArtsTrainJobShowModels is rendered with status 200.
func TrainJobShowModels(ctx *context.Context) {
	ctx.Data["PageIsCloudBrain"] = true

	jobID := ctx.Params(":jobid")
	parentDir := ctx.Query("parentDir")
	// Path segments of parentDir, exposed to the template as "Path".
	dirArray := strings.Split(parentDir, "/")

	task, err := models.GetCloudbrainByJobID(jobID)
	if err != nil {
		// Include the lookup error itself; the previous message had no format
		// verb, so the cause never reached the log.
		log.Error("no such job(%s): %v", jobID, err, ctx.Data["msgID"])
		ctx.ServerError("no such job:", err)
		return
	}

	// Named objects rather than models so the imported models package is not shadowed.
	objects, err := storage.GetObsListObject(task.JobName, parentDir)
	if err != nil {
		// This path ends in a server error, so log it as an error and format err properly.
		log.Error("get TrainJobListModel failed: %v", err, ctx.Data["msgID"])
		ctx.ServerError("GetObsListObject:", err)
		return
	}

	ctx.Data["Path"] = dirArray
	ctx.Data["Dirs"] = objects
	ctx.Data["task"] = task
	ctx.Data["JobID"] = jobID
	ctx.HTML(200, tplModelArtsTrainJobShowModels)
}

// TrainJobDownloadModel redirects the client to a signed download URL for a
// model file.
//
// The file is identified by the "jobName", "parentDir" and "fileName" query
// values, which are passed to storage.GetObsCreateSignedUrl. If no URL can be
// produced the error is logged and the request is answered through
// ctx.ServerError.
func TrainJobDownloadModel(ctx *context.Context) {
	parentDir := ctx.Query("parentDir")
	fileName := ctx.Query("fileName")
	jobName := ctx.Query("jobName")
	url, err := storage.GetObsCreateSignedUrl(jobName, parentDir, fileName)
	if err != nil {
		log.Error("GetObsCreateSignedUrl failed: %v", err.Error(), ctx.Data["msgID"])
		ctx.ServerError("GetObsCreateSignedUrl", err)
		return
	}
	// Use a temporary redirect: a 301 may be cached by clients indefinitely,
	// while the URL is generated afresh on every request.
	// NOTE(review): the signed URL is presumably time-limited — confirm against
	// GetObsCreateSignedUrl; if so, a cached 301 would point at an expired link.
	http.Redirect(ctx.Resp, ctx.Req.Request, url, http.StatusFound)
}

Loading…
Cancel
Save