| @@ -80,18 +80,19 @@ type Cloudbrain struct { | |||
| ComputeResource string | |||
| EngineID int64 | |||
| TrainUrl string | |||
| BranchName string | |||
| Parameters string | |||
| BootFile string | |||
| DataUrl string | |||
| LogUrl string | |||
| PreVersionId int64 | |||
| FlavorCode string | |||
| Description string | |||
| WorkServerNumber int | |||
| FlavorName string | |||
| EngineName string | |||
| TrainUrl string | |||
| BranchName string | |||
| Parameters string | |||
| BootFile string | |||
| DataUrl string | |||
| LogUrl string | |||
| PreVersionId int64 | |||
| FlavorCode string | |||
| Description string | |||
| WorkServerNumber int | |||
| FlavorName string | |||
| EngineName string | |||
| TotalVersionCount int | |||
| User *User `xorm:"-"` | |||
| Repo *Repository `xorm:"-"` | |||
| @@ -1112,9 +1113,9 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train | |||
| return | |||
| } | |||
| func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int, isLatestVersion string) (err error) { | |||
| cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion} | |||
| _, err = x.Cols("version_Count", "is_latest_version").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) | |||
| func SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) { | |||
| cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount} | |||
| _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) | |||
| return | |||
| } | |||
| @@ -35,24 +35,24 @@ const ( | |||
| // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + | |||
| // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + | |||
| // "]}" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 500 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| IsLatestVersion = "1" | |||
| NotLatestVersion = "0" | |||
| ComputeResource = "NPU" | |||
| InitFatherVersionName = "V0001" | |||
| VersionCount = 1 | |||
| SortByCreateTime = "create_time" | |||
| ConfigTypeCustom = "custom" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 500 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| IsLatestVersion = "1" | |||
| NotLatestVersion = "0" | |||
| ComputeResource = "NPU" | |||
| VersionCount = 1 | |||
| SortByCreateTime = "create_time" | |||
| ConfigTypeCustom = "custom" | |||
| TotalVersionCount = 1 | |||
| ) | |||
| var ( | |||
| @@ -83,6 +83,7 @@ type GenerateTrainJobReq struct { | |||
| FlavorName string | |||
| VersionCount int | |||
| EngineName string | |||
| TotalVersionCount int | |||
| } | |||
| type GenerateTrainJobVersionReq struct { | |||
| @@ -107,6 +108,7 @@ type GenerateTrainJobVersionReq struct { | |||
| FlavorName string | |||
| EngineName string | |||
| FatherVersionName string | |||
| TotalVersionCount int | |||
| } | |||
| type VersionInfo struct { | |||
| @@ -256,22 +258,22 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
| } | |||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| CommitID: req.CommitID, | |||
| IsLatestVersion: req.IsLatestVersion, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| FatherVersionName: req.FatherVersionName, | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| CommitID: req.CommitID, | |||
| IsLatestVersion: req.IsLatestVersion, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| // FatherVersionName: req.FatherVersionName, | |||
| TrainUrl: req.TrainUrl, | |||
| BranchName: req.BranchName, | |||
| Parameters: req.Params, | |||
| @@ -284,6 +286,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||
| FlavorName: req.FlavorName, | |||
| EngineName: req.EngineName, | |||
| VersionCount: req.VersionCount, | |||
| TotalVersionCount: req.TotalVersionCount, | |||
| }) | |||
| if err != nil { | |||
| @@ -352,6 +355,7 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR | |||
| WorkServerNumber: req.WorkServerNumber, | |||
| FlavorName: req.FlavorName, | |||
| EngineName: req.EngineName, | |||
| TotalVersionCount: req.TotalVersionCount, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||
| @@ -384,14 +388,14 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR | |||
| ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) | |||
| return err | |||
| } | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion, req.TotalVersionCount) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return err | |||
| } | |||
| //将当前版本的isLatestVersion设置为"1"和任务数量更新 | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) | |||
| //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion, req.TotalVersionCount) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return err | |||
| @@ -267,7 +267,7 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||
| //判断当前的任务是否是最新版本,若是,将排序后的第一个版本设置为最新版本,若不是,最新版本不变,更改最新版本的版本数。 | |||
| if task.IsLatestVersion == modelarts.IsLatestVersion { | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionListTasks[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion) | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, VersionListTasks[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return | |||
| @@ -278,7 +278,7 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||
| ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) | |||
| return | |||
| } | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, latestTask.VersionName, VersionListCount, modelarts.IsLatestVersion) | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(jobID, latestTask.VersionName, VersionListCount, modelarts.IsLatestVersion, VersionListTasks[0].Cloudbrain.TotalVersionCount) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return | |||
| @@ -491,6 +491,7 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||
| func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { | |||
| ctx.Data["PageIsTrainJob"] = true | |||
| VersionOutputPath := "V" + strconv.Itoa(modelarts.TotalVersionCount) | |||
| jobName := form.JobName | |||
| uuid := form.Attachment | |||
| description := form.Description | |||
| @@ -504,8 +505,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| repo := ctx.Repo.Repository | |||
| codeLocalPath := setting.JobPath + jobName + modelarts.CodePath | |||
| codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" | |||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath + VersionOutputPath + "/" | |||
| dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" | |||
| branch_name := form.BranchName | |||
| isLatestVersion := modelarts.IsLatestVersion | |||
| @@ -554,14 +555,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| } | |||
| //todo: upload code (send to file_server todo this work?) | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { | |||
| log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) | |||
| trainJobNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { | |||
| log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) | |||
| trainJobNewDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) | |||
| @@ -640,28 +641,29 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| } | |||
| req := &modelarts.GenerateTrainJobReq{ | |||
| JobName: jobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFileUrl: codeObsPath + bootFile, | |||
| BootFile: bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: parameters.Parameter, | |||
| CommitID: commitID, | |||
| IsLatestVersion: isLatestVersion, | |||
| BranchName: branch_name, | |||
| Params: form.Params, | |||
| FatherVersionName: modelarts.InitFatherVersionName, | |||
| JobName: jobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFileUrl: codeObsPath + bootFile, | |||
| BootFile: bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: parameters.Parameter, | |||
| CommitID: commitID, | |||
| IsLatestVersion: isLatestVersion, | |||
| BranchName: branch_name, | |||
| Params: form.Params, | |||
| // FatherVersionName: InitVersionName, | |||
| FlavorName: FlavorName, | |||
| EngineName: EngineName, | |||
| VersionCount: VersionCount, | |||
| TotalVersionCount: modelarts.TotalVersionCount, | |||
| } | |||
| err = modelarts.GenerateTrainJob(ctx, req) | |||
| @@ -683,6 +685,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| ctx.Data["PageIsTrainJob"] = true | |||
| var jobID = ctx.Params(":jobid") | |||
| latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(jobID, modelarts.IsLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) | |||
| return | |||
| } | |||
| VersionOutputPath := "V" + strconv.Itoa(latestTask.TotalVersionCount+1) | |||
| jobName := form.JobName | |||
| uuid := form.Attachment | |||
| description := form.Description | |||
| @@ -695,8 +705,8 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| isSaveParam := form.IsSaveParam | |||
| repo := ctx.Repo.Repository | |||
| codeLocalPath := setting.JobPath + jobName + modelarts.CodePath | |||
| codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath + VersionOutputPath + "/" | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + VersionOutputPath + "/" | |||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath | |||
| dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + uuid + "/" | |||
| branch_name := form.BranchName | |||
| @@ -743,14 +753,14 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| } | |||
| //todo: upload code (send to file_server todo this work?) | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath + VersionOutputPath + "/"); err != nil { | |||
| log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { | |||
| log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobVersionNew, &form) | |||
| @@ -861,6 +871,7 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| FlavorName: FlavorName, | |||
| EngineName: EngineName, | |||
| FatherVersionName: fatherVersionName, | |||
| TotalVersionCount: latestTask.TotalVersionCount + 1, | |||
| } | |||
| err = modelarts.GenerateTrainJobVersion(ctx, req, jobID) | |||
| if err != nil { | |||