| @@ -69,8 +69,8 @@ type Cloudbrain struct { | |||
| CanDel bool `xorm:"-"` | |||
| Type int `xorm:"INDEX DEFAULT 0"` | |||
| VersionID int64 `xorm:"INDEX DEFAULT 0"` | |||
| VersionName string | |||
| VersionID int64 `xorm:"INDEX DEFAULT 0"` | |||
| VersionName string `xorm:"INDEX"` | |||
| Uuid string | |||
| DatasetName string | |||
| VersionCount int64 `xorm:"INDEX DEFAULT 1"` | |||
| @@ -80,6 +80,17 @@ type Cloudbrain struct { | |||
| ComputeResource string | |||
| EngineID int64 | |||
| TrainUrl string | |||
| BranchName string | |||
| Parameters string | |||
| BootFile string | |||
| DataUrl string | |||
| LogUrl string | |||
| PreVersionId int64 | |||
| FlavorCode string | |||
| Description string | |||
| WorkServerNumber int | |||
| User *User `xorm:"-"` | |||
| Repo *Repository `xorm:"-"` | |||
| } | |||
| @@ -35,19 +35,20 @@ const ( | |||
| // "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + | |||
| // "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + | |||
| // "]}" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 20 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| IsLatestVersion = "1" | |||
| NotLatestVersion = "0" | |||
| ComputeResource = "NPU" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 20 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| IsLatestVersion = "1" | |||
| NotLatestVersion = "0" | |||
| ComputeResource = "NPU" | |||
| InitFatherVersionName = "V0001" | |||
| SortByCreateTime = "create_time" | |||
| ConfigTypeCustom = "custom" | |||
| @@ -59,21 +60,24 @@ var ( | |||
| ) | |||
| type GenerateTrainJobReq struct { | |||
| JobName string | |||
| Uuid string | |||
| Description string | |||
| CodeObsPath string | |||
| BootFile string | |||
| DataUrl string | |||
| TrainUrl string | |||
| FlavorCode string | |||
| LogUrl string | |||
| PoolID string | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| CommitID string | |||
| IsLatestVersion string | |||
| JobName string | |||
| Uuid string | |||
| Description string | |||
| CodeObsPath string | |||
| BootFile string | |||
| DataUrl string | |||
| TrainUrl string | |||
| FlavorCode string | |||
| LogUrl string | |||
| PoolID string | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| CommitID string | |||
| IsLatestVersion string | |||
| Params string | |||
| BranchName string | |||
| FatherVersionName string | |||
| } | |||
| type GenerateTrainJobVersionReq struct { | |||
| @@ -90,8 +94,10 @@ type GenerateTrainJobVersionReq struct { | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| Params string | |||
| PreVersionId int64 | |||
| CommitID string | |||
| BranchName string | |||
| } | |||
| type VersionInfo struct { | |||
| @@ -193,7 +199,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description string) error | |||
| return nil | |||
| } | |||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult *models.CreateTrainJobResult, err error) { | |||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { | |||
| jobResult, err := createTrainJob(models.CreateTrainJobParams{ | |||
| JobName: req.JobName, | |||
| Description: req.Description, | |||
| @@ -215,42 +221,52 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (jobresult | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateJob failed: %v", err.Error()) | |||
| return nil, err | |||
| return err | |||
| } | |||
| attach, err := models.GetAttachmentByUUID(req.Uuid) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) | |||
| return nil, err | |||
| return err | |||
| } | |||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| CommitID: req.CommitID, | |||
| IsLatestVersion: req.IsLatestVersion, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| Status: TransTrainJobStatus(jobResult.Status), | |||
| UserID: ctx.User.ID, | |||
| RepoID: ctx.Repo.Repository.ID, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeTrain), | |||
| Type: models.TypeCloudBrainTwo, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| DatasetName: attach.Name, | |||
| CommitID: req.CommitID, | |||
| IsLatestVersion: req.IsLatestVersion, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| FatherVersionName: req.FatherVersionName, | |||
| TrainUrl: req.TrainUrl, | |||
| BranchName: req.BranchName, | |||
| Parameters: req.Params, | |||
| BootFile: req.BootFile, | |||
| DataUrl: req.DataUrl, | |||
| LogUrl: req.LogUrl, | |||
| FlavorCode: req.FlavorCode, | |||
| Description: req.Description, | |||
| WorkServerNumber: req.WorkServerNumber, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||
| return nil, err | |||
| return err | |||
| } | |||
| return jobResult, nil | |||
| return nil | |||
| } | |||
| func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (jobresult *models.CreateTrainJobResult, err error) { | |||
| func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionReq, jobId string, fatherVersionName string) (err error) { | |||
| jobResult, err := createTrainJobVersion(models.CreateTrainJobVersionParams{ | |||
| Description: req.Description, | |||
| Config: models.TrainJobVersionConfig{ | |||
| @@ -271,13 +287,13 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR | |||
| }, jobId) | |||
| if err != nil { | |||
| log.Error("CreateJob failed: %v", err.Error()) | |||
| return nil, err | |||
| return err | |||
| } | |||
| attach, err := models.GetAttachmentByUUID(req.Uuid) | |||
| if err != nil { | |||
| log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) | |||
| return nil, err | |||
| return err | |||
| } | |||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||
| @@ -296,10 +312,20 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR | |||
| FatherVersionName: fatherVersionName, | |||
| ComputeResource: ComputeResource, | |||
| EngineID: req.EngineID, | |||
| TrainUrl: req.TrainUrl, | |||
| BranchName: req.BranchName, | |||
| Parameters: req.Params, | |||
| BootFile: req.BootFile, | |||
| DataUrl: req.DataUrl, | |||
| LogUrl: req.LogUrl, | |||
| PreVersionId: req.PreVersionId, | |||
| FlavorCode: req.FlavorCode, | |||
| Description: req.Description, | |||
| WorkServerNumber: req.WorkServerNumber, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | |||
| return nil, err | |||
| return err | |||
| } | |||
| repo := ctx.Repo.Repository | |||
| @@ -319,38 +345,29 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobVersionR | |||
| }) | |||
| if err != nil { | |||
| ctx.ServerError("Cloudbrain", err) | |||
| return nil, err | |||
| return err | |||
| } | |||
| //将训练任务的上一版本的isLatestVersion设置为"0" | |||
| latestTask, err := models.GetCloudbrainByJobIDAndIsLatestVersion(strconv.FormatInt(jobResult.JobID, 10), IsLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("GetCloudbrainByJobIDAndIsLatestVersion faild:", err) | |||
| return nil, err | |||
| return err | |||
| } | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), latestTask.VersionName, VersionListCount, NotLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return nil, err | |||
| return err | |||
| } | |||
| // lastVersionNum := jobResult.VersionName[1:] | |||
| // lastVersionNumToInt64, err := strconv.ParseInt(lastVersionNum, 10, 64) | |||
| // if err != nil { | |||
| // ctx.ServerError("lastVersionNumToInt64 faild:", err) | |||
| // return nil | |||
| // } | |||
| // lastVersionName := "V" + strconv.FormatInt(lastVersionNumToInt64-1, 10) | |||
| //将训练任务的本版本的isLatestVersion设置为"0" | |||
| //将当前版本的isLatestVersion和任务数量更新 | |||
| //将当前版本的isLatestVersion设置为"1"和任务数量更新 | |||
| err = models.SetVersionCountAndLatestVersionByJobIDAndVersionName(strconv.FormatInt(jobResult.JobID, 10), jobResult.VersionName, VersionListCount, IsLatestVersion) | |||
| if err != nil { | |||
| ctx.ServerError("UpdateJobVersionCount failed", err) | |||
| return nil, err | |||
| return err | |||
| } | |||
| return jobResult, err | |||
| return err | |||
| } | |||
| func TransTrainJobStatus(status int) string { | |||
| @@ -833,24 +833,27 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| } | |||
| req := &modelarts.GenerateTrainJobReq{ | |||
| JobName: jobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFile: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: parameters.Parameter, | |||
| CommitID: commitID, | |||
| IsLatestVersion: isLatestVersion, | |||
| } | |||
| jobResult, err := modelarts.GenerateTrainJob(ctx, req) | |||
| JobName: jobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFile: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: parameters.Parameter, | |||
| CommitID: commitID, | |||
| IsLatestVersion: isLatestVersion, | |||
| BranchName: branch_name, | |||
| Params: form.Params, | |||
| FatherVersionName: modelarts.InitFatherVersionName, | |||
| } | |||
| err = modelarts.GenerateTrainJob(ctx, req) | |||
| if err != nil { | |||
| log.Error("GenerateTrainJob failed:%v", err.Error()) | |||
| trainJobNewDataPrepare(ctx) | |||
| @@ -862,34 +865,34 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| // 保存openi创建训练任务界面的参数 | |||
| err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ | |||
| JobName: req.JobName, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| VersionName: jobResult.VersionName, | |||
| ResourcePools: form.PoolID, | |||
| EngineVersions: form.EngineID, | |||
| FlavorInfos: form.Flavor, | |||
| TrainUrl: outputObsPath, | |||
| BootFile: form.BootFile, | |||
| Uuid: form.Attachment, | |||
| DatasetName: attach.Name, | |||
| Params: form.Params, | |||
| BranchName: branch_name, | |||
| }) | |||
| // // 保存openi创建训练任务界面的参数 | |||
| // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ | |||
| if err != nil { | |||
| log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.Data["bootFile"] = form.BootFile | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| // JobName: req.JobName, | |||
| // JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| // VersionName: jobResult.VersionName, | |||
| // ResourcePools: form.PoolID, | |||
| // EngineVersions: form.EngineID, | |||
| // FlavorInfos: form.Flavor, | |||
| // TrainUrl: outputObsPath, | |||
| // BootFile: form.BootFile, | |||
| // Uuid: form.Attachment, | |||
| // DatasetName: attach.Name, | |||
| // Params: form.Params, | |||
| // BranchName: branch_name, | |||
| // }) | |||
| // if err != nil { | |||
| // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) | |||
| // trainJobNewVersionDataPrepare(ctx) | |||
| // ctx.Data["bootFile"] = form.BootFile | |||
| // ctx.Data["uuid"] = form.Attachment | |||
| // ctx.Data["datasetName"] = attach.Name | |||
| // ctx.Data["params"] = form.Params | |||
| // ctx.Data["branch_name"] = branch_name | |||
| // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| // return | |||
| // } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| } | |||
| @@ -1063,11 +1066,12 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: parameters.Parameter, | |||
| Params: form.Params, | |||
| PreVersionId: task.VersionID, | |||
| CommitID: commitID, | |||
| BranchName: branch_name, | |||
| } | |||
| jobResult, err := modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName) | |||
| err = modelarts.GenerateTrainJobVersion(ctx, req, jobID, fatherVersionName) | |||
| if err != nil { | |||
| log.Error("GenerateTrainJob failed:%v", err.Error()) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| @@ -1079,33 +1083,33 @@ func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJ | |||
| return | |||
| } | |||
| // 保存openi创建训练任务界面的参数 | |||
| err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ | |||
| JobName: req.JobName, | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| VersionName: jobResult.VersionName, | |||
| ResourcePools: form.PoolID, | |||
| EngineVersions: form.EngineID, | |||
| FlavorInfos: form.Flavor, | |||
| TrainUrl: outputObsPath, | |||
| BootFile: form.BootFile, | |||
| Uuid: form.Attachment, | |||
| DatasetName: attach.Name, | |||
| Params: form.Params, | |||
| BranchName: branch_name, | |||
| }) | |||
| // err = models.CreateTrainjobConfigDetail(&models.TrainjobConfigDetail{ | |||
| // JobName: req.JobName, | |||
| // JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| // VersionName: jobResult.VersionName, | |||
| // ResourcePools: form.PoolID, | |||
| // EngineVersions: form.EngineID, | |||
| // FlavorInfos: form.Flavor, | |||
| // TrainUrl: outputObsPath, | |||
| // BootFile: form.BootFile, | |||
| // Uuid: form.Attachment, | |||
| // DatasetName: attach.Name, | |||
| // Params: form.Params, | |||
| // BranchName: branch_name, | |||
| // }) | |||
| if err != nil { | |||
| log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) | |||
| trainJobNewVersionDataPrepare(ctx) | |||
| ctx.Data["bootFile"] = form.BootFile | |||
| ctx.Data["uuid"] = form.Attachment | |||
| ctx.Data["datasetName"] = attach.Name | |||
| ctx.Data["params"] = form.Params | |||
| ctx.Data["branch_name"] = branch_name | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| return | |||
| } | |||
| // if err != nil { | |||
| // log.Error("CreateTrainjobConfigDetail failed:%v", err.Error()) | |||
| // trainJobNewVersionDataPrepare(ctx) | |||
| // ctx.Data["bootFile"] = form.BootFile | |||
| // ctx.Data["uuid"] = form.Attachment | |||
| // ctx.Data["datasetName"] = attach.Name | |||
| // ctx.Data["params"] = form.Params | |||
| // ctx.Data["branch_name"] = branch_name | |||
| // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobVersionNew, &form) | |||
| // return | |||
| // } | |||
| // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
| } | |||
| @@ -1387,18 +1391,18 @@ func TrainJobStop(ctx *context.Context) { | |||
| func TrainJobVersionDel(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| var versionName = ctx.Params(":versionName") | |||
| var versionName = ctx.Query(":versionName") | |||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||
| return | |||
| } | |||
| _, err = modelarts.DelTrainJob(jobID) | |||
| if err != nil { | |||
| log.Error("DelTrainJob(%s) failed:%v", task.JobName, err.Error()) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | |||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||
| return | |||
| } | |||
| @@ -1408,12 +1412,13 @@ func TrainJobVersionDel(ctx *context.Context) { | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
| } | |||
| func TrainJobVersionStop(ctx *context.Context) { | |||
| var jobID = ctx.Params(":jobid") | |||
| var versionName = ctx.Params(":versionName") | |||
| var versionName = ctx.Query(":versionName") | |||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||
| @@ -1428,7 +1433,8 @@ func TrainJobVersionStop(ctx *context.Context) { | |||
| return | |||
| } | |||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| // ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job") | |||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
| } | |||
| func canUserCreateTrainJob(uid int64) (bool, error) { | |||