| @@ -31,9 +31,11 @@ const ( | |||||
| ) | ) | ||||
| const ( | const ( | ||||
| NPUResource = "NPU" | |||||
| GPUResource = "CPU/GPU" | |||||
| AllResource = "all" | |||||
| TempJobIdPrefix = "TEMP" | |||||
| JobStatusTemp = "TEMP" | |||||
| NPUResource = "NPU" | |||||
| GPUResource = "CPU/GPU" | |||||
| AllResource = "all" | |||||
| //notebook storage category | //notebook storage category | ||||
| EVSCategory = "EVS" | EVSCategory = "EVS" | ||||
| @@ -344,6 +346,7 @@ type CloudbrainsOptions struct { | |||||
| RepoID int64 // include all repos if empty | RepoID int64 // include all repos if empty | ||||
| UserID int64 | UserID int64 | ||||
| JobID string | JobID string | ||||
| JobName string | |||||
| SortType string | SortType string | ||||
| CloudbrainIDs []int64 | CloudbrainIDs []int64 | ||||
| JobStatus []string | JobStatus []string | ||||
| @@ -1247,6 +1250,52 @@ type LogFile struct { | |||||
| Name string | Name string | ||||
| } | } | ||||
| type JobList struct { | |||||
| JobName string `json:"job_name"` | |||||
| JobID int64 `json:"job_id"` | |||||
| VersionID int64 `json:"version_id"` | |||||
| VersionCount int64 `json:"version_count"` | |||||
| Description string `json:"job_desc"` | |||||
| IntStatus int `json:"status"` | |||||
| } | |||||
| type GetTrainJobListResult struct { | |||||
| ErrorResult | |||||
| JobTotalCount int `json:"job_total_count"` //查询到的用户创建作业总数 | |||||
| JobCountLimit int `json:"job_count_limit"` //用户还可以创建训练作业的数量 | |||||
| Quotas int `json:"quotas"` //训练作业的运行数量上限 | |||||
| JobList []JobList `json:"jobs"` | |||||
| } | |||||
| type JobVersionList struct { | |||||
| VersionName string `json:"version_name"` | |||||
| VersionID int64 `json:"version_id"` | |||||
| IntStatus int `json:"status"` | |||||
| } | |||||
| type GetTrainJobVersionListResult struct { | |||||
| ErrorResult | |||||
| JobID int64 `json:"job_id"` | |||||
| JobName string `json:"job_name"` | |||||
| JobDesc string `json:"job_desc"` | |||||
| VersionCount int64 `json:"version_count"` | |||||
| JobVersionList []JobVersionList `json:"versions"` | |||||
| } | |||||
| type NotebookList struct { | |||||
| JobName string `json:"name"` | |||||
| JobID string `json:"id"` | |||||
| Status string `json:"status"` | |||||
| } | |||||
| type GetNotebookListResult struct { | |||||
| TotalCount int64 `json:"total"` //总的记录数量 | |||||
| CurrentPage int `json:"current"` //当前页数 | |||||
| TotalPages int `json:"pages"` //总的页数 | |||||
| Size int `json:"size"` //每一页的数量 | |||||
| NotebookList []NotebookList `json:"data"` | |||||
| } | |||||
| //Grampus | //Grampus | ||||
| type GrampusResult struct { | type GrampusResult struct { | ||||
| ErrorCode int `json:"errorCode"` | ErrorCode int `json:"errorCode"` | ||||
| @@ -1559,6 +1608,12 @@ func CloudbrainsVersionList(opts *CloudbrainsOptions) ([]*CloudbrainInfo, int, e | |||||
| ) | ) | ||||
| } | } | ||||
| if (opts.JobName) != "" { | |||||
| cond = cond.And( | |||||
| builder.Eq{"cloudbrain.job_name": opts.JobName}, | |||||
| ) | |||||
| } | |||||
| if len(opts.JobTypes) > 0 { | if len(opts.JobTypes) > 0 { | ||||
| cond = cond.And( | cond = cond.And( | ||||
| builder.In("cloudbrain.job_type", opts.JobTypes), | builder.In("cloudbrain.job_type", opts.JobTypes), | ||||
| @@ -1692,9 +1747,9 @@ func SetTrainJobStatusByJobID(jobID string, status string, duration int64, train | |||||
| return | return | ||||
| } | } | ||||
| func SetVersionCountAndLatestVersion(jobID string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) { | |||||
| cb := &Cloudbrain{JobID: jobID, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount} | |||||
| _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_id=? AND cloudbrain.version_name=?", jobID, versionName).Update(cb) | |||||
| func SetVersionCountAndLatestVersion(jobName string, versionName string, versionCount int, isLatestVersion string, totalVersionCount int) (err error) { | |||||
| cb := &Cloudbrain{JobName: jobName, VersionName: versionName, VersionCount: versionCount, IsLatestVersion: isLatestVersion, TotalVersionCount: totalVersionCount} | |||||
| _, err = x.Cols("version_Count", "is_latest_version", "total_version_count").Where("cloudbrain.job_name=? AND cloudbrain.version_name=?", jobName, versionName).Update(cb) | |||||
| return | return | ||||
| } | } | ||||
| @@ -2114,3 +2169,8 @@ func GetCloudbrainByIDs(ids []int64) ([]*Cloudbrain, error) { | |||||
| In("id", ids). | In("id", ids). | ||||
| Find(&cloudbrains) | Find(&cloudbrains) | ||||
| } | } | ||||
| func GetCloudbrainCountByJobName(jobName, jobType string) (int, error) { | |||||
| count, err := x.Where("job_name = ? and job_type= ?", jobName, jobType).Count(new(Cloudbrain)) | |||||
| return int(count), err | |||||
| } | |||||
| @@ -0,0 +1,57 @@ | |||||
| package models | |||||
| import ( | |||||
| "time" | |||||
| "code.gitea.io/gitea/modules/timeutil" | |||||
| ) | |||||
| const ( | |||||
| //TempJobIdPrefix = "TEMP" | |||||
| ) | |||||
| type CloudbrainTemp struct { | |||||
| CloudbrainID int64 `xorm:"pk"` | |||||
| JobName string | |||||
| Type int | |||||
| JobType string `xorm:"INDEX NOT NULL DEFAULT 'DEBUG'"` | |||||
| Status string `xorm:"INDEX NOT NULL DEFAULT 'TEMP'"` | |||||
| VersionCount int `xorm:"NOT NULL DEFAULT 0"` | |||||
| QueryTimes int `xorm:"INDEX NOT NULL DEFAULT 0"` | |||||
| CreatedUnix timeutil.TimeStamp `xorm:"INDEX"` | |||||
| UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"` | |||||
| DeletedAt time.Time `xorm:"deleted"` | |||||
| } | |||||
| func InsertCloudbrainTemp(temp *CloudbrainTemp) (err error) { | |||||
| if _, err = x.Insert(temp); err != nil { | |||||
| return err | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func getCloudBrainTemp(temp *CloudbrainTemp) (*CloudbrainTemp, error) { | |||||
| has, err := x.Get(temp) | |||||
| if err != nil { | |||||
| return nil, err | |||||
| } else if !has { | |||||
| return nil, ErrJobNotExist{} | |||||
| } | |||||
| return temp, nil | |||||
| } | |||||
| func GetCloudbrainTempByCloudbrainID(id int64) (*CloudbrainTemp, error) { | |||||
| temp := &CloudbrainTemp{CloudbrainID: id} | |||||
| return getCloudBrainTemp(temp) | |||||
| } | |||||
| func DeleteCloudbrainTemp(temp *CloudbrainTemp) error { | |||||
| return deleteCloudbrainTemp(x, temp) | |||||
| } | |||||
| func deleteCloudbrainTemp(e Engine, temp *CloudbrainTemp) error { | |||||
| _, err := e.Where("cloudbrain_id = ?", temp.CloudbrainID).Delete(temp) | |||||
| return err | |||||
| } | |||||
| @@ -144,6 +144,7 @@ func init() { | |||||
| new(WechatBindLog), | new(WechatBindLog), | ||||
| new(OrgStatistic), | new(OrgStatistic), | ||||
| new(SearchRecord), | new(SearchRecord), | ||||
| new(CloudbrainTemp), | |||||
| new(AiModelConvert), | new(AiModelConvert), | ||||
| ) | ) | ||||
| @@ -142,8 +142,8 @@ func isAdminOrImageCreater(ctx *context.Context, image *models.Image, err error) | |||||
| func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | ||||
| var ID = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(ID) | |||||
| var id = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByID failed:%v", err.Error()) | log.Error("GetCloudbrainByID failed:%v", err.Error()) | ||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ||||
| @@ -158,8 +158,8 @@ func AdminOrOwnerOrJobCreaterRight(ctx *context.Context) { | |||||
| func AdminOrJobCreaterRight(ctx *context.Context) { | func AdminOrJobCreaterRight(ctx *context.Context) { | ||||
| var ID = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(ID) | |||||
| var id = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByID failed:%v", err.Error()) | log.Error("GetCloudbrainByID failed:%v", err.Error()) | ||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ||||
| @@ -547,7 +547,7 @@ func RestartTask(ctx *context.Context, task *models.Cloudbrain, newID *string) e | |||||
| GPUNumber: resourceSpec.GpuNum, | GPUNumber: resourceSpec.GpuNum, | ||||
| MemoryMB: resourceSpec.MemMiB, | MemoryMB: resourceSpec.MemMiB, | ||||
| ShmMB: resourceSpec.ShareMemMiB, | ShmMB: resourceSpec.ShareMemMiB, | ||||
| Command: GetCloudbrainDebugCommand(),//Command, | |||||
| Command: GetCloudbrainDebugCommand(), //Command, | |||||
| NeedIBDevice: false, | NeedIBDevice: false, | ||||
| IsMainRole: false, | IsMainRole: false, | ||||
| UseNNI: false, | UseNNI: false, | ||||
| @@ -4,8 +4,11 @@ import ( | |||||
| "encoding/json" | "encoding/json" | ||||
| "errors" | "errors" | ||||
| "fmt" | "fmt" | ||||
| "math/rand" | |||||
| "path" | "path" | ||||
| "strconv" | "strconv" | ||||
| "strings" | |||||
| "time" | |||||
| "code.gitea.io/gitea/modules/timeutil" | "code.gitea.io/gitea/modules/timeutil" | ||||
| @@ -59,7 +62,7 @@ const ( | |||||
| PerPage = 10 | PerPage = 10 | ||||
| IsLatestVersion = "1" | IsLatestVersion = "1" | ||||
| NotLatestVersion = "0" | NotLatestVersion = "0" | ||||
| VersionCount = 1 | |||||
| VersionCountOne = 1 | |||||
| SortByCreateTime = "create_time" | SortByCreateTime = "create_time" | ||||
| ConfigTypeCustom = "custom" | ConfigTypeCustom = "custom" | ||||
| @@ -264,31 +267,13 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc | |||||
| log.Error("GetNotebookImageName failed: %v", err.Error()) | log.Error("GetNotebookImageName failed: %v", err.Error()) | ||||
| return err | return err | ||||
| } | } | ||||
| createTime := timeutil.TimeStampNow() | createTime := timeutil.TimeStampNow() | ||||
| jobResult, err := createNotebook2(models.CreateNotebook2Params{ | |||||
| JobName: jobName, | |||||
| Description: description, | |||||
| Flavor: flavor, | |||||
| Duration: autoStopDurationMs, | |||||
| ImageID: imageId, | |||||
| PoolID: poolInfos.PoolInfo[0].PoolId, | |||||
| Feature: models.NotebookFeature, | |||||
| Volume: models.VolumeReq{ | |||||
| Capacity: setting.Capacity, | |||||
| Category: models.EVSCategory, | |||||
| Ownership: models.ManagedOwnership, | |||||
| }, | |||||
| WorkspaceID: "0", | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("createNotebook2 failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| Status: jobResult.Status, | |||||
| task := &models.Cloudbrain{ | |||||
| Status: string(models.ModelArtsTrainJobWaiting), | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: jobResult.ID, | |||||
| JobID: models.TempJobIdPrefix + jobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))), | |||||
| JobName: jobName, | JobName: jobName, | ||||
| FlavorCode: flavor, | FlavorCode: flavor, | ||||
| DisplayJobName: displayJobName, | DisplayJobName: displayJobName, | ||||
| @@ -300,16 +285,66 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc | |||||
| Description: description, | Description: description, | ||||
| CreatedUnix: createTime, | CreatedUnix: createTime, | ||||
| UpdatedUnix: createTime, | UpdatedUnix: createTime, | ||||
| }) | |||||
| } | |||||
| err = models.CreateCloudbrain(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateCloudbrain(%s) failed:%v", displayJobName, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| task, err := models.GetCloudbrainByName(jobName) | |||||
| jobResult, err := createNotebook2(models.CreateNotebook2Params{ | |||||
| JobName: jobName, | |||||
| Description: description, | |||||
| Flavor: flavor, | |||||
| Duration: autoStopDurationMs, | |||||
| ImageID: imageId, | |||||
| PoolID: poolInfos.PoolInfo[0].PoolId, | |||||
| Feature: models.NotebookFeature, | |||||
| Volume: models.VolumeReq{ | |||||
| Capacity: setting.Capacity, | |||||
| Category: models.EVSCategory, | |||||
| Ownership: models.ManagedOwnership, | |||||
| }, | |||||
| WorkspaceID: "0", | |||||
| }) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByName failed: %v", err.Error()) | |||||
| return err | |||||
| log.Error("createNotebook2 failed: %v", err.Error()) | |||||
| if strings.HasPrefix(err.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", displayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| CloudbrainID: task.ID, | |||||
| Status: models.JobStatusTemp, | |||||
| Type: task.Type, | |||||
| JobName: task.JobName, | |||||
| JobType: task.JobType, | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| return errTemp | |||||
| } | |||||
| } else { | |||||
| task.Status = string(models.ModelArtsCreateFailed) | |||||
| errTemp := models.UpdateJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("UpdateJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| errTemp = models.DeleteJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("DeleteJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| return err | |||||
| } | |||||
| } else { | |||||
| task.Status = jobResult.Status | |||||
| task.JobID = jobResult.ID | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } | } | ||||
| stringId := strconv.FormatInt(task.ID, 10) | stringId := strconv.FormatInt(task.ID, 10) | ||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) | notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, stringId, displayJobName, models.ActionCreateDebugNPUTask) | ||||
| return nil | return nil | ||||
| @@ -317,66 +352,15 @@ func GenerateNotebook2(ctx *context.Context, displayJobName, jobName, uuid, desc | |||||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { | func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error) { | ||||
| createTime := timeutil.TimeStampNow() | createTime := timeutil.TimeStampNow() | ||||
| var jobResult *models.CreateTrainJobResult | |||||
| var createErr error | |||||
| if req.EngineID < 0 { | |||||
| jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | |||||
| Config: models.UserImageConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| UserImageUrl: req.UserImageUrl, | |||||
| UserCommand: req.UserCommand, | |||||
| }, | |||||
| }) | |||||
| } else { | |||||
| jobResult, createErr = createTrainJob(models.CreateTrainJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | |||||
| Config: models.Config{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| EngineID: req.EngineID, | |||||
| TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| }, | |||||
| }) | |||||
| } | |||||
| if createErr != nil { | |||||
| log.Error("CreateJob failed: %v", createErr.Error()) | |||||
| return createErr | |||||
| } | |||||
| jobId := strconv.FormatInt(jobResult.JobID, 10) | |||||
| createErr = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| Status: TransTrainJobStatus(jobResult.Status), | |||||
| task := &models.Cloudbrain{ | |||||
| Status: string(models.ModelArtsTrainJobWaiting), | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: jobId, | |||||
| JobID: models.TempJobIdPrefix + req.JobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))), | |||||
| JobName: req.JobName, | JobName: req.JobName, | ||||
| DisplayJobName: req.DisplayJobName, | DisplayJobName: req.DisplayJobName, | ||||
| JobType: string(models.JobTypeTrain), | JobType: string(models.JobTypeTrain), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| VersionID: jobResult.VersionID, | |||||
| VersionName: jobResult.VersionName, | |||||
| Uuid: req.Uuid, | Uuid: req.Uuid, | ||||
| DatasetName: req.DatasetName, | DatasetName: req.DatasetName, | ||||
| CommitID: req.CommitID, | CommitID: req.CommitID, | ||||
| @@ -398,49 +382,21 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) (err error | |||||
| TotalVersionCount: req.TotalVersionCount, | TotalVersionCount: req.TotalVersionCount, | ||||
| CreatedUnix: createTime, | CreatedUnix: createTime, | ||||
| UpdatedUnix: createTime, | UpdatedUnix: createTime, | ||||
| }) | |||||
| if createErr != nil { | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, createErr.Error()) | |||||
| return createErr | |||||
| } | } | ||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobId, req.DisplayJobName, models.ActionCreateTrainTask) | |||||
| return nil | |||||
| } | |||||
| func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) { | |||||
| return createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | |||||
| Config: models.UserImageConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| UserImageUrl: req.UserImageUrl, | |||||
| UserCommand: req.UserCommand, | |||||
| }, | |||||
| }) | |||||
| } | |||||
| err = models.CreateCloudbrain(task) | |||||
| if err != nil { | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.DisplayJobName, err.Error()) | |||||
| return err | |||||
| } | |||||
| func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) { | |||||
| createTime := timeutil.TimeStampNow() | |||||
| var jobResult *models.CreateTrainJobResult | var jobResult *models.CreateTrainJobResult | ||||
| var createErr error | var createErr error | ||||
| log.Info(" req.EngineID =" + fmt.Sprint(req.EngineID)) | |||||
| if req.EngineID < 0 { | if req.EngineID < 0 { | ||||
| jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{ | |||||
| jobResult, createErr = createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | Description: req.Description, | ||||
| Config: models.TrainJobVersionUserImageConfig{ | |||||
| Config: models.UserImageConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | WorkServerNum: req.WorkServerNumber, | ||||
| AppUrl: req.CodeObsPath, | AppUrl: req.CodeObsPath, | ||||
| BootFileUrl: req.BootFileUrl, | BootFileUrl: req.BootFileUrl, | ||||
| @@ -448,19 +404,20 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| TrainUrl: req.TrainUrl, | TrainUrl: req.TrainUrl, | ||||
| LogUrl: req.LogUrl, | LogUrl: req.LogUrl, | ||||
| PoolID: req.PoolID, | PoolID: req.PoolID, | ||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | Flavor: models.Flavor{ | ||||
| Code: req.FlavorCode, | Code: req.FlavorCode, | ||||
| }, | }, | ||||
| Parameter: req.Parameters, | Parameter: req.Parameters, | ||||
| PreVersionId: req.PreVersionId, | |||||
| UserImageUrl: req.UserImageUrl, | UserImageUrl: req.UserImageUrl, | ||||
| UserCommand: req.UserCommand, | UserCommand: req.UserCommand, | ||||
| }, | }, | ||||
| }, jobId) | |||||
| }) | |||||
| } else { | } else { | ||||
| jobResult, createErr = createTrainJobVersion(models.CreateTrainJobVersionParams{ | |||||
| jobResult, createErr = createTrainJob(models.CreateTrainJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | Description: req.Description, | ||||
| Config: models.TrainJobVersionConfig{ | |||||
| Config: models.Config{ | |||||
| WorkServerNum: req.WorkServerNumber, | WorkServerNum: req.WorkServerNumber, | ||||
| AppUrl: req.CodeObsPath, | AppUrl: req.CodeObsPath, | ||||
| BootFileUrl: req.BootFileUrl, | BootFileUrl: req.BootFileUrl, | ||||
| @@ -469,87 +426,60 @@ func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, job | |||||
| TrainUrl: req.TrainUrl, | TrainUrl: req.TrainUrl, | ||||
| LogUrl: req.LogUrl, | LogUrl: req.LogUrl, | ||||
| PoolID: req.PoolID, | PoolID: req.PoolID, | ||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | Flavor: models.Flavor{ | ||||
| Code: req.FlavorCode, | Code: req.FlavorCode, | ||||
| }, | }, | ||||
| Parameter: req.Parameters, | |||||
| PreVersionId: req.PreVersionId, | |||||
| Parameter: req.Parameters, | |||||
| }, | }, | ||||
| }, jobId) | |||||
| } | |||||
| if createErr != nil { | |||||
| log.Error("CreateJob failed: %v", createErr.Error()) | |||||
| return createErr | |||||
| } | |||||
| var jobTypes []string | |||||
| jobTypes = append(jobTypes, string(models.JobTypeTrain)) | |||||
| repo := ctx.Repo.Repository | |||||
| VersionTaskList, VersionListCount, createErr := models.CloudbrainsVersionList(&models.CloudbrainsOptions{ | |||||
| RepoID: repo.ID, | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| JobTypes: jobTypes, | |||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||||
| }) | |||||
| if createErr != nil { | |||||
| ctx.ServerError("Cloudbrain", createErr) | |||||
| return createErr | |||||
| } | |||||
| //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount | |||||
| createErr = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| Status: TransTrainJobStatus(jobResult.Status), | |||||
| UserID: ctx.User.ID, | |||||
| RepoID: ctx.Repo.Repository.ID, | |||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||||
| JobName: req.JobName, | |||||
| DisplayJobName: req.DisplayJobName, | |||||
| JobType: string(models.JobTypeTrain), | |||||
| Type: models.TypeCloudBrainTwo, | |||||
| VersionID: jobResult.VersionID, | |||||
| VersionName: jobResult.VersionName, | |||||
| Uuid: req.Uuid, | |||||
| DatasetName: req.DatasetName, | |||||
| CommitID: req.CommitID, | |||||
| IsLatestVersion: req.IsLatestVersion, | |||||
| PreVersionName: req.PreVersionName, | |||||
| ComputeResource: models.NPUResource, | |||||
| EngineID: req.EngineID, | |||||
| TrainUrl: req.TrainUrl, | |||||
| BranchName: req.BranchName, | |||||
| Parameters: req.Params, | |||||
| BootFile: req.BootFile, | |||||
| DataUrl: req.DataUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PreVersionId: req.PreVersionId, | |||||
| FlavorCode: req.FlavorCode, | |||||
| Description: req.Description, | |||||
| WorkServerNumber: req.WorkServerNumber, | |||||
| FlavorName: req.FlavorName, | |||||
| EngineName: req.EngineName, | |||||
| TotalVersionCount: VersionTaskList[0].TotalVersionCount + 1, | |||||
| VersionCount: VersionListCount + 1, | |||||
| CreatedUnix: createTime, | |||||
| UpdatedUnix: createTime, | |||||
| }) | |||||
| if createErr != nil { | |||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, createErr.Error()) | |||||
| return createErr | |||||
| }) | |||||
| } | } | ||||
| //将训练任务的上一版本的isLatestVersion设置为"0" | |||||
| createErr = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount) | |||||
| if createErr != nil { | if createErr != nil { | ||||
| ctx.ServerError("Update IsLatestVersion failed", createErr) | |||||
| return createErr | |||||
| log.Error("createTrainJob failed: %v", createErr.Error()) | |||||
| if strings.HasPrefix(createErr.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", req.DisplayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| CloudbrainID: task.ID, | |||||
| Status: models.JobStatusTemp, | |||||
| Type: task.Type, | |||||
| JobName: task.JobName, | |||||
| JobType: task.JobType, | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| return errTemp | |||||
| } | |||||
| } else { | |||||
| task.Status = string(models.ModelArtsTrainJobFailed) | |||||
| errTemp := models.UpdateJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("UpdateJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| errTemp = models.DeleteJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("DeleteJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| return createErr | |||||
| } | |||||
| } else { | |||||
| task.Status = TransTrainJobStatus(jobResult.Status) | |||||
| task.JobID = strconv.FormatInt(jobResult.JobID, 10) | |||||
| task.VersionID = jobResult.VersionID | |||||
| task.VersionName = jobResult.VersionName | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } | } | ||||
| return createErr | |||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, task.JobID, req.DisplayJobName, models.ActionCreateTrainTask) | |||||
| return nil | |||||
| } | } | ||||
| func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) { | |||||
| createTime := timeutil.TimeStampNow() | |||||
| jobResult, err := createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||||
| func GenerateModelConvertTrainJob(req *GenerateTrainJobReq) (*models.CreateTrainJobResult, error) { | |||||
| return createTrainJobUserImage(models.CreateUserImageTrainJobParams{ | |||||
| JobName: req.JobName, | JobName: req.JobName, | ||||
| Description: req.Description, | Description: req.Description, | ||||
| Config: models.UserImageConfig{ | Config: models.UserImageConfig{ | ||||
| @@ -569,11 +499,9 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain | |||||
| UserCommand: req.UserCommand, | UserCommand: req.UserCommand, | ||||
| }, | }, | ||||
| }) | }) | ||||
| if err != nil { | |||||
| log.Error("CreateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } | |||||
| func GenerateTrainJobVersion(ctx *context.Context, req *GenerateTrainJobReq, jobId string) (err error) { | |||||
| var jobTypes []string | var jobTypes []string | ||||
| jobTypes = append(jobTypes, string(models.JobTypeTrain)) | jobTypes = append(jobTypes, string(models.JobTypeTrain)) | ||||
| repo := ctx.Repo.Repository | repo := ctx.Repo.Repository | ||||
| @@ -581,7 +509,7 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain | |||||
| RepoID: repo.ID, | RepoID: repo.ID, | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| JobTypes: jobTypes, | JobTypes: jobTypes, | ||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||||
| JobID: jobId, | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("Cloudbrain", err) | ctx.ServerError("Cloudbrain", err) | ||||
| @@ -589,25 +517,23 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain | |||||
| } | } | ||||
| //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount | //将当前版本的isLatestVersion设置为"1"和任务数量更新,任务数量包括当前版本数VersionCount和历史创建的总版本数TotalVersionCount | ||||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| Status: TransTrainJobStatus(jobResult.Status), | |||||
| createTime := timeutil.TimeStampNow() | |||||
| task := &models.Cloudbrain{ | |||||
| Status: models.JobStatusTemp, | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||||
| JobID: jobId, | |||||
| JobName: req.JobName, | JobName: req.JobName, | ||||
| DisplayJobName: req.DisplayJobName, | DisplayJobName: req.DisplayJobName, | ||||
| JobType: string(models.JobTypeTrain), | JobType: string(models.JobTypeTrain), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| VersionID: jobResult.VersionID, | |||||
| VersionName: jobResult.VersionName, | |||||
| Uuid: req.Uuid, | Uuid: req.Uuid, | ||||
| DatasetName: req.DatasetName, | DatasetName: req.DatasetName, | ||||
| CommitID: req.CommitID, | CommitID: req.CommitID, | ||||
| IsLatestVersion: req.IsLatestVersion, | IsLatestVersion: req.IsLatestVersion, | ||||
| PreVersionName: req.PreVersionName, | PreVersionName: req.PreVersionName, | ||||
| ComputeResource: models.NPUResource, | ComputeResource: models.NPUResource, | ||||
| EngineID: MORDELART_USER_IMAGE_ENGINE_ID, | |||||
| Image: req.UserImageUrl, | |||||
| EngineID: req.EngineID, | |||||
| TrainUrl: req.TrainUrl, | TrainUrl: req.TrainUrl, | ||||
| BranchName: req.BranchName, | BranchName: req.BranchName, | ||||
| Parameters: req.Params, | Parameters: req.Params, | ||||
| @@ -624,20 +550,103 @@ func GenerateTrainJobVersionByUserImage(ctx *context.Context, req *GenerateTrain | |||||
| VersionCount: VersionListCount + 1, | VersionCount: VersionListCount + 1, | ||||
| CreatedUnix: createTime, | CreatedUnix: createTime, | ||||
| UpdatedUnix: createTime, | UpdatedUnix: createTime, | ||||
| }) | |||||
| } | |||||
| err = models.CreateCloudbrain(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | ||||
| return err | return err | ||||
| } | } | ||||
| //将训练任务的上一版本的isLatestVersion设置为"0" | //将训练任务的上一版本的isLatestVersion设置为"0" | ||||
| err = models.SetVersionCountAndLatestVersion(strconv.FormatInt(jobResult.JobID, 10), VersionTaskList[0].VersionName, VersionCount, NotLatestVersion, TotalVersionCount) | |||||
| err = models.SetVersionCountAndLatestVersion(req.JobName, VersionTaskList[0].VersionName, VersionListCount, NotLatestVersion, VersionTaskList[0].TotalVersionCount) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("Update IsLatestVersion failed", err) | ctx.ServerError("Update IsLatestVersion failed", err) | ||||
| return err | return err | ||||
| } | } | ||||
| return err | |||||
| var jobResult *models.CreateTrainJobResult | |||||
| var createErr error | |||||
| if req.EngineID < 0 { | |||||
| jobResult, createErr = createTrainJobVersionUserImage(models.CreateTrainJobVersionUserImageParams{ | |||||
| Description: req.Description, | |||||
| Config: models.TrainJobVersionUserImageConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| PreVersionId: req.PreVersionId, | |||||
| UserImageUrl: req.UserImageUrl, | |||||
| UserCommand: req.UserCommand, | |||||
| }, | |||||
| }, jobId) | |||||
| } else { | |||||
| jobResult, createErr = createTrainJobVersion(models.CreateTrainJobVersionParams{ | |||||
| Description: req.Description, | |||||
| Config: models.TrainJobVersionConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| EngineID: req.EngineID, | |||||
| TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| PreVersionId: req.PreVersionId, | |||||
| }, | |||||
| }, jobId) | |||||
| } | |||||
| if createErr != nil { | |||||
| log.Error("createTrainJobVersion failed: %v", err.Error()) | |||||
| if strings.HasPrefix(err.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", req.DisplayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| CloudbrainID: task.ID, | |||||
| Status: models.JobStatusTemp, | |||||
| Type: task.Type, | |||||
| JobName: task.JobName, | |||||
| JobType: task.JobType, | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| return errTemp | |||||
| } | |||||
| } else { | |||||
| task.Status = string(models.ModelArtsTrainJobFailed) | |||||
| errTemp := models.UpdateJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("UpdateJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| errTemp = models.DeleteJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("DeleteJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| return createErr | |||||
| } | |||||
| } else { | |||||
| task.Status = TransTrainJobStatus(jobResult.Status) | |||||
| task.JobID = strconv.FormatInt(jobResult.JobID, 10) | |||||
| task.VersionID = jobResult.VersionID | |||||
| task.VersionName = jobResult.VersionName | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | } | ||||
| func TransTrainJobStatus(status int) string { | func TransTrainJobStatus(status int) string { | ||||
| @@ -700,47 +709,22 @@ func GetOutputPathByCount(TotalVersionCount int) (VersionOutputPath string) { | |||||
| func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { | func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (err error) { | ||||
| createTime := timeutil.TimeStampNow() | createTime := timeutil.TimeStampNow() | ||||
| jobResult, err := createInferenceJob(models.CreateInferenceJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | |||||
| InfConfig: models.InfConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| EngineID: req.EngineID, | |||||
| // TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| }, | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("CreateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| attach, err := models.GetAttachmentByUUID(req.Uuid) | attach, err := models.GetAttachmentByUUID(req.Uuid) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetAttachmentByUUID(%s) failed:%v", strconv.FormatInt(jobResult.JobID, 10), err.Error()) | |||||
| log.Error("GetAttachmentByUUID(%s) failed:%v", req.DisplayJobName, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| jobID := strconv.FormatInt(jobResult.JobID, 10) | |||||
| err = models.CreateCloudbrain(&models.Cloudbrain{ | |||||
| Status: TransTrainJobStatus(jobResult.Status), | |||||
| task := &models.Cloudbrain{ | |||||
| Status: string(models.ModelArtsTrainJobWaiting), | |||||
| UserID: ctx.User.ID, | UserID: ctx.User.ID, | ||||
| RepoID: ctx.Repo.Repository.ID, | RepoID: ctx.Repo.Repository.ID, | ||||
| JobID: jobID, | |||||
| JobID: models.TempJobIdPrefix + req.JobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))), | |||||
| JobName: req.JobName, | JobName: req.JobName, | ||||
| DisplayJobName: req.DisplayJobName, | DisplayJobName: req.DisplayJobName, | ||||
| JobType: string(models.JobTypeInference), | JobType: string(models.JobTypeInference), | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| VersionID: jobResult.VersionID, | |||||
| VersionName: jobResult.VersionName, | |||||
| Uuid: req.Uuid, | Uuid: req.Uuid, | ||||
| DatasetName: attach.Name, | DatasetName: attach.Name, | ||||
| CommitID: req.CommitID, | CommitID: req.CommitID, | ||||
| @@ -767,13 +751,74 @@ func GenerateInferenceJob(ctx *context.Context, req *GenerateInferenceJobReq) (e | |||||
| ResultUrl: req.ResultUrl, | ResultUrl: req.ResultUrl, | ||||
| CreatedUnix: createTime, | CreatedUnix: createTime, | ||||
| UpdatedUnix: createTime, | UpdatedUnix: createTime, | ||||
| }) | |||||
| } | |||||
| err = models.CreateCloudbrain(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | log.Error("CreateCloudbrain(%s) failed:%v", req.JobName, err.Error()) | ||||
| return err | return err | ||||
| } | } | ||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, jobID, req.DisplayJobName, models.ActionCreateInferenceTask) | |||||
| jobResult, err := createInferenceJob(models.CreateInferenceJobParams{ | |||||
| JobName: req.JobName, | |||||
| Description: req.Description, | |||||
| InfConfig: models.InfConfig{ | |||||
| WorkServerNum: req.WorkServerNumber, | |||||
| AppUrl: req.CodeObsPath, | |||||
| BootFileUrl: req.BootFileUrl, | |||||
| DataUrl: req.DataUrl, | |||||
| EngineID: req.EngineID, | |||||
| // TrainUrl: req.TrainUrl, | |||||
| LogUrl: req.LogUrl, | |||||
| PoolID: req.PoolID, | |||||
| CreateVersion: true, | |||||
| Flavor: models.Flavor{ | |||||
| Code: req.FlavorCode, | |||||
| }, | |||||
| Parameter: req.Parameters, | |||||
| }, | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("createTrainJob failed: %v", err.Error()) | |||||
| if strings.HasPrefix(err.Error(), UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", req.DisplayJobName) | |||||
| err = models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| CloudbrainID: task.ID, | |||||
| Status: models.JobStatusTemp, | |||||
| Type: task.Type, | |||||
| JobName: task.JobName, | |||||
| JobType: task.JobType, | |||||
| }) | |||||
| if err != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } else { | |||||
| task.Status = string(models.ModelArtsTrainJobFailed) | |||||
| errTemp := models.UpdateJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("UpdateJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| errTemp = models.DeleteJob(task) | |||||
| if errTemp != nil { | |||||
| log.Error("DeleteJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| return err | |||||
| } | |||||
| } else { | |||||
| task.Status = TransTrainJobStatus(jobResult.Status) | |||||
| task.JobID = strconv.FormatInt(jobResult.JobID, 10) | |||||
| task.VersionID = jobResult.VersionID | |||||
| task.VersionName = jobResult.VersionName | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed: %v", err.Error()) | |||||
| return err | |||||
| } | |||||
| } | |||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, task.JobID, req.DisplayJobName, models.ActionCreateInferenceTask) | |||||
| return nil | return nil | ||||
| } | } | ||||
| @@ -799,3 +844,194 @@ func GetNotebookImageName(imageId string) (string, error) { | |||||
| return imageName, nil | return imageName, nil | ||||
| } | } | ||||
| func HandleTrainJobInfo(task *models.Cloudbrain) error { | |||||
| if isTempJob(task.JobID, task.Status) { | |||||
| if task.VersionCount > VersionCountOne { | |||||
| //multi version | |||||
| result, err := GetTrainJobVersionList(1000, 1, strings.TrimPrefix(task.JobID, models.TempJobIdPrefix)) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobVersionList failed:%v", err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| if strconv.FormatInt(result.JobID, 10) == task.JobID && result.JobName == task.JobName { | |||||
| if result.VersionCount == int64(task.VersionCount) { | |||||
| log.Info("find the record(%s)", task.DisplayJobName) | |||||
| task.Status = TransTrainJobStatus(result.JobVersionList[0].IntStatus) | |||||
| task.VersionName = result.JobVersionList[0].VersionName | |||||
| task.VersionID = result.JobVersionList[0].VersionID | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| return err | |||||
| } | |||||
| temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) | |||||
| if err != nil { | |||||
| log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) | |||||
| } else { | |||||
| err = models.DeleteCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } else { | |||||
| log.Error("can not find the record(%s) until now", task.DisplayJobName) | |||||
| } | |||||
| } else { | |||||
| log.Error("can not find the record(%s) until now", task.DisplayJobName) | |||||
| } | |||||
| } | |||||
| } else { | |||||
| //inference or one version | |||||
| result, err := GetTrainJobList(1000, 1, "create_time", "desc", task.JobName) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobList failed:%v", err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| for _, job := range result.JobList { | |||||
| if task.JobName == job.JobName { | |||||
| log.Info("find the record(%s)", task.DisplayJobName) | |||||
| task.Status = TransTrainJobStatus(job.IntStatus) | |||||
| task.JobID = strconv.FormatInt(job.JobID, 10) | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) | |||||
| if err != nil { | |||||
| log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) | |||||
| return err | |||||
| } | |||||
| err = models.DeleteCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| return nil | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| } else { | |||||
| //normal | |||||
| result, err := GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| task.Status = TransTrainJobStatus(result.IntStatus) | |||||
| task.Duration = result.Duration / 1000 | |||||
| task.TrainJobDuration = result.TrainJobDuration | |||||
| if task.StartTime == 0 && result.StartTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||||
| if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||||
| task.EndTime = task.StartTime.Add(task.Duration) | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func HandleNotebookInfo(task *models.Cloudbrain) error { | |||||
| if isTempJob(task.JobID, task.Status) { | |||||
| result, err := GetNotebookList(1000, 0, "createTime", "DESC", task.JobName) | |||||
| if err != nil { | |||||
| log.Error("GetNotebookList failed:%v", err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| count, err := models.GetCloudbrainCountByJobName(task.JobName, task.JobType) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainCountByJobName failed:%v", err) | |||||
| return err | |||||
| } | |||||
| if len(result.NotebookList) == count { | |||||
| if result.NotebookList[0].JobName == task.JobName { | |||||
| log.Info("find the record(%s)", task.DisplayJobName) | |||||
| task.Status = result.NotebookList[0].Status | |||||
| task.JobID = result.NotebookList[0].JobID | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| return err | |||||
| } | |||||
| temp, err := models.GetCloudbrainTempByCloudbrainID(task.ID) | |||||
| if err != nil { | |||||
| log.Error("no such temp record(%s):%v", task.DisplayJobName, err.Error()) | |||||
| return err | |||||
| } | |||||
| err = models.DeleteCloudbrainTemp(temp) | |||||
| if err != nil { | |||||
| log.Error("DeleteCloudbrainTemp(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| return nil | |||||
| } else { | |||||
| log.Error("can not find the record(%s) until now", task.DisplayJobName) | |||||
| } | |||||
| } else { | |||||
| log.Error("can not find the record(%s) until now", task.DisplayJobName) | |||||
| } | |||||
| } else { | |||||
| log.Error("can not find the record(%s) until now", task.DisplayJobName) | |||||
| } | |||||
| } else { | |||||
| //normal | |||||
| result, err := GetNotebook2(task.JobID) | |||||
| if err != nil { | |||||
| log.Error("GetNotebook2(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| if result != nil { | |||||
| task.Status = result.Status | |||||
| if task.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
| } | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| task.ComputeAndSetDuration() | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| return err | |||||
| } | |||||
| } | |||||
| } | |||||
| return nil | |||||
| } | |||||
| func isTempJob(jobID, status string) bool { | |||||
| if (strings.HasPrefix(jobID, models.TempJobIdPrefix) && status == string(models.ModelArtsTrainJobWaiting)) || status == models.JobStatusTemp { | |||||
| return true | |||||
| } | |||||
| return false | |||||
| } | |||||
| @@ -37,6 +37,7 @@ const ( | |||||
| NotebookNotFound = "ModelArts.6404" | NotebookNotFound = "ModelArts.6404" | ||||
| NotebookNoPermission = "ModelArts.6407" | NotebookNoPermission = "ModelArts.6407" | ||||
| NotebookInvalid = "ModelArts.6400" | NotebookInvalid = "ModelArts.6400" | ||||
| UnknownErrorPrefix = "UNKNOWN:" | |||||
| ) | ) | ||||
| func getRestyClient() *resty.Client { | func getRestyClient() *resty.Client { | ||||
| @@ -298,6 +299,10 @@ sendjob: | |||||
| return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | ||||
| } | } | ||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| } | |||||
| if len(response.ErrorCode) != 0 { | if len(response.ErrorCode) != 0 { | ||||
| log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | log.Error("ManageNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | ||||
| if response.ErrorCode == modelartsIllegalToken && retry < 1 { | if response.ErrorCode == modelartsIllegalToken && retry < 1 { | ||||
| @@ -547,9 +552,6 @@ sendjob: | |||||
| return nil, fmt.Errorf("resty create train-job: %s", err) | return nil, fmt.Errorf("resty create train-job: %s", err) | ||||
| } | } | ||||
| req, _ := json.Marshal(createJobParams) | |||||
| log.Info("%s", req) | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | ||||
| retry++ | retry++ | ||||
| _ = getToken() | _ = getToken() | ||||
| @@ -563,17 +565,21 @@ sendjob: | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| } | } | ||||
| log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| BootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||||
| DataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == BootFileErrorMsg { | |||||
| bootFileErrorMsg := "Invalid OBS path '" + createJobParams.Config.BootFileUrl + "'." | |||||
| dataSetErrorMsg := "Invalid OBS path '" + createJobParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == bootFileErrorMsg { | |||||
| log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("启动文件错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("启动文件错误!") | return &result, fmt.Errorf("启动文件错误!") | ||||
| } | } | ||||
| if temp.ErrorMsg == DataSetErrorMsg { | |||||
| if temp.ErrorMsg == dataSetErrorMsg { | |||||
| log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("数据集错误!createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createTrainJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| @@ -603,9 +609,6 @@ sendjob: | |||||
| return nil, fmt.Errorf("resty create train-job version: %s", err) | return nil, fmt.Errorf("resty create train-job version: %s", err) | ||||
| } | } | ||||
| req, _ := json.Marshal(createJobVersionParams) | |||||
| log.Info("%s", req) | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | ||||
| retry++ | retry++ | ||||
| _ = getToken() | _ = getToken() | ||||
| @@ -618,17 +621,23 @@ sendjob: | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | ||||
| } | } | ||||
| BootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'." | |||||
| DataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == BootFileErrorMsg { | |||||
| log.Error("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| bootFileErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.BootFileUrl + "'." | |||||
| dataSetErrorMsg := "Invalid OBS path '" + createJobVersionParams.Config.DataUrl + "'." | |||||
| if temp.ErrorMsg == bootFileErrorMsg { | |||||
| log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("启动文件错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("启动文件错误!") | return &result, fmt.Errorf("启动文件错误!") | ||||
| } | } | ||||
| if temp.ErrorMsg == DataSetErrorMsg { | |||||
| if temp.ErrorMsg == dataSetErrorMsg { | |||||
| log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("数据集错误!createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createTrainJobVersion failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| @@ -761,9 +770,6 @@ sendjob: | |||||
| goto sendjob | goto sendjob | ||||
| } | } | ||||
| //temp, _ := json.Marshal(req) | |||||
| //log.Info("%s", temp) | |||||
| if res.StatusCode() != http.StatusOK { | if res.StatusCode() != http.StatusOK { | ||||
| var temp models.ErrorResult | var temp models.ErrorResult | ||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | ||||
| @@ -1172,7 +1178,11 @@ sendjob: | |||||
| log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | log.Error("数据集错误!createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | ||||
| return &result, fmt.Errorf("数据集错误!") | return &result, fmt.Errorf("数据集错误!") | ||||
| } | } | ||||
| return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } else { | |||||
| return &result, fmt.Errorf("createInferenceJob failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| } | |||||
| } | } | ||||
| if !result.IsSuccess { | if !result.IsSuccess { | ||||
| @@ -1212,7 +1222,11 @@ sendjob: | |||||
| err = json.Unmarshal(res.Body(), &response) | err = json.Unmarshal(res.Body(), &response) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("json.Unmarshal failed: %s", err.Error()) | log.Error("json.Unmarshal failed: %s", err.Error()) | ||||
| return &result, fmt.Errorf("son.Unmarshal failed: %s", err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed: %s", err.Error()) | |||||
| } | |||||
| if res.StatusCode() == http.StatusBadGateway { | |||||
| return &result, fmt.Errorf(UnknownErrorPrefix+"createNotebook2 failed(%s): %s", response.ErrorCode, response.ErrorMsg) | |||||
| } | } | ||||
| if len(response.ErrorCode) != 0 { | if len(response.ErrorCode) != 0 { | ||||
| @@ -1271,3 +1285,139 @@ sendjob: | |||||
| return &result, nil | return &result, nil | ||||
| } | } | ||||
| func GetTrainJobList(perPage, page int, sortBy, order, searchContent string) (*models.GetTrainJobListResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetTrainJobListResult | |||||
| retry := 0 | |||||
| sendjob: | |||||
| res, err := client.R(). | |||||
| SetQueryParams(map[string]string{ | |||||
| "per_page": strconv.Itoa(perPage), | |||||
| "page": strconv.Itoa(page), | |||||
| "sortBy": sortBy, | |||||
| "order": order, | |||||
| "search_content": searchContent, | |||||
| }). | |||||
| SetAuthToken(TOKEN). | |||||
| SetResult(&result). | |||||
| Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob) | |||||
| if err != nil { | |||||
| return nil, fmt.Errorf("resty GetTrainJobList: %v", err) | |||||
| } | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||||
| retry++ | |||||
| _ = getToken() | |||||
| goto sendjob | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| var temp models.ErrorResult | |||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| log.Error("GetTrainJobList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf(temp.ErrorMsg) | |||||
| } | |||||
| if !result.IsSuccess { | |||||
| log.Error("GetTrainJobList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| return &result, fmt.Errorf(result.ErrorMsg) | |||||
| } | |||||
| return &result, nil | |||||
| } | |||||
| func GetTrainJobVersionList(perPage, page int, jobID string) (*models.GetTrainJobVersionListResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetTrainJobVersionListResult | |||||
| retry := 0 | |||||
| sendjob: | |||||
| res, err := client.R(). | |||||
| SetQueryParams(map[string]string{ | |||||
| "per_page": strconv.Itoa(perPage), | |||||
| "page": strconv.Itoa(page), | |||||
| }). | |||||
| SetAuthToken(TOKEN). | |||||
| SetResult(&result). | |||||
| Get(HOST + "/v1/" + setting.ProjectID + urlTrainJob + "/" + jobID + "/versions") | |||||
| if err != nil { | |||||
| return nil, fmt.Errorf("resty GetTrainJobVersionList: %v", err) | |||||
| } | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||||
| retry++ | |||||
| _ = getToken() | |||||
| goto sendjob | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| var temp models.ErrorResult | |||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| log.Error("GetTrainJobVersionList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf(temp.ErrorMsg) | |||||
| } | |||||
| if !result.IsSuccess { | |||||
| log.Error("GetTrainJobVersionList failed(%s): %s", result.ErrorCode, result.ErrorMsg) | |||||
| return &result, fmt.Errorf(result.ErrorMsg) | |||||
| } | |||||
| return &result, nil | |||||
| } | |||||
| func GetNotebookList(limit, offset int, sortBy, order, searchContent string) (*models.GetNotebookListResult, error) { | |||||
| checkSetting() | |||||
| client := getRestyClient() | |||||
| var result models.GetNotebookListResult | |||||
| retry := 0 | |||||
| sendjob: | |||||
| res, err := client.R(). | |||||
| SetQueryParams(map[string]string{ | |||||
| "limit": strconv.Itoa(limit), | |||||
| "offset": strconv.Itoa(offset), | |||||
| "name": searchContent, | |||||
| "sort_key": sortBy, | |||||
| "sort_dir": order, | |||||
| }). | |||||
| SetAuthToken(TOKEN). | |||||
| SetResult(&result). | |||||
| Get(HOST + "/v1/" + setting.ProjectID + urlNotebook2) | |||||
| if err != nil { | |||||
| return nil, fmt.Errorf("resty GetNotebookList: %v", err) | |||||
| } | |||||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||||
| retry++ | |||||
| _ = getToken() | |||||
| goto sendjob | |||||
| } | |||||
| if res.StatusCode() != http.StatusOK { | |||||
| var temp models.ErrorResult | |||||
| if err = json.Unmarshal([]byte(res.String()), &temp); err != nil { | |||||
| log.Error("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| return &result, fmt.Errorf("json.Unmarshal failed(%s): %v", res.String(), err.Error()) | |||||
| } | |||||
| log.Error("GetNotebookList failed(%d):%s(%s)", res.StatusCode(), temp.ErrorCode, temp.ErrorMsg) | |||||
| return &result, fmt.Errorf(temp.ErrorMsg) | |||||
| } | |||||
| return &result, nil | |||||
| } | |||||
| @@ -938,11 +938,10 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| }, reqRepoReader(models.UnitTypeModelManage)) | }, reqRepoReader(models.UnitTypeModelManage)) | ||||
| m.Group("/modelarts", func() { | m.Group("/modelarts", func() { | ||||
| m.Group("/notebook", func() { | m.Group("/notebook", func() { | ||||
| //m.Get("/:jobid", repo.GetModelArtsNotebook) | |||||
| m.Get("/:id", repo.GetModelArtsNotebook2) | m.Get("/:id", repo.GetModelArtsNotebook2) | ||||
| }) | }) | ||||
| m.Group("/train-job", func() { | m.Group("/train-job", func() { | ||||
| m.Group("/:jobid", func() { | |||||
| m.Group("/:id", func() { | |||||
| m.Get("", repo.GetModelArtsTrainJobVersion) | m.Get("", repo.GetModelArtsTrainJobVersion) | ||||
| m.Get("/log", repo.TrainJobGetLog) | m.Get("/log", repo.TrainJobGetLog) | ||||
| m.Post("/del_version", repo.DelTrainJobVersion) | m.Post("/del_version", repo.DelTrainJobVersion) | ||||
| @@ -952,7 +951,7 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| }) | }) | ||||
| }) | }) | ||||
| m.Group("/inference-job", func() { | m.Group("/inference-job", func() { | ||||
| m.Group("/:jobid", func() { | |||||
| m.Group("/:id", func() { | |||||
| m.Get("", repo.GetModelArtsInferenceJob) | m.Get("", repo.GetModelArtsInferenceJob) | ||||
| m.Get("/log", repo.TrainJobGetLog) | m.Get("/log", repo.TrainJobGetLog) | ||||
| m.Post("/del_version", repo.DelTrainJobVersion) | m.Post("/del_version", repo.DelTrainJobVersion) | ||||
| @@ -25,105 +25,27 @@ import ( | |||||
| routerRepo "code.gitea.io/gitea/routers/repo" | routerRepo "code.gitea.io/gitea/routers/repo" | ||||
| ) | ) | ||||
| func GetModelArtsNotebook(ctx *context.APIContext) { | |||||
| var ( | |||||
| err error | |||||
| ) | |||||
| jobID := ctx.Params(":jobid") | |||||
| repoID := ctx.Repo.Repository.ID | |||||
| job, err := models.GetRepoCloudBrainByJobID(repoID, jobID) | |||||
| if err != nil { | |||||
| ctx.NotFound(err) | |||||
| return | |||||
| } | |||||
| result, err := modelarts.GetJob(jobID) | |||||
| if err != nil { | |||||
| ctx.NotFound(err) | |||||
| return | |||||
| } | |||||
| job.Status = result.Status | |||||
| err = models.UpdateJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||||
| "JobID": jobID, | |||||
| "JobStatus": result.Status, | |||||
| }) | |||||
| } | |||||
| func GetModelArtsNotebook2(ctx *context.APIContext) { | func GetModelArtsNotebook2(ctx *context.APIContext) { | ||||
| var ( | var ( | ||||
| err error | err error | ||||
| ) | ) | ||||
| ID := ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(ID) | |||||
| id := ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| result, err := modelarts.GetNotebook2(job.JobID) | |||||
| err = modelarts.HandleNotebookInfo(job) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| if job.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
| job.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
| } | |||||
| job.Status = result.Status | |||||
| if job.EndTime == 0 && models.IsModelArtsDebugJobTerminal(job.Status) { | |||||
| job.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| job.CorrectCreateUnix() | |||||
| job.ComputeAndSetDuration() | |||||
| err = models.UpdateJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "ID": ID, | |||||
| "ID": id, | |||||
| "JobName": job.JobName, | "JobName": job.JobName, | ||||
| "JobStatus": result.Status, | |||||
| }) | |||||
| } | |||||
| func GetModelArtsTrainJob(ctx *context.APIContext) { | |||||
| var ( | |||||
| err error | |||||
| ) | |||||
| jobID := ctx.Params(":jobid") | |||||
| repoID := ctx.Repo.Repository.ID | |||||
| job, err := models.GetRepoCloudBrainByJobID(repoID, jobID) | |||||
| if err != nil { | |||||
| ctx.NotFound(err) | |||||
| return | |||||
| } | |||||
| result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) | |||||
| if err != nil { | |||||
| ctx.NotFound(err) | |||||
| return | |||||
| } | |||||
| job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| job.Duration = result.Duration | |||||
| job.TrainJobDuration = result.TrainJobDuration | |||||
| err = models.UpdateJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | |||||
| "JobID": jobID, | |||||
| "JobStatus": job.Status, | |||||
| "JobDuration": job.Duration, | |||||
| "JobStatus": job.Status, | |||||
| }) | }) | ||||
| } | } | ||||
| @@ -134,9 +56,8 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||||
| aiCenterName string | aiCenterName string | ||||
| ) | ) | ||||
| jobID := ctx.Params(":jobid") | |||||
| versionName := ctx.Query("version_name") | |||||
| job, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| id := ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| @@ -174,29 +95,13 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||||
| } | } | ||||
| } | } | ||||
| } else if job.Type == models.TypeCloudBrainTwo { | } else if job.Type == models.TypeCloudBrainTwo { | ||||
| result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) | |||||
| err := modelarts.HandleTrainJobInfo(job) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| if job.StartTime == 0 && result.StartTime > 0 { | |||||
| job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| job.Duration = result.Duration / 1000 | |||||
| job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) | |||||
| if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { | |||||
| job.EndTime = job.StartTime.Add(job.Duration) | |||||
| } | |||||
| job.CorrectCreateUnix() | |||||
| err = models.UpdateTrainJobVersion(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| } else if job.Type == models.TypeC2Net { | } else if job.Type == models.TypeC2Net { | ||||
| result, err := grampus.GetJob(jobID) | |||||
| result, err := grampus.GetJob(job.JobID) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetJob(%s) failed:%v", job.JobName, err) | log.Error("GetJob(%s) failed:%v", job.JobName, err) | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| @@ -235,7 +140,8 @@ func GetModelArtsTrainJobVersion(ctx *context.APIContext) { | |||||
| } | } | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "JobID": job.JobID, | |||||
| "ID": id, | |||||
| "JobStatus": job.Status, | "JobStatus": job.Status, | ||||
| "JobDuration": job.TrainJobDuration, | "JobDuration": job.TrainJobDuration, | ||||
| "AiCenter": aiCenterName, | "AiCenter": aiCenterName, | ||||
| @@ -317,7 +223,7 @@ func TrainJobGetLog(ctx *context.APIContext) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var id = ctx.Params(":id") | |||||
| var versionName = ctx.Query("version_name") | var versionName = ctx.Query("version_name") | ||||
| var baseLine = ctx.Query("base_line") | var baseLine = ctx.Query("base_line") | ||||
| var order = ctx.Query("order") | var order = ctx.Query("order") | ||||
| @@ -335,14 +241,19 @@ func TrainJobGetLog(ctx *context.APIContext) { | |||||
| return | return | ||||
| } | } | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| temp, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| return | return | ||||
| } | } | ||||
| resultLogFile, result, err := trainJobGetLogContent(jobID, task.VersionID, baseLine, order, lines_int) | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(temp.JobID, versionName) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("trainJobGetLog(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| return | |||||
| } | |||||
| resultLogFile, result, err := trainJobGetLogContent(task.JobID, task.VersionID, baseLine, order, lines_int) | |||||
| if err != nil { | |||||
| log.Error("trainJobGetLog(%s) failed:%v", task.JobID, err.Error()) | |||||
| // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | // ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | ||||
| return | return | ||||
| } | } | ||||
| @@ -359,7 +270,7 @@ func TrainJobGetLog(ctx *context.APIContext) { | |||||
| ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] | ctx.Data["log_file_name"] = resultLogFile.LogFileList[0] | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "ID": id, | |||||
| "LogFileName": resultLogFile.LogFileList[0], | "LogFileName": resultLogFile.LogFileList[0], | ||||
| "StartLine": result.StartLine, | "StartLine": result.StartLine, | ||||
| "EndLine": result.EndLine, | "EndLine": result.EndLine, | ||||
| @@ -391,17 +302,16 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| var id = ctx.Params(":id") | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| //删除modelarts上的记录 | //删除modelarts上的记录 | ||||
| _, err = modelarts.DelTrainJobVersion(jobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| _, err = modelarts.DelTrainJobVersion(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error()) | log.Error("DelTrainJobVersion(%s) failed:%v", task.JobName, err.Error()) | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| @@ -424,7 +334,7 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||||
| RepoID: repo.ID, | RepoID: repo.ID, | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| JobTypes: jobTypes, | JobTypes: jobTypes, | ||||
| JobID: jobID, | |||||
| JobID: task.JobID, | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("get VersionListCount failed", err) | ctx.ServerError("get VersionListCount failed", err) | ||||
| @@ -433,13 +343,13 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||||
| if VersionListCount > 0 { | if VersionListCount > 0 { | ||||
| // 判断当前删掉的任务是否是最新版本,若是,将排序后的TotalVersionCount置为删掉的最新版本的TotalVersionCount,若不是,按时间排序后的版本列表的第一个版本设置为最新版本,TotalVersionCount不变 | // 判断当前删掉的任务是否是最新版本,若是,将排序后的TotalVersionCount置为删掉的最新版本的TotalVersionCount,若不是,按时间排序后的版本列表的第一个版本设置为最新版本,TotalVersionCount不变 | ||||
| if task.IsLatestVersion == modelarts.IsLatestVersion { | if task.IsLatestVersion == modelarts.IsLatestVersion { | ||||
| err = models.SetVersionCountAndLatestVersion(jobID, VersionTaskList[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, task.TotalVersionCount) | |||||
| err = models.SetVersionCountAndLatestVersion(task.JobName, VersionTaskList[0].Cloudbrain.VersionName, VersionListCount, modelarts.IsLatestVersion, task.TotalVersionCount) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("UpdateJobVersionCount failed", err) | ctx.ServerError("UpdateJobVersionCount failed", err) | ||||
| return | return | ||||
| } | } | ||||
| } else { | } else { | ||||
| err = models.SetVersionCountAndLatestVersion(jobID, VersionTaskList[0].VersionName, VersionListCount, modelarts.IsLatestVersion, VersionTaskList[0].Cloudbrain.TotalVersionCount) | |||||
| err = models.SetVersionCountAndLatestVersion(task.JobName, VersionTaskList[0].VersionName, VersionListCount, modelarts.IsLatestVersion, VersionTaskList[0].Cloudbrain.TotalVersionCount) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.ServerError("UpdateJobVersionCount failed", err) | ctx.ServerError("UpdateJobVersionCount failed", err) | ||||
| return | return | ||||
| @@ -450,8 +360,8 @@ func DelTrainJobVersion(ctx *context.APIContext) { | |||||
| } | } | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "VersionName": versionName, | |||||
| "ID": id, | |||||
| "VersionName": task.VersionName, | |||||
| "StatusOK": 0, | "StatusOK": 0, | ||||
| "VersionListCount": VersionListCount, | "VersionListCount": VersionListCount, | ||||
| }) | }) | ||||
| @@ -461,23 +371,23 @@ func StopTrainJobVersion(ctx *context.APIContext) { | |||||
| var ( | var ( | ||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| var id = ctx.Params(":id") | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| ctx.NotFound(err) | |||||
| return | return | ||||
| } | } | ||||
| _, err = modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| _, err = modelarts.StopTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | ||||
| return | return | ||||
| } | } | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "VersionName": versionName, | |||||
| "ID": id, | |||||
| "VersionName": task.VersionName, | |||||
| "StatusOK": 0, | "StatusOK": 0, | ||||
| }) | }) | ||||
| } | } | ||||
| @@ -487,19 +397,19 @@ func ModelList(ctx *context.APIContext) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | |||||
| parentDir := ctx.Query("parentDir") | parentDir := ctx.Query("parentDir") | ||||
| dirArray := strings.Split(parentDir, "/") | dirArray := strings.Split(parentDir, "/") | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| var id = ctx.Params(":id") | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| ctx.NotFound(err) | |||||
| return | return | ||||
| } | } | ||||
| var fileInfos []storage.FileInfo | var fileInfos []storage.FileInfo | ||||
| if task.ComputeResource == models.NPUResource { | if task.ComputeResource == models.NPUResource { | ||||
| fileInfos, err = storage.GetObsListObject(task.JobName, "output/", parentDir, versionName) | |||||
| fileInfos, err = storage.GetObsListObject(task.JobName, "output/", parentDir, task.VersionName) | |||||
| if err != nil { | if err != nil { | ||||
| log.Info("get TrainJobListModel failed:", err) | log.Info("get TrainJobListModel failed:", err) | ||||
| ctx.ServerError("GetObsListObject:", err) | ctx.ServerError("GetObsListObject:", err) | ||||
| @@ -522,8 +432,8 @@ func ModelList(ctx *context.APIContext) { | |||||
| } | } | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "VersionName": versionName, | |||||
| "ID": id, | |||||
| "VersionName": task.VersionName, | |||||
| "StatusOK": 0, | "StatusOK": 0, | ||||
| "Path": dirArray, | "Path": dirArray, | ||||
| "Dirs": fileInfos, | "Dirs": fileInfos, | ||||
| @@ -537,35 +447,20 @@ func GetModelArtsInferenceJob(ctx *context.APIContext) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| jobID := ctx.Params(":jobid") | |||||
| job, err := models.GetCloudbrainByJobID(jobID) | |||||
| id := ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| result, err := modelarts.GetTrainJob(jobID, strconv.FormatInt(job.VersionID, 10)) | |||||
| err = modelarts.HandleTrainJobInfo(job) | |||||
| if err != nil { | if err != nil { | ||||
| ctx.NotFound(err) | ctx.NotFound(err) | ||||
| return | return | ||||
| } | } | ||||
| if job.StartTime == 0 && result.StartTime > 0 { | |||||
| job.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| job.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| job.Duration = result.Duration / 1000 | |||||
| job.TrainJobDuration = models.ConvertDurationToStr(job.Duration) | |||||
| if job.EndTime == 0 && models.IsTrainJobTerminal(job.Status) && job.StartTime > 0 { | |||||
| job.EndTime = job.StartTime.Add(job.Duration) | |||||
| } | |||||
| job.CorrectCreateUnix() | |||||
| err = models.UpdateInferenceJob(job) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob failed:", err) | |||||
| } | |||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "ID": id, | |||||
| "JobStatus": job.Status, | "JobStatus": job.Status, | ||||
| "JobDuration": job.TrainJobDuration, | "JobDuration": job.TrainJobDuration, | ||||
| }) | }) | ||||
| @@ -577,16 +472,15 @@ func ResultList(ctx *context.APIContext) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | |||||
| var id = ctx.Params(":id") | |||||
| parentDir := ctx.Query("parentDir") | parentDir := ctx.Query("parentDir") | ||||
| dirArray := strings.Split(parentDir, "/") | dirArray := strings.Split(parentDir, "/") | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| return | return | ||||
| } | } | ||||
| models, err := storage.GetObsListObject(task.JobName, "result/", parentDir, versionName) | |||||
| models, err := storage.GetObsListObject(task.JobName, "result/", parentDir, task.VersionName) | |||||
| if err != nil { | if err != nil { | ||||
| log.Info("get TrainJobListModel failed:", err) | log.Info("get TrainJobListModel failed:", err) | ||||
| ctx.ServerError("GetObsListObject:", err) | ctx.ServerError("GetObsListObject:", err) | ||||
| @@ -594,8 +488,8 @@ func ResultList(ctx *context.APIContext) { | |||||
| } | } | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "VersionName": versionName, | |||||
| "ID": id, | |||||
| "VersionName": task.VersionName, | |||||
| "StatusOK": 0, | "StatusOK": 0, | ||||
| "Path": dirArray, | "Path": dirArray, | ||||
| "Dirs": models, | "Dirs": models, | ||||
| @@ -609,38 +503,37 @@ func TrainJobGetMetricStatistic(ctx *context.APIContext) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | |||||
| var id = ctx.Params(":id") | |||||
| result, err := trainJobGetMetricStatistic(jobID, versionName) | |||||
| result, err := trainJobGetMetricStatistic(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("trainJobGetMetricStatistic(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("trainJobGetMetricStatistic(%s) failed:%v", id, err.Error()) | |||||
| return | return | ||||
| } | } | ||||
| ctx.JSON(http.StatusOK, map[string]interface{}{ | ctx.JSON(http.StatusOK, map[string]interface{}{ | ||||
| "JobID": jobID, | |||||
| "ID": id, | |||||
| "Interval": result.Interval, | "Interval": result.Interval, | ||||
| "MetricsInfo": result.MetricsInfo, | "MetricsInfo": result.MetricsInfo, | ||||
| }) | }) | ||||
| } | } | ||||
| func trainJobGetMetricStatistic(jobID string, versionName string) (*models.GetTrainJobMetricStatisticResult, error) { | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| func trainJobGetMetricStatistic(id string) (*models.GetTrainJobMetricStatisticResult, error) { | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| resultLogFile, err := modelarts.GetTrainJobLogFileNames(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetTrainJobLogFileNames(%s) failed:%v", task.JobID, err.Error()) | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| result, err := modelarts.GetTrainJobMetricStatistic(jobID, strconv.FormatInt(task.VersionID, 10), resultLogFile.LogFileList[0]) | |||||
| result, err := modelarts.GetTrainJobMetricStatistic(task.JobID, strconv.FormatInt(task.VersionID, 10), resultLogFile.LogFileList[0]) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetTrainJobMetricStatistic(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetTrainJobMetricStatistic(%s) failed:%v", task.JobID, err.Error()) | |||||
| return nil, err | return nil, err | ||||
| } | } | ||||
| @@ -373,7 +373,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { | |||||
| } | } | ||||
| } | } | ||||
| func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBrainInferencForm) { | func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBrainInferencForm) { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| displayJobName := form.DisplayJobName | displayJobName := form.DisplayJobName | ||||
| @@ -494,6 +493,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra | |||||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") | ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/inference-job") | ||||
| } | } | ||||
| /** | /** | ||||
| 检查用户传输的参数是否符合专属资源池 | 检查用户传输的参数是否符合专属资源池 | ||||
| */ | */ | ||||
| @@ -1214,7 +1214,7 @@ func StopJobs(cloudBrains []*models.Cloudbrain) { | |||||
| Action: models.ActionStop, | Action: models.ActionStop, | ||||
| } | } | ||||
| err := retry(3, time.Second*30, func() error { | err := retry(3, time.Second*30, func() error { | ||||
| _, err := modelarts.ManageNotebook(taskInfo.JobID, param) | |||||
| _, err := modelarts.ManageNotebook2(taskInfo.JobID, param) | |||||
| return err | return err | ||||
| }) | }) | ||||
| logErrorAndUpdateJobStatus(err, taskInfo) | logErrorAndUpdateJobStatus(err, taskInfo) | ||||
| @@ -1765,62 +1765,24 @@ func SyncCloudbrainStatus() { | |||||
| } | } | ||||
| } else if task.Type == models.TypeCloudBrainTwo { | } else if task.Type == models.TypeCloudBrainTwo { | ||||
| if task.JobType == string(models.JobTypeDebug) { | if task.JobType == string(models.JobTypeDebug) { | ||||
| //result, err := modelarts.GetJob(task.JobID) | |||||
| result, err := modelarts.GetNotebook2(task.JobID) | |||||
| err := modelarts.HandleNotebookInfo(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("HandleNotebookInfo(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| if result != nil { | |||||
| task.Status = result.Status | |||||
| if task.StartTime == 0 && result.Lease.UpdateTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.Lease.UpdateTime / 1000) | |||||
| } | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| task.ComputeAndSetDuration() | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| continue | |||||
| } | |||||
| } | |||||
| } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | } else if task.JobType == string(models.JobTypeTrain) || task.JobType == string(models.JobTypeInference) { | ||||
| result, err := modelarts.GetTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| err := modelarts.HandleTrainJobInfo(task) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("HandleTrainJobInfo(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| if result != nil { | |||||
| task.Status = modelarts.TransTrainJobStatus(result.IntStatus) | |||||
| task.Duration = result.Duration / 1000 | |||||
| task.TrainJobDuration = result.TrainJobDuration | |||||
| if task.StartTime == 0 && result.StartTime > 0 { | |||||
| task.StartTime = timeutil.TimeStamp(result.StartTime / 1000) | |||||
| } | |||||
| task.TrainJobDuration = models.ConvertDurationToStr(task.Duration) | |||||
| if task.EndTime == 0 && models.IsTrainJobTerminal(task.Status) && task.StartTime > 0 { | |||||
| task.EndTime = task.StartTime.Add(task.Duration) | |||||
| } | |||||
| task.CorrectCreateUnix() | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| continue | |||||
| } | |||||
| } | |||||
| } else { | } else { | ||||
| log.Error("task.JobType(%s) is error:%s", task.JobName, task.JobType) | |||||
| log.Error("task.JobType(%s) is error:%s", task.DisplayJobName, task.JobType) | |||||
| } | } | ||||
| } else if task.Type == models.TypeC2Net { | } else if task.Type == models.TypeC2Net { | ||||
| result, err := grampus.GetJob(task.JobID) | result, err := grampus.GetJob(task.JobID) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetTrainJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("GetTrainJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| @@ -1841,12 +1803,12 @@ func SyncCloudbrainStatus() { | |||||
| task.CorrectCreateUnix() | task.CorrectCreateUnix() | ||||
| err = models.UpdateJob(task) | err = models.UpdateJob(task) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err) | |||||
| log.Error("UpdateJob(%s) failed:%v", task.DisplayJobName, err) | |||||
| continue | continue | ||||
| } | } | ||||
| } | } | ||||
| } else { | } else { | ||||
| log.Error("task.Type(%s) is error:%d", task.JobName, task.Type) | |||||
| log.Error("task.Type(%s) is error:%d", task.DisplayJobName, task.Type) | |||||
| } | } | ||||
| } | } | ||||
| @@ -336,7 +336,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||||
| EngineName: image, | EngineName: image, | ||||
| DatasetName: attachment.Name, | DatasetName: attachment.Name, | ||||
| IsLatestVersion: modelarts.IsLatestVersion, | IsLatestVersion: modelarts.IsLatestVersion, | ||||
| VersionCount: modelarts.VersionCount, | |||||
| VersionCount: modelarts.VersionCountOne, | |||||
| WorkServerNumber: 1, | WorkServerNumber: 1, | ||||
| } | } | ||||
| @@ -386,7 +386,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, form auth.CreateGrampusTrain | |||||
| branchName := form.BranchName | branchName := form.BranchName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| flavorName := form.FlavorName | flavorName := form.FlavorName | ||||
| versionCount := modelarts.VersionCount | |||||
| versionCount := modelarts.VersionCountOne | |||||
| engineName := form.EngineName | engineName := form.EngineName | ||||
| if !jobNamePattern.MatchString(displayJobName) { | if !jobNamePattern.MatchString(displayJobName) { | ||||
| @@ -7,6 +7,7 @@ import ( | |||||
| "fmt" | "fmt" | ||||
| "io" | "io" | ||||
| "io/ioutil" | "io/ioutil" | ||||
| "math/rand" | |||||
| "net/http" | "net/http" | ||||
| "os" | "os" | ||||
| "path" | "path" | ||||
| @@ -262,28 +263,15 @@ func NotebookShow(ctx *context.Context) { | |||||
| return | return | ||||
| } | } | ||||
| result, err := modelarts.GetNotebook2(task.JobID) | |||||
| if err != nil { | |||||
| log.Error("GET job error", err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||||
| return | |||||
| } | |||||
| if result != nil { | |||||
| if task.DeletedAt.IsZero() { //normal record | |||||
| if task.Status != result.Status { | |||||
| task.Status = result.Status | |||||
| models.ParseAndSetDurationFromModelArtsNotebook(result, task) | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("GET job error", err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||||
| return | |||||
| } | |||||
| } | |||||
| } else { //deleted record | |||||
| if task.DeletedAt.IsZero() { //normal record | |||||
| err := modelarts.HandleNotebookInfo(task) | |||||
| if err != nil { | |||||
| ctx.Data["error"] = err.Error() | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsNotebookShow, nil) | |||||
| return | |||||
| } | } | ||||
| } else { //deleted record | |||||
| } | } | ||||
| datasetDownload := make([]models.DatasetDownload, 0) | datasetDownload := make([]models.DatasetDownload, 0) | ||||
| @@ -394,82 +382,141 @@ func NotebookDebug2(ctx *context.Context) { | |||||
| ctx.Redirect(result.Url + "?token=" + result.Token) | ctx.Redirect(result.Url + "?token=" + result.Token) | ||||
| } | } | ||||
| func NotebookManage(ctx *context.Context) { | |||||
| func NotebookRestart(ctx *context.Context) { | |||||
| var ID = ctx.Params(":id") | var ID = ctx.Params(":id") | ||||
| var action = ctx.Params(":action") | |||||
| var resultCode = "0" | |||||
| var resultCode = "-1" | |||||
| var errorMsg = "" | var errorMsg = "" | ||||
| var status = "" | var status = "" | ||||
| task := ctx.Cloudbrain | |||||
| for { | for { | ||||
| task, err := models.GetCloudbrainByID(ID) | |||||
| if err != nil { | |||||
| log.Error("get task(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| ctx.CheckWechatBind() | |||||
| if ctx.Written() { | |||||
| return | |||||
| } | |||||
| if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { | |||||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||||
| errorMsg = "the job is not stopped" | |||||
| break | break | ||||
| } | } | ||||
| if action == models.ActionStop { | |||||
| if task.Status != string(models.ModelArtsRunning) { | |||||
| log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "the job is not running" | |||||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } else { | |||||
| if count >= 1 { | |||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||||
| errorMsg = "you have already a running or waiting task, can not create more" | |||||
| break | break | ||||
| } | } | ||||
| } | |||||
| if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin() && !ctx.IsUserRepoOwner()) { | |||||
| log.Error("the user has no right ro stop the job", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "you have no right to stop the job" | |||||
| break | |||||
| } | |||||
| } else if action == models.ActionRestart { | |||||
| ctx.CheckWechatBind() | |||||
| if ctx.Written() { | |||||
| return | |||||
| } | |||||
| if task.Status != string(models.ModelArtsStopped) && task.Status != string(models.ModelArtsStartFailed) && task.Status != string(models.ModelArtsCreateFailed) { | |||||
| log.Error("the job(%s) is not stopped", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "the job is not stopped" | |||||
| break | |||||
| } | |||||
| createTime := timeutil.TimeStampNow() | |||||
| newTask := &models.Cloudbrain{ | |||||
| Status: string(models.ModelArtsTrainJobWaiting), | |||||
| UserID: task.UserID, | |||||
| RepoID: task.RepoID, | |||||
| JobID: models.TempJobIdPrefix + task.JobName + strconv.Itoa(int(rand.New(rand.NewSource(time.Now().UnixNano())).Int31n(100000))), | |||||
| JobName: task.JobName, | |||||
| DisplayJobName: task.DisplayJobName, | |||||
| JobType: task.JobType, | |||||
| Type: task.Type, | |||||
| Uuid: task.Uuid, | |||||
| Image: task.Image, | |||||
| ComputeResource: task.ComputeResource, | |||||
| Description: task.Description, | |||||
| CreatedUnix: createTime, | |||||
| UpdatedUnix: createTime, | |||||
| } | |||||
| if !ctx.IsSigned || (ctx.User.ID != task.UserID && !ctx.IsUserSiteAdmin()) { | |||||
| log.Error("the user has no right ro restart the job", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "you have no right to restart the job" | |||||
| err = models.RestartCloudbrain(task, newTask) | |||||
| if err != nil { | |||||
| log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | |||||
| param := models.NotebookAction{ | |||||
| Action: models.ActionStart, | |||||
| } | |||||
| res, err := modelarts.ManageNotebook2(task.JobID, param) | |||||
| if err != nil { | |||||
| log.Error("ManageNotebook2(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["MsgID"]) | |||||
| if strings.HasPrefix(err.Error(), modelarts.UnknownErrorPrefix) { | |||||
| log.Info("(%s)unknown error, set temp status", newTask.DisplayJobName) | |||||
| errTemp := models.InsertCloudbrainTemp(&models.CloudbrainTemp{ | |||||
| CloudbrainID: newTask.ID, | |||||
| Status: models.JobStatusTemp, | |||||
| Type: newTask.Type, | |||||
| JobName: newTask.JobName, | |||||
| JobType: newTask.JobType, | |||||
| }) | |||||
| if errTemp != nil { | |||||
| log.Error("InsertCloudbrainTemp failed: %v", errTemp.Error()) | |||||
| } | |||||
| } else { | |||||
| newTask.Status = string(models.ModelArtsTrainJobFailed) | |||||
| errTemp := models.UpdateJob(newTask) | |||||
| if errTemp != nil { | |||||
| log.Error("UpdateJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| errTemp = models.DeleteJob(newTask) | |||||
| if errTemp != nil { | |||||
| log.Error("DeleteJob failed: %v", errTemp.Error()) | |||||
| } | |||||
| errorMsg = err.Error() | |||||
| break | break | ||||
| } | } | ||||
| count, err := models.GetCloudbrainNotebookCountByUserID(ctx.User.ID) | |||||
| } else { | |||||
| newTask.Status = res.Status | |||||
| newTask.JobID = task.JobID | |||||
| err = models.UpdateJob(newTask) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainNotebookCountByUserID failed:%v", err, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| log.Error("UpdateJob failed: %v", err.Error()) | |||||
| errorMsg = err.Error() | |||||
| break | break | ||||
| } else { | |||||
| if count >= 1 { | |||||
| log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "you have already a running or waiting task, can not create more" | |||||
| break | |||||
| } | |||||
| } | } | ||||
| } | |||||
| action = models.ActionStart | |||||
| } else { | |||||
| log.Error("the action(%s) is illegal", action, ctx.Data["MsgID"]) | |||||
| status = res.Status | |||||
| resultCode = "0" | |||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, strconv.FormatInt(newTask.ID, 10), newTask.DisplayJobName, models.ActionCreateDebugNPUTask) | |||||
| break | |||||
| } | |||||
| ctx.JSON(200, map[string]string{ | |||||
| "result_code": resultCode, | |||||
| "error_msg": errorMsg, | |||||
| "status": status, | |||||
| "id": ID, | |||||
| }) | |||||
| } | |||||
| func NotebookStop(ctx *context.Context) { | |||||
| var ID = ctx.Params(":id") | |||||
| var resultCode = "0" | |||||
| var errorMsg = "" | |||||
| var status = "" | |||||
| task := ctx.Cloudbrain | |||||
| for { | |||||
| if task.Status != string(models.ModelArtsRunning) { | |||||
| log.Error("the job(%s) is not running", task.JobName, ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | resultCode = "-1" | ||||
| errorMsg = "非法操作" | |||||
| errorMsg = "the job is not running" | |||||
| break | break | ||||
| } | } | ||||
| param := models.NotebookAction{ | param := models.NotebookAction{ | ||||
| Action: action, | |||||
| Action: models.ActionStop, | |||||
| } | } | ||||
| createTime := timeutil.TimeStampNow() | |||||
| res, err := modelarts.ManageNotebook2(task.JobID, param) | res, err := modelarts.ManageNotebook2(task.JobID, param) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | log.Error("ManageNotebook2(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | ||||
| @@ -482,46 +529,17 @@ func NotebookManage(ctx *context.Context) { | |||||
| } | } | ||||
| status = res.Status | status = res.Status | ||||
| if action == models.ActionStart { | |||||
| newTask := &models.Cloudbrain{ | |||||
| Status: status, | |||||
| UserID: task.UserID, | |||||
| RepoID: task.RepoID, | |||||
| JobID: task.JobID, | |||||
| JobName: task.JobName, | |||||
| DisplayJobName: task.DisplayJobName, | |||||
| JobType: task.JobType, | |||||
| Type: task.Type, | |||||
| Uuid: task.Uuid, | |||||
| Image: task.Image, | |||||
| ComputeResource: task.ComputeResource, | |||||
| Description: task.Description, | |||||
| CreatedUnix: createTime, | |||||
| UpdatedUnix: createTime, | |||||
| } | |||||
| err = models.RestartCloudbrain(task, newTask) | |||||
| if err != nil { | |||||
| log.Error("RestartCloudbrain(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | |||||
| ID = strconv.FormatInt(newTask.ID, 10) | |||||
| notification.NotifyOtherTask(ctx.User, ctx.Repo.Repository, ID, task.DisplayJobName, models.ActionCreateDebugNPUTask) | |||||
| } else { | |||||
| task.Status = res.Status | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.ComputeAndSetDuration() | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | |||||
| task.Status = res.Status | |||||
| if task.EndTime == 0 && models.IsModelArtsDebugJobTerminal(task.Status) { | |||||
| task.EndTime = timeutil.TimeStampNow() | |||||
| } | |||||
| task.ComputeAndSetDuration() | |||||
| err = models.UpdateJob(task) | |||||
| if err != nil { | |||||
| log.Error("UpdateJob(%s) failed:%v", task.JobName, err.Error(), ctx.Data["MsgID"]) | |||||
| resultCode = "-1" | |||||
| errorMsg = "system error" | |||||
| break | |||||
| } | } | ||||
| break | break | ||||
| @@ -792,18 +810,11 @@ func TrainJobNewVersion(ctx *context.Context) { | |||||
| func trainJobNewVersionDataPrepare(ctx *context.Context) error { | func trainJobNewVersionDataPrepare(ctx *context.Context) error { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | |||||
| var id = ctx.Params(":id") | |||||
| // canNewJob, err := canUserCreateTrainJobVersion(ctx, jobID, versionName) | |||||
| // if err != nil { | |||||
| // ctx.ServerError("canNewJob can info failed", err) | |||||
| // return err | |||||
| // } | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -883,13 +894,10 @@ func trainJobNewVersionDataPrepare(ctx *context.Context) error { | |||||
| func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { | func versionErrorDataPrepare(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) error { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| var jobID = ctx.Params(":jobid") | |||||
| // var versionName = ctx.Params(":version-name") | |||||
| var versionName = ctx.Query("version_name") | |||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| var id = ctx.Params(":id") | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| return err | return err | ||||
| } | } | ||||
| @@ -994,7 +1002,7 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||||
| branch_name := form.BranchName | branch_name := form.BranchName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| FlavorName := form.FlavorName | FlavorName := form.FlavorName | ||||
| VersionCount := modelarts.VersionCount | |||||
| VersionCount := modelarts.VersionCountOne | |||||
| EngineName := form.EngineName | EngineName := form.EngineName | ||||
| count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | ||||
| @@ -1248,7 +1256,7 @@ func getUserCommand(engineId int, req *modelarts.GenerateTrainJobReq) (string, s | |||||
| func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { | func TrainJobCreateVersion(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) { | ||||
| ctx.Data["PageIsTrainJob"] = true | ctx.Data["PageIsTrainJob"] = true | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var jobID = ctx.Cloudbrain.JobID | |||||
| count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | count, err := models.GetCloudbrainTrainJobCountByUserID(ctx.User.ID) | ||||
| if err != nil { | if err != nil { | ||||
| @@ -1620,7 +1628,13 @@ func paramCheckCreateInferenceJob(form auth.CreateModelArtsInferenceJobForm) err | |||||
| func TrainJobShow(ctx *context.Context) { | func TrainJobShow(ctx *context.Context) { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var id = ctx.Params(":id") | |||||
| job, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByID failed:%v", err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | |||||
| } | |||||
| repo := ctx.Repo.Repository | repo := ctx.Repo.Repository | ||||
| page := ctx.QueryInt("page") | page := ctx.QueryInt("page") | ||||
| @@ -1638,11 +1652,11 @@ func TrainJobShow(ctx *context.Context) { | |||||
| RepoID: repo.ID, | RepoID: repo.ID, | ||||
| Type: models.TypeCloudBrainTwo, | Type: models.TypeCloudBrainTwo, | ||||
| JobTypes: jobTypes, | JobTypes: jobTypes, | ||||
| JobID: jobID, | |||||
| JobID: job.JobID, | |||||
| }) | }) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetVersionListTasks(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetVersionListTasks(%s) failed:%v", id, err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ||||
| return | return | ||||
| } | } | ||||
| @@ -1688,7 +1702,7 @@ func TrainJobShow(ctx *context.Context) { | |||||
| pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) | pager := context.NewPagination(VersionListCount, setting.UI.IssuePagingNum, page, 5) | ||||
| pager.SetDefaultParams(ctx) | pager.SetDefaultParams(ctx) | ||||
| ctx.Data["Page"] = pager | ctx.Data["Page"] = pager | ||||
| ctx.Data["jobID"] = jobID | |||||
| ctx.Data["jobID"] = job.JobID | |||||
| ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName | ctx.Data["displayJobName"] = VersionListTasks[0].DisplayJobName | ||||
| ctx.Data["version_list_task"] = VersionListTasks | ctx.Data["version_list_task"] = VersionListTasks | ||||
| ctx.Data["version_list_count"] = VersionListCount | ctx.Data["version_list_count"] = VersionListCount | ||||
| @@ -1696,62 +1710,8 @@ func TrainJobShow(ctx *context.Context) { | |||||
| ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | ||||
| } | } | ||||
| func TrainJobGetLog(ctx *context.Context) { | |||||
| ctx.Data["PageIsTrainJob"] = true | |||||
| var jobID = ctx.Params(":jobid") | |||||
| var logFileName = ctx.Query("file_name") | |||||
| var baseLine = ctx.Query("base_line") | |||||
| var order = ctx.Query("order") | |||||
| if order != modelarts.OrderDesc && order != modelarts.OrderAsc { | |||||
| log.Error("order(%s) check failed", order) | |||||
| ctx.HTML(http.StatusBadRequest, tplModelArtsTrainJobShow) | |||||
| return | |||||
| } | |||||
| task, err := models.GetCloudbrainByJobID(jobID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||||
| return | |||||
| } | |||||
| result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), baseLine, logFileName, order, modelarts.Lines) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) | |||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobShow, nil) | |||||
| return | |||||
| } | |||||
| ctx.Data["log"] = result | |||||
| //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||||
| } | |||||
| func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) { | |||||
| task, err := models.GetCloudbrainByJobID(jobID) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||||
| return nil, nil, err | |||||
| } | |||||
| resultLogFile, err := modelarts.GetTrainJobLogFileNames(jobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLogFileNames(%s) failed:%v", jobID, err.Error()) | |||||
| return nil, nil, err | |||||
| } | |||||
| result, err := modelarts.GetTrainJobLog(jobID, strconv.FormatInt(task.VersionID, 10), "", resultLogFile.LogFileList[0], modelarts.OrderDesc, modelarts.Lines) | |||||
| if err != nil { | |||||
| log.Error("GetTrainJobLog(%s) failed:%v", jobID, err.Error()) | |||||
| return nil, nil, err | |||||
| } | |||||
| return resultLogFile, result, err | |||||
| } | |||||
| func TrainJobDel(ctx *context.Context) { | func TrainJobDel(ctx *context.Context) { | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var jobID = ctx.Cloudbrain.JobID | |||||
| var listType = ctx.Query("listType") | var listType = ctx.Query("listType") | ||||
| repo := ctx.Repo.Repository | repo := ctx.Repo.Repository | ||||
| @@ -1802,11 +1762,10 @@ func TrainJobDel(ctx *context.Context) { | |||||
| } | } | ||||
| func TrainJobStop(ctx *context.Context) { | func TrainJobStop(ctx *context.Context) { | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var listType = ctx.Query("listType") | var listType = ctx.Query("listType") | ||||
| task := ctx.Cloudbrain | task := ctx.Cloudbrain | ||||
| _, err := modelarts.StopTrainJob(jobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| _, err := modelarts.StopTrainJob(task.JobID, strconv.FormatInt(task.VersionID, 10)) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | log.Error("StopTrainJob(%s) failed:%v", task.JobName, err.Error()) | ||||
| ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | ctx.RenderWithErr(err.Error(), tplModelArtsTrainJobIndex, nil) | ||||
| @@ -1816,15 +1775,6 @@ func TrainJobStop(ctx *context.Context) { | |||||
| ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) | ctx.Redirect(setting.AppSubURL + ctx.Repo.RepoLink + "/modelarts/train-job?listType=" + listType) | ||||
| } | } | ||||
| func canUserCreateTrainJob(uid int64) (bool, error) { | |||||
| org, err := models.GetOrgByName(setting.AllowedOrg) | |||||
| if err != nil { | |||||
| log.Error("get allowed org failed: ", setting.AllowedOrg) | |||||
| return false, err | |||||
| } | |||||
| return org.IsOrgMember(uid) | |||||
| } | |||||
| func canUserCreateTrainJobVersion(ctx *context.Context, userID int64) (bool, error) { | func canUserCreateTrainJobVersion(ctx *context.Context, userID int64) (bool, error) { | ||||
| if ctx == nil || ctx.User == nil { | if ctx == nil || ctx.User == nil { | ||||
| log.Error("user unlogin!") | log.Error("user unlogin!") | ||||
| @@ -1916,7 +1866,7 @@ func InferenceJobCreate(ctx *context.Context, form auth.CreateModelArtsInference | |||||
| EngineName := form.EngineName | EngineName := form.EngineName | ||||
| LabelName := form.LabelName | LabelName := form.LabelName | ||||
| isLatestVersion := modelarts.IsLatestVersion | isLatestVersion := modelarts.IsLatestVersion | ||||
| VersionCount := modelarts.VersionCount | |||||
| VersionCount := modelarts.VersionCountOne | |||||
| trainUrl := form.TrainUrl | trainUrl := form.TrainUrl | ||||
| modelName := form.ModelName | modelName := form.ModelName | ||||
| modelVersion := form.ModelVersion | modelVersion := form.ModelVersion | ||||
| @@ -2291,16 +2241,15 @@ func inferenceJobErrorNewDataPrepare(ctx *context.Context, form auth.CreateModel | |||||
| } | } | ||||
| func InferenceJobShow(ctx *context.Context) { | func InferenceJobShow(ctx *context.Context) { | ||||
| ctx.Data["PageIsCloudBrain"] = true | ctx.Data["PageIsCloudBrain"] = true | ||||
| var jobID = ctx.Params(":jobid") | |||||
| page := ctx.QueryInt("page") | page := ctx.QueryInt("page") | ||||
| if page <= 0 { | if page <= 0 { | ||||
| page = 1 | page = 1 | ||||
| } | } | ||||
| task, err := models.GetCloudbrainByJobID(jobID) | |||||
| var id = ctx.Params(":id") | |||||
| task, err := models.GetCloudbrainByID(id) | |||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetInferenceTask(%s) failed:%v", jobID, err.Error()) | |||||
| log.Error("GetCloudbrainByID(%s) failed:%v", id, err.Error()) | |||||
| ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ctx.NotFound(ctx.Req.URL.RequestURI(), nil) | ||||
| return | return | ||||
| } | } | ||||
| @@ -2334,11 +2283,12 @@ func InferenceJobShow(ctx *context.Context) { | |||||
| LabelName := strings.Fields(task.LabelName) | LabelName := strings.Fields(task.LabelName) | ||||
| ctx.Data["labelName"] = LabelName | ctx.Data["labelName"] = LabelName | ||||
| ctx.Data["jobID"] = jobID | |||||
| ctx.Data["jobID"] = task.JobID | |||||
| ctx.Data["jobName"] = task.JobName | ctx.Data["jobName"] = task.JobName | ||||
| ctx.Data["displayJobName"] = task.DisplayJobName | ctx.Data["displayJobName"] = task.DisplayJobName | ||||
| ctx.Data["task"] = task | ctx.Data["task"] = task | ||||
| ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) | ctx.Data["canDownload"] = cloudbrain.CanModifyJob(ctx, task) | ||||
| ctx.Data["id"] = id | |||||
| tempUids := []int64{} | tempUids := []int64{} | ||||
| tempUids = append(tempUids, task.UserID) | tempUids = append(tempUids, task.UserID) | ||||
| @@ -2355,15 +2305,10 @@ func ModelDownload(ctx *context.Context) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| jobID := ctx.Params(":jobid") | |||||
| versionName := ctx.Query("version_name") | versionName := ctx.Query("version_name") | ||||
| parentDir := ctx.Query("parent_dir") | parentDir := ctx.Query("parent_dir") | ||||
| fileName := ctx.Query("file_name") | fileName := ctx.Query("file_name") | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", task.JobName, err.Error()) | |||||
| return | |||||
| } | |||||
| task := ctx.Cloudbrain | |||||
| var url string | var url string | ||||
| if task.ComputeResource == models.NPUResource { | if task.ComputeResource == models.NPUResource { | ||||
| @@ -2430,19 +2375,8 @@ func DeleteJobStorage(jobName string) error { | |||||
| } | } | ||||
| func DownloadMultiResultFile(ctx *context.Context) { | func DownloadMultiResultFile(ctx *context.Context) { | ||||
| var jobID = ctx.Params(":jobid") | |||||
| var versionName = ctx.Query("version_name") | var versionName = ctx.Query("version_name") | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", task.JobName, err.Error()) | |||||
| return | |||||
| } | |||||
| // if !isCanDeleteOrDownload(ctx, task) { | |||||
| // ctx.ServerError("no right.", errors.New(ctx.Tr("repo.model_noright"))) | |||||
| // return | |||||
| // } | |||||
| // path := Model_prefix + models.AttachmentRelativePath(id) + "/" | |||||
| task := ctx.Cloudbrain | |||||
| path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, "result/", versionName), "/") + "/" | path := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, "result/", versionName), "/") + "/" | ||||
| allFile, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, path) | allFile, err := storage.GetAllObjectByBucketAndPrefix(setting.Bucket, path) | ||||
| @@ -2513,19 +2447,13 @@ func TrainJobDownloadLogFile(ctx *context.Context) { | |||||
| err error | err error | ||||
| ) | ) | ||||
| var jobID = ctx.Params(":jobid") | |||||
| versionName := ctx.Query("version_name") | versionName := ctx.Query("version_name") | ||||
| task, err := models.GetCloudbrainByJobIDAndVersionName(jobID, versionName) | |||||
| if err != nil { | |||||
| log.Error("GetCloudbrainByJobIDAndVersionName(%s) failed:%v", task.JobName, err.Error(), ctx.Data["msgID"]) | |||||
| ctx.ServerError("GetCloudbrainByJobIDAndVersionName", err) | |||||
| return | |||||
| } | |||||
| task := ctx.Cloudbrain | |||||
| prefix := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, modelarts.LogPath, versionName), "/") + "/job" | prefix := strings.TrimPrefix(path.Join(setting.TrainJobModelPath, task.JobName, modelarts.LogPath, versionName), "/") + "/job" | ||||
| key, err := storage.GetObsLogFileName(prefix) | key, err := storage.GetObsLogFileName(prefix) | ||||
| if err != nil { | if err != nil { | ||||
| log.Error("GetObsLogFileName(%s) failed:%v", jobID, err.Error(), ctx.Data["msgID"]) | |||||
| log.Error("GetObsLogFileName(%s) failed:%v", task.DisplayJobName, err.Error(), ctx.Data["msgID"]) | |||||
| ctx.ServerError("GetObsLogFileName", err) | ctx.ServerError("GetObsLogFileName", err) | ||||
| return | return | ||||
| } | } | ||||
| @@ -1183,7 +1183,8 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Group("/:id", func() { | m.Group("/:id", func() { | ||||
| m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | m.Get("", reqRepoCloudBrainReader, repo.NotebookShow) | ||||
| m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2) | m.Get("/debug", cloudbrain.AdminOrJobCreaterRight, repo.NotebookDebug2) | ||||
| m.Post("/:action", reqRepoCloudBrainWriter, repo.NotebookManage) | |||||
| m.Post("/restart", cloudbrain.AdminOrJobCreaterRight, repo.NotebookRestart) | |||||
| m.Post("/stop", cloudbrain.AdminOrJobCreaterRight, repo.NotebookStop) | |||||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.NotebookDel) | ||||
| }) | }) | ||||
| m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew) | m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.NotebookNew) | ||||
| @@ -1192,14 +1193,14 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Group("/train-job", func() { | m.Group("/train-job", func() { | ||||
| m.Get("", reqRepoCloudBrainReader, repo.TrainJobIndex) | m.Get("", reqRepoCloudBrainReader, repo.TrainJobIndex) | ||||
| m.Group("/:jobid", func() { | |||||
| m.Group("/:id", func() { | |||||
| m.Get("", reqRepoCloudBrainReader, repo.TrainJobShow) | m.Get("", reqRepoCloudBrainReader, repo.TrainJobShow) | ||||
| m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.TrainJobStop) | |||||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRightForTrain, repo.TrainJobDel) | |||||
| m.Get("/model_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ModelDownload) | |||||
| m.Get("/download_log_file", cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobDownloadLogFile) | |||||
| m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, repo.TrainJobNewVersion) | |||||
| m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRightForTrain, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) | |||||
| m.Post("/stop", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.TrainJobStop) | |||||
| m.Post("/del", cloudbrain.AdminOrOwnerOrJobCreaterRight, repo.TrainJobDel) | |||||
| m.Get("/model_download", cloudbrain.AdminOrJobCreaterRight, repo.ModelDownload) | |||||
| m.Get("/download_log_file", cloudbrain.AdminOrJobCreaterRight, repo.TrainJobDownloadLogFile) | |||||
| m.Get("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRight, repo.TrainJobNewVersion) | |||||
| m.Post("/create_version", reqWechatBind, cloudbrain.AdminOrJobCreaterRight, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreateVersion) | |||||
| }) | }) | ||||
| m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.TrainJobNew) | m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.TrainJobNew) | ||||
| m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) | m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsTrainJobForm{}), repo.TrainJobCreate) | ||||
| @@ -1209,10 +1210,10 @@ func RegisterRoutes(m *macaron.Macaron) { | |||||
| m.Group("/inference-job", func() { | m.Group("/inference-job", func() { | ||||
| m.Get("", reqRepoCloudBrainReader, repo.InferenceJobIndex) | m.Get("", reqRepoCloudBrainReader, repo.InferenceJobIndex) | ||||
| m.Group("/:jobid", func() { | |||||
| m.Group("/:id", func() { | |||||
| m.Get("", reqRepoCloudBrainReader, repo.InferenceJobShow) | m.Get("", reqRepoCloudBrainReader, repo.InferenceJobShow) | ||||
| m.Get("/result_download", cloudbrain.AdminOrJobCreaterRightForTrain, repo.ResultDownload) | |||||
| m.Get("/downloadall", repo.DownloadMultiResultFile) | |||||
| m.Get("/result_download", cloudbrain.AdminOrJobCreaterRight, repo.ResultDownload) | |||||
| m.Get("/downloadall", cloudbrain.AdminOrJobCreaterRight, repo.DownloadMultiResultFile) | |||||
| }) | }) | ||||
| m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.InferenceJobNew) | m.Get("/create", reqWechatBind, reqRepoCloudBrainWriter, repo.InferenceJobNew) | ||||
| m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) | m.Post("/create", reqWechatBind, reqRepoCloudBrainWriter, bindIgnErr(auth.CreateModelArtsInferenceJobForm{}), repo.InferenceJobCreate) | ||||
| @@ -112,7 +112,7 @@ | |||||
| <!-- 任务名 --> | <!-- 任务名 --> | ||||
| <div class="three wide column padding0"> | <div class="three wide column padding0"> | ||||
| <a class="title" href='{{if eq .Cloudbrain.Type 1 }}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}' title="{{.DisplayJobName}}" style="font-size: 14px;"> | |||||
| <a class="title" href='{{if eq .Cloudbrain.Type 1 }}{{$.Link}}/{{.Cloudbrain.ID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}' title="{{.DisplayJobName}}" style="font-size: 14px;"> | |||||
| <span class="fitted" style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span> | <span class="fitted" style="width: 90%;vertical-align: middle;">{{.DisplayJobName}}</span> | ||||
| </a> | </a> | ||||
| @@ -123,8 +123,8 @@ | |||||
| </div> | </div> | ||||
| <!-- 任务状态 --> | <!-- 任务状态 --> | ||||
| <div class="two wide column padding0" style="padding-left: 2.2rem !important;"> | <div class="two wide column padding0" style="padding-left: 2.2rem !important;"> | ||||
| <span class="job-status" id="{{.JobID}}" data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{.JobID}}" data-version="{{.VersionName}}"> | |||||
| <span><i id="{{.JobID}}-icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="{{.JobID}}-text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||||
| <span class="job-status" id="{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}" data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}" data-version="{{.VersionName}}"> | |||||
| <span><i id="{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}-icon" style="vertical-align: middle;" class="{{.Status}}"></i><span id="{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}-text" style="margin-left: 0.4em;font-size: 12px;">{{.Status}}</span></span> | |||||
| </span> | </span> | ||||
| </div> | </div> | ||||
| <!-- 任务创建时间 --> | <!-- 任务创建时间 --> | ||||
| @@ -133,7 +133,7 @@ | |||||
| </div> | </div> | ||||
| <!-- 任务运行时间 --> | <!-- 任务运行时间 --> | ||||
| <div class="two wide column text center padding0"> | <div class="two wide column text center padding0"> | ||||
| <span style="font-size: 12px;" id="duration-{{.JobID}}">{{.TrainJobDuration}}</span> | |||||
| <span style="font-size: 12px;" id="duration-{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}">{{.TrainJobDuration}}</span> | |||||
| </div> | </div> | ||||
| <!-- 计算资源 --> | <!-- 计算资源 --> | ||||
| <div class="two wide column text center padding0"> | <div class="two wide column text center padding0"> | ||||
| @@ -153,7 +153,7 @@ | |||||
| <div class="ui compact buttons"> | <div class="ui compact buttons"> | ||||
| {{$.CsrfTokenHtml}} | {{$.CsrfTokenHtml}} | ||||
| {{if .CanDel}} | {{if .CanDel}} | ||||
| <a style="padding: 0.5rem 1rem;" id="ai-stop-{{.JobID}}" class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button" data-repopath='{{$.RepoRelPath}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}' data-jobid="{{.JobID}}" data-version="{{.VersionName}}"> | |||||
| <a style="padding: 0.5rem 1rem;" id="ai-stop-{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}" class="ui basic ai_stop_version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED" "SUCCEEDED" "STOPPED"}}disabled {{else}} blue {{end}}button" data-repopath='{{$.RepoRelPath}}{{if eq .Cloudbrain.Type 1}}/modelarts/train-job{{else if eq .Cloudbrain.Type 0}}/cloudbrain/train-job{{else if eq .Cloudbrain.Type 2}}/grampus/train-job{{end}}' data-jobid="{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}" data-version="{{.VersionName}}"> | |||||
| {{$.i18n.Tr "repo.stop"}} | {{$.i18n.Tr "repo.stop"}} | ||||
| </a> | </a> | ||||
| {{else}} | {{else}} | ||||
| @@ -164,11 +164,11 @@ | |||||
| </div> | </div> | ||||
| <!-- 删除任务 --> | <!-- 删除任务 --> | ||||
| <form class="ui compact buttons" id="delForm-{{.JobID}}" action='{{if eq .Cloudbrain.Type 1}}{{$.Link}}/{{.JobID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}/del' method="post"> | |||||
| <form class="ui compact buttons" id="delForm-{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}" action='{{if eq .Cloudbrain.Type 1}}{{$.Link}}/{{.Cloudbrain.ID}}{{else if eq .Cloudbrain.Type 0}}{{$.RepoLink}}/cloudbrain/train-job/{{.JobID}}{{else if eq .Cloudbrain.Type 2}}{{$.RepoLink}}/grampus/train-job/{{.JobID}}{{end}}/del' method="post"> | |||||
| <input type="hidden" name="listType" value="{{$.ListType}}"> | <input type="hidden" name="listType" value="{{$.ListType}}"> | ||||
| {{$.CsrfTokenHtml}} | {{$.CsrfTokenHtml}} | ||||
| {{if .CanDel}} | {{if .CanDel}} | ||||
| <a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{.JobID}}" class="ui basic ai_delete blue button" style="border-radius: .28571429rem;"> | |||||
| <a style="padding: 0.5rem 1rem;margin-left:0.2rem" id="ai-delete-{{if eq .Cloudbrain.Type 1 }}{{.Cloudbrain.ID}}{{else }}{{.JobID}}{{end}}" class="ui basic ai_delete blue button" style="border-radius: .28571429rem;"> | |||||
| {{$.i18n.Tr "repo.delete"}} | {{$.i18n.Tr "repo.delete"}} | ||||
| </a> | </a> | ||||
| {{else}} | {{else}} | ||||
| @@ -243,7 +243,7 @@ | |||||
| {{range $k ,$v := .version_list_task}} | {{range $k ,$v := .version_list_task}} | ||||
| <div class="ui accordion border-according" id="accordion{{.VersionName}}" | <div class="ui accordion border-according" id="accordion{{.VersionName}}" | ||||
| data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-jobid="{{.JobID}}" | |||||
| data-repopath="{{$.RepoRelPath}}/modelarts/train-job" data-id="{{.Cloudbrain.ID}}" | |||||
| data-version="{{.VersionName}}"> | data-version="{{.VersionName}}"> | ||||
| <div class="{{if eq $k 0}}active{{end}} title padding0"> | <div class="{{if eq $k 0}}active{{end}} title padding0"> | ||||
| <div class="according-panel-heading"> | <div class="according-panel-heading"> | ||||
| @@ -263,16 +263,16 @@ | |||||
| {{if .CanModify}} | {{if .CanModify}} | ||||
| <a class="ti-action-menu-item" | <a class="ti-action-menu-item" | ||||
| href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/create_version?version_name={{.VersionName}}">{{$.i18n.Tr "repo.modelarts.modify"}}</a> | |||||
| href="{{$.RepoLink}}/modelarts/train-job/{{.Cloudbrain.ID}}/create_version?version_name={{.VersionName}}">{{$.i18n.Tr "repo.modelarts.modify"}}</a> | |||||
| {{else}} | {{else}} | ||||
| <a class="ti-action-menu-item disabled" | <a class="ti-action-menu-item disabled" | ||||
| href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/create_version?version_name={{.VersionName}}">{{$.i18n.Tr "repo.modelarts.modify"}}</a> | |||||
| href="{{$.RepoLink}}/modelarts/train-job/{{.Cloudbrain.ID}}/create_version?version_name={{.VersionName}}">{{$.i18n.Tr "repo.modelarts.modify"}}</a> | |||||
| {{end}} | {{end}} | ||||
| {{if .CanDel}} | {{if .CanDel}} | ||||
| <a class="ti-action-menu-item stop-show-version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED"}}disabled {{end}}" | <a class="ti-action-menu-item stop-show-version {{if eq .Status "KILLED" "FAILED" "START_FAILED" "KILLING" "COMPLETED"}}disabled {{end}}" | ||||
| id="{{.VersionName}}-stop" | id="{{.VersionName}}-stop" | ||||
| data-jobid="{{.JobID}}" | |||||
| data-id="{{.Cloudbrain.ID}}" | |||||
| data-repopath="{{$.RepoRelPath}}/modelarts/train-job" | data-repopath="{{$.RepoRelPath}}/modelarts/train-job" | ||||
| data-version = "{{.VersionName}}" | data-version = "{{.VersionName}}" | ||||
| >{{$.i18n.Tr "repo.stop"}}</a> | >{{$.i18n.Tr "repo.stop"}}</a> | ||||
| @@ -325,7 +325,7 @@ | |||||
| <a class="item log_bottom" data-tab="second{{$k}}" | <a class="item log_bottom" data-tab="second{{$k}}" | ||||
| data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | data-version="{{.VersionName}}">{{$.i18n.Tr "repo.modelarts.log"}}</a> | ||||
| <a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}">资源占用情况</a> | <a class="item metric_chart" data-tab="four{{$k}}" data-version="{{.VersionName}}">资源占用情况</a> | ||||
| <a class="item load-model-file" data-tab="third{{$k}}" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | |||||
| <a class="item load-model-file" data-tab="third{{$k}}" data-download-flag="{{$.canDownload}}" data-path="{{$.RepoLink}}/modelarts/train-job/{{.Cloudbrain.ID}}/model_list" data-version="{{.VersionName}}" data-parents="" data-filename="" data-init="init" >{{$.i18n.Tr "repo.model_download"}}</a> | |||||
| </div> | </div> | ||||
| <div class="ui tab active" data-tab="first{{$k}}"> | <div class="ui tab active" data-tab="first{{$k}}"> | ||||
| <div style="padding-top: 10px;"> | <div style="padding-top: 10px;"> | ||||
| @@ -500,7 +500,7 @@ | |||||
| <div> | <div> | ||||
| <a id="{{.VersionName}}-log-down" | <a id="{{.VersionName}}-log-down" | ||||
| class='{{if and (.CanModify) (eq .Status "KILLED" "FAILED" "START_FAILED" "STOPPED" "COMPLETED") }}ti-download-file{{else}}disabled{{end}}' | class='{{if and (.CanModify) (eq .Status "KILLED" "FAILED" "START_FAILED" "STOPPED" "COMPLETED") }}ti-download-file{{else}}disabled{{end}}' | ||||
| href="{{$.RepoLink}}/modelarts/train-job/{{.JobID}}/download_log_file?version_name={{.VersionName}}"> | |||||
| href="{{$.RepoLink}}/modelarts/train-job/{{.Cloudbrain.ID}}/download_log_file?version_name={{.VersionName}}"> | |||||
| <i class="ri-download-cloud-2-line"></i> | <i class="ri-download-cloud-2-line"></i> | ||||
| <span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span> | <span style="margin-left: 0.3rem;">{{$.i18n.Tr "repo.modelarts.download_log"}}</span> | ||||
| </a> | </a> | ||||
| @@ -597,7 +597,7 @@ | |||||
| <div class="two inline fields "> | <div class="two inline fields "> | ||||
| <div class="required ten wide field"> | <div class="required ten wide field"> | ||||
| <label style="margin-left: -23px;">选择训练任务</label> | <label style="margin-left: -23px;">选择训练任务</label> | ||||
| <input type="hidden" class="width83" id="JobId" name="JobId" readonly required> | |||||
| <input type="hidden" class="width83" id="ID" name="ID" readonly required> | |||||
| <input class="width83" id="JobName" readonly required> | <input class="width83" id="JobName" readonly required> | ||||
| </div> | </div> | ||||
| @@ -793,13 +793,15 @@ | |||||
| let userName | let userName | ||||
| let repoPath | let repoPath | ||||
| let jobID | let jobID | ||||
| let id | |||||
| let downlaodFlag = {{ $.canDownload }} | let downlaodFlag = {{ $.canDownload }} | ||||
| $(document).ready(function () { | $(document).ready(function () { | ||||
| let url = window.location.href; | let url = window.location.href; | ||||
| let urlArr = url.split('/') | let urlArr = url.split('/') | ||||
| userName = urlArr.slice(-5)[0] | userName = urlArr.slice(-5)[0] | ||||
| repoPath = urlArr.slice(-4)[0] | repoPath = urlArr.slice(-4)[0] | ||||
| jobID = urlArr.slice(-1)[0] | |||||
| id = urlArr.slice(-1)[0] | |||||
| jobID = id | |||||
| }) | }) | ||||
| function stopBubbling(e) { | function stopBubbling(e) { | ||||
| e = window.event || e; | e = window.event || e; | ||||
| @@ -816,9 +818,9 @@ | |||||
| centered: false, | centered: false, | ||||
| onShow: function () { | onShow: function () { | ||||
| $('input[name="Version"]').addClass('model_disabled') | $('input[name="Version"]').addClass('model_disabled') | ||||
| // $('input[name="JobId"]').text(obj.JobName) | |||||
| // $('input[name="ID"]').text(obj.JobName) | |||||
| $('#JobName').val(obj.DisplayJobName).addClass('model_disabled') | $('#JobName').val(obj.DisplayJobName).addClass('model_disabled') | ||||
| $('input[name="JobId"]').val(obj.JobID) | |||||
| $('input[name="ID"]').val(obj.ID) | |||||
| $('input[name="VersionName"]').val(obj.VersionName).addClass('model_disabled') | $('input[name="VersionName"]').val(obj.VersionName).addClass('model_disabled') | ||||
| if(obj.EngineID ==122 || obj.EngineID ==35 || obj.EngineID ==-1){ | if(obj.EngineID ==122 || obj.EngineID ==35 || obj.EngineID ==-1){ | ||||
| $('input[name="Engine_name"]').val("MindSpore").addClass('model_disabled'); | $('input[name="Engine_name"]').val("MindSpore").addClass('model_disabled'); | ||||
| @@ -883,7 +885,7 @@ | |||||
| flag = false | flag = false | ||||
| }, | }, | ||||
| onApprove: function () { | onApprove: function () { | ||||
| $.post(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/del_version`, { version_name: version_name }, (data) => { | |||||
| $.post(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/del_version`, { version_name: version_name }, (data) => { | |||||
| if (data.VersionListCount === 0) { | if (data.VersionListCount === 0) { | ||||
| location.href = `/${userName}/${repoPath}/modelarts/train-job` | location.href = `/${userName}/${repoPath}/modelarts/train-job` | ||||
| } else { | } else { | ||||
| @@ -906,7 +908,7 @@ | |||||
| } | } | ||||
| function loadLog(version_name) { | function loadLog(version_name) { | ||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&lines=50&order=asc`, (data) => { | |||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/log?version_name=${version_name}&lines=50&order=asc`, (data) => { | |||||
| $('input[name=end_line]').val(data.EndLine) | $('input[name=end_line]').val(data.EndLine) | ||||
| $('input[name=start_line]').val(data.StartLine) | $('input[name=start_line]').val(data.StartLine) | ||||
| $(`#log_file${version_name}`).text(data.Content) | $(`#log_file${version_name}`).text(data.Content) | ||||
| @@ -940,7 +942,7 @@ | |||||
| let scrollLeft = container.scrollLeft | let scrollLeft = container.scrollLeft | ||||
| if (((parseInt(scrollTop) + clientHeight == scrollHeight || parseInt(scrollTop) + clientHeight + 1 == scrollHeight || parseInt(scrollTop) + clientHeight - 1 == scrollHeight)) && parseInt(scrollTop) !== 0 && scrollLeft == 0) { | if (((parseInt(scrollTop) + clientHeight == scrollHeight || parseInt(scrollTop) + clientHeight + 1 == scrollHeight || parseInt(scrollTop) + clientHeight - 1 == scrollHeight)) && parseInt(scrollTop) !== 0 && scrollLeft == 0) { | ||||
| let end_line = $(`#log${version_name} input[name=end_line]`).val() | let end_line = $(`#log${version_name} input[name=end_line]`).val() | ||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=${end_line}&lines=50&order=desc`, (data) => { | |||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/log?version_name=${version_name}&base_line=${end_line}&lines=50&order=desc`, (data) => { | |||||
| if (data.Lines == 0) { | if (data.Lines == 0) { | ||||
| $(`.message${version_name} #header`).text('您已翻阅至日志底部') | $(`.message${version_name} #header`).text('您已翻阅至日志底部') | ||||
| $(`.message${version_name}`).css('display', 'block') | $(`.message${version_name}`).css('display', 'block') | ||||
| @@ -963,7 +965,7 @@ | |||||
| } | } | ||||
| if ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].includes(scrollTop) && scrollLeft == 0) { | if ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10].includes(scrollTop) && scrollLeft == 0) { | ||||
| let start_line = $(`#log${version_name} input[name=start_line]`).val() | let start_line = $(`#log${version_name} input[name=start_line]`).val() | ||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=${start_line}&lines=50&order=asc`, (data) => { | |||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/log?version_name=${version_name}&base_line=${start_line}&lines=50&order=asc`, (data) => { | |||||
| if (data.Lines == 0) { | if (data.Lines == 0) { | ||||
| $(`.message${version_name} #header`).text('您已翻阅至日志顶部') | $(`.message${version_name} #header`).text('您已翻阅至日志顶部') | ||||
| $(`.message${version_name}`).css('display', 'block') | $(`.message${version_name}`).css('display', 'block') | ||||
| @@ -1007,7 +1009,7 @@ | |||||
| let logContentDom = document.querySelector(`#log${version_name}`) | let logContentDom = document.querySelector(`#log${version_name}`) | ||||
| $(`#log_file${version_name}`).siblings('pre').remove() | $(`#log_file${version_name}`).siblings('pre').remove() | ||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=&lines=50&order=asc`, (data) => { | |||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/log?version_name=${version_name}&base_line=&lines=50&order=asc`, (data) => { | |||||
| $(`#log${version_name} input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值 | $(`#log${version_name} input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值 | ||||
| $(`#log${version_name} input[name=start_line]`).val(data.StartLine) | $(`#log${version_name} input[name=start_line]`).val(data.StartLine) | ||||
| @@ -1025,12 +1027,12 @@ | |||||
| let version_name = $(this).data('version') | let version_name = $(this).data('version') | ||||
| let logContentDom = document.querySelector(`#log${version_name}`) | let logContentDom = document.querySelector(`#log${version_name}`) | ||||
| $(`#log_file${version_name}`).siblings('pre').remove() | $(`#log_file${version_name}`).siblings('pre').remove() | ||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=&lines=50&order=desc`, (data) => { | |||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/log?version_name=${version_name}&base_line=&lines=50&order=desc`, (data) => { | |||||
| $(`#log${version_name} input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值 | $(`#log${version_name} input[name=end_line]`).val(data.EndLine) //如果变动就改变所对应的值 | ||||
| $(`#log${version_name} input[name=start_line]`).val(data.StartLine) | $(`#log${version_name} input[name=start_line]`).val(data.StartLine) | ||||
| $(`#log${version_name}`).append('<pre>' + data.Content) | $(`#log${version_name}`).append('<pre>' + data.Content) | ||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${jobID}/log?version_name=${version_name}&base_line=${data.EndLine}&lines=50&order=desc`, (data) => { | |||||
| $.get(`/api/v1/repos/${userName}/${repoPath}/modelarts/train-job/${id}/log?version_name=${version_name}&base_line=${data.EndLine}&lines=50&order=desc`, (data) => { | |||||
| if (data.Lines == 0) { | if (data.Lines == 0) { | ||||
| $(`.message${version_name} #header`).text('您已翻阅至日志底部') | $(`.message${version_name} #header`).text('您已翻阅至日志底部') | ||||
| $(`.message${version_name}`).css('display', 'block') | $(`.message${version_name}`).css('display', 'block') | ||||