| @@ -21,14 +21,14 @@ const ( | |||
| poolName = "train-private-1" | |||
| poolType = "USER_DEFINED" | |||
| DataSetMountPath = "/home/ma-user/work" | |||
| NotebookEnv = "Python3" | |||
| NotebookType = "Ascend" | |||
| FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)" | |||
| DataSetMountPath = "/home/ma-user/work" | |||
| NotebookEnv = "Python3" | |||
| NotebookType = "Ascend" | |||
| FlavorInfo = "Ascend: 1*Ascend 910 CPU: 24 核 96GiB (modelarts.kat1.xlarge)" | |||
| //train-job | |||
| ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}" | |||
| Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}" | |||
| ResourcePools = "{\"resource_pool\":[{\"id\":\"pool1328035d\", \"value\":\"专属资源池\"}]}" | |||
| Engines = "{\"engine\":[{\"id\":1, \"value\":\"Ascend-Powered-Engine\"}]}" | |||
| EngineVersions = "{\"version\":[{\"id\":118,\"value\":\"MindSpore-1.0.0-c75-python3.7-euleros2.8-aarch64\"}," + | |||
| "{\"id\":119,\"value\":\"MindSpore-1.1.1-c76-python3.7-euleros2.8-aarch64\"}," + | |||
| "{\"id\":120,\"value\":\"MindSpore-1.1.1-c76-tr5-python3.7-euleros2.8-aarch64\"}," + | |||
| @@ -39,35 +39,35 @@ const ( | |||
| "{\"code\":\"modelarts.bm.910.arm.public.4\",\"value\":\"Ascend : 4 * Ascend 910 CPU:96 核 1024GiB\"}," + | |||
| "{\"code\":\"modelarts.bm.910.arm.public.1\",\"value\":\"Ascend : 1 * Ascend 910 CPU:24 核 256GiB\"}" + | |||
| "]}" | |||
| CodePath = "/code/" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 20 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| OrderDesc = "desc" //向下查询 | |||
| OrderAsc = "asc" //向上查询 | |||
| Lines = 20 | |||
| TrainUrl = "train_url" | |||
| DataUrl = "data_url" | |||
| PerPage = 10 | |||
| SortByCreateTime = "create_time" | |||
| ConfigTypeCustom = "custom" | |||
| ) | |||
| type GenerateTrainJobReq struct { | |||
| JobName string | |||
| Uuid string | |||
| Description string | |||
| CodeObsPath string | |||
| BootFile string | |||
| DataUrl string | |||
| TrainUrl string | |||
| FlavorCode string | |||
| LogUrl string | |||
| PoolID string | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| JobName string | |||
| Uuid string | |||
| Description string | |||
| CodeObsPath string | |||
| BootFile string | |||
| DataUrl string | |||
| TrainUrl string | |||
| FlavorCode string | |||
| LogUrl string | |||
| PoolID string | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| Parameters []models.Parameter | |||
| } | |||
| type VersionInfo struct { | |||
| @@ -79,8 +79,8 @@ type VersionInfo struct { | |||
| type Flavor struct { | |||
| Info []struct { | |||
| Code string `json:"code"` | |||
| Value string `json:"value"` | |||
| Code string `json:"code"` | |||
| Value string `json:"value"` | |||
| } `json:"flavor"` | |||
| } | |||
| @@ -93,8 +93,8 @@ type Engine struct { | |||
| type ResourcePool struct { | |||
| Info []struct { | |||
| ID string `json:"id"` | |||
| Value string `json:"value"` | |||
| ID string `json:"id"` | |||
| Value string `json:"value"` | |||
| } `json:"resource_pool"` | |||
| } | |||
| @@ -137,7 +137,7 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description string) error | |||
| JobName: jobName, | |||
| JobType: string(models.JobTypeDebug), | |||
| Type: models.TypeCloudBrainNotebook, | |||
| Uuid: uuid, | |||
| Uuid: uuid, | |||
| }) | |||
| if err != nil { | |||
| @@ -149,24 +149,23 @@ func GenerateTask(ctx *context.Context, jobName, uuid, description string) error | |||
| func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { | |||
| jobResult, err := createTrainJob(models.CreateTrainJobParams{ | |||
| JobName: req.JobName, | |||
| Description: req.Description, | |||
| JobName: req.JobName, | |||
| Description: req.Description, | |||
| Config: models.Config{ | |||
| WorkServerNum: req.WorkServerNumber, | |||
| AppUrl: req.CodeObsPath, | |||
| BootFileUrl: req.BootFile, | |||
| DataUrl: req.DataUrl, | |||
| EngineID: req.EngineID, | |||
| TrainUrl: req.TrainUrl, | |||
| LogUrl: req.LogUrl, | |||
| PoolID: req.PoolID, | |||
| CreateVersion: true, | |||
| Flavor: models.Flavor{ | |||
| Code: req.FlavorCode, | |||
| WorkServerNum: req.WorkServerNumber, | |||
| AppUrl: req.CodeObsPath, | |||
| BootFileUrl: req.BootFile, | |||
| DataUrl: req.DataUrl, | |||
| EngineID: req.EngineID, | |||
| TrainUrl: req.TrainUrl, | |||
| LogUrl: req.LogUrl, | |||
| PoolID: req.PoolID, | |||
| CreateVersion: true, | |||
| Flavor: models.Flavor{ | |||
| Code: req.FlavorCode, | |||
| }, | |||
| Parameter: req.Parameters, | |||
| Parameter: req.Parameters, | |||
| }, | |||
| }) | |||
| if err != nil { | |||
| log.Error("CreateJob failed: %v", err.Error()) | |||
| @@ -180,10 +179,10 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { | |||
| JobID: strconv.FormatInt(jobResult.JobID, 10), | |||
| JobName: req.JobName, | |||
| JobType: string(models.JobTypeDebug), | |||
| Type: models.TypeCloudBrainTrainJob, | |||
| VersionID: jobResult.VersionID, | |||
| Type: models.TypeCloudBrainTrainJob, | |||
| VersionID: jobResult.VersionID, | |||
| VersionName: jobResult.VersionName, | |||
| Uuid: req.Uuid, | |||
| Uuid: req.Uuid, | |||
| }) | |||
| if err != nil { | |||
| @@ -194,7 +193,7 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { | |||
| return nil | |||
| } | |||
| func TransTrainJobStatus(status int) string{ | |||
| func TransTrainJobStatus(status int) string { | |||
| switch status { | |||
| case 0: | |||
| return "UNKNOWN" | |||
| @@ -23,9 +23,9 @@ const ( | |||
| urlGetToken = "/v3/auth/tokens" | |||
| urlNotebook = "/demanager/instances" | |||
| urlTrainJob = "/training-jobs" | |||
| urlResourceSpecs = "/job/resource-specs" | |||
| urlTrainJobConfig = "/training-job-configs" | |||
| urlTrainJob = "/training-jobs" | |||
| urlResourceSpecs = "/job/resource-specs" | |||
| urlTrainJobConfig = "/training-job-configs" | |||
| errorCodeExceedLimit = "ModelArts.0118" | |||
| ) | |||
| @@ -435,12 +435,12 @@ func GetConfigList(perPage, page int, sortBy, order, searchContent, configType s | |||
| sendjob: | |||
| res, err := client.R(). | |||
| SetQueryParams(map[string]string{ | |||
| "per_page": strconv.Itoa(perPage), | |||
| "page": strconv.Itoa(page), | |||
| "sortBy": sortBy, | |||
| "order": order, | |||
| "search_content": searchContent, | |||
| "config_type": configType, | |||
| "per_page": strconv.Itoa(perPage), | |||
| "page": strconv.Itoa(page), | |||
| "sortBy": sortBy, | |||
| "order": order, | |||
| "search_content": searchContent, | |||
| "config_type": configType, | |||
| }). | |||
| SetAuthToken(TOKEN). | |||
| SetResult(&result). | |||
| @@ -484,7 +484,7 @@ func GetParaConfig(configName, configType string) (models.GetConfigResult, error | |||
| sendjob: | |||
| res, err := client.R(). | |||
| SetQueryParams(map[string]string{ | |||
| "config_type": configType, | |||
| "config_type": configType, | |||
| }). | |||
| SetAuthToken(TOKEN). | |||
| SetResult(&result). | |||
| @@ -569,10 +569,10 @@ func GetTrainJobLog(jobID, versionID, baseLine, logFile, order string, lines int | |||
| sendjob: | |||
| res, err := client.R(). | |||
| SetQueryParams(map[string]string{ | |||
| "base_line": baseLine, | |||
| "lines": strconv.Itoa(lines), | |||
| "log_file": logFile, | |||
| "order": order, | |||
| "base_line": baseLine, | |||
| "lines": strconv.Itoa(lines), | |||
| "log_file": logFile, | |||
| "order": order, | |||
| }). | |||
| SetAuthToken(TOKEN). | |||
| SetResult(&result). | |||
| @@ -278,7 +278,7 @@ func TrainJobIndex(ctx *context.Context) { | |||
| PageSize: setting.UI.IssuePagingNum, | |||
| }, | |||
| RepoID: repo.ID, | |||
| Type: models.TypeCloudBrainTrainJob, | |||
| Type: models.TypeCloudBrainTrainJob, | |||
| }) | |||
| if err != nil { | |||
| ctx.ServerError("Cloudbrain", err) | |||
| @@ -430,11 +430,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| var parameters models.Parameters | |||
| param := make([]models.Parameter, 0) | |||
| param = append(param, models.Parameter{ | |||
| Label: modelarts.TrainUrl, | |||
| Value: outputObsPath, | |||
| Label: modelarts.TrainUrl, | |||
| Value: outputObsPath, | |||
| }, models.Parameter{ | |||
| Label: modelarts.DataUrl, | |||
| Value: dataPath, | |||
| Label: modelarts.DataUrl, | |||
| Value: dataPath, | |||
| }) | |||
| if len(params) != 0 { | |||
| err := json.Unmarshal([]byte(params), ¶meters) | |||
| @@ -447,8 +447,8 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| for _, parameter := range parameters.Parameter { | |||
| if parameter.Label != modelarts.TrainUrl && parameter.Label != modelarts.DataUrl { | |||
| param = append(param, models.Parameter{ | |||
| Label: parameter.Label, | |||
| Value: parameter.Value, | |||
| Label: parameter.Label, | |||
| Value: parameter.Value, | |||
| }) | |||
| } | |||
| } | |||
| @@ -463,43 +463,43 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| } | |||
| _, err := modelarts.CreateTrainJobConfig(models.CreateConfigParams{ | |||
| ConfigName: form.ParameterTemplateName, | |||
| Description: form.PrameterDescription, | |||
| DataUrl: dataPath, | |||
| AppUrl: codeObsPath, | |||
| BootFileUrl: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| Flavor: models.Flavor{ | |||
| Code: flavorCode, | |||
| ConfigName: form.ParameterTemplateName, | |||
| Description: form.PrameterDescription, | |||
| DataUrl: dataPath, | |||
| AppUrl: codeObsPath, | |||
| BootFileUrl: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| Flavor: models.Flavor{ | |||
| Code: flavorCode, | |||
| }, | |||
| WorkServerNum: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Parameter: param, | |||
| WorkServerNum: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Parameter: param, | |||
| }) | |||
| if err != nil { | |||
| log.Error("Failed to CreateTrainJobConfig: %v", err) | |||
| ctx.RenderWithErr("保存作业参数失败:" + err.Error(), tplModelArtsTrainJobNew, &form) | |||
| ctx.RenderWithErr("保存作业参数失败:"+err.Error(), tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| } | |||
| req := &modelarts.GenerateTrainJobReq{ | |||
| JobName: jobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFile: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: param, | |||
| JobName: jobName, | |||
| DataUrl: dataPath, | |||
| Description: description, | |||
| CodeObsPath: codeObsPath, | |||
| BootFile: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| Uuid: uuid, | |||
| Parameters: param, | |||
| } | |||
| err = modelarts.GenerateTrainJob(ctx, req) | |||
| @@ -552,7 +552,7 @@ func uploadCodeToObs(codePath, jobName, parentDir string) error { | |||
| return err | |||
| } | |||
| if err = uploadCodeToObs(codePath + file.Name() + "/", jobName, parentDir + file.Name() + "/"); err != nil { | |||
| if err = uploadCodeToObs(codePath+file.Name()+"/", jobName, parentDir+file.Name()+"/"); err != nil { | |||
| log.Error("uploadCodeToObs(%s) failed: %s", file.Name(), err.Error()) | |||
| return err | |||
| } | |||
| @@ -591,7 +591,7 @@ func paramCheckCreateTrainJob(form auth.CreateModelArtsTrainJobForm) error { | |||
| return errors.New("启动文件必须是python文件") | |||
| } | |||
| if form.WorkServerNumber > 25 || form.WorkServerNumber < 1{ | |||
| if form.WorkServerNumber > 25 || form.WorkServerNumber < 1 { | |||
| log.Error("the WorkServerNumber(%d) must be in (1,25)", form.WorkServerNumber) | |||
| return errors.New("计算节点数必须在1-25之间") | |||
| } | |||
| @@ -677,7 +677,7 @@ func TrainJobGetLog(ctx *context.Context) { | |||
| //ctx.HTML(http.StatusOK, tplModelArtsTrainJobShow) | |||
| } | |||
| func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error){ | |||
| func trainJobGetLog(jobID string) (*models.GetTrainJobLogFileNamesResult, *models.GetTrainJobLogResult, error) { | |||
| task, err := models.GetCloudbrainByJobID(jobID) | |||
| if err != nil { | |||
| log.Error("GetCloudbrainByJobID(%s) failed:%v", jobID, err.Error()) | |||