| @@ -482,20 +482,20 @@ type Config struct { | |||
| BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 | |||
| Parameter []Parameter `json:"parameter"` | |||
| DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL | |||
| DatasetID string `json:"dataset_id"` | |||
| DataVersionID string `json:"dataset_version_id"` | |||
| DataSource []DataSource `json:"data_source"` | |||
| SpecID int64 `json:"spec_id"` | |||
| //DatasetID string `json:"dataset_id"` | |||
| //DataVersionID string `json:"dataset_version_id"` | |||
| //DataSource []DataSource `json:"data_source"` | |||
| //SpecID int64 `json:"spec_id"` | |||
| EngineID int64 `json:"engine_id"` | |||
| ModelID int64 `json:"model_id"` | |||
| //ModelID int64 `json:"model_id"` | |||
| TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL | |||
| LogUrl string `json:"log_url"` | |||
| UserImageUrl string `json:"user_image_url"` | |||
| UserCommand string `json:"user_command"` | |||
| //UserImageUrl string `json:"user_image_url"` | |||
| //UserCommand string `json:"user_command"` | |||
| CreateVersion bool `json:"create_version"` | |||
| Volumes []Volumes `json:"volumes"` | |||
| Flavor Flavor `json:"flavor"` | |||
| PoolID string `json:"pool_id"` | |||
| PoolID string `json:"pool_id"` | |||
| } | |||
| type Parameter struct { | |||
| @@ -21,9 +21,8 @@ type CreateModelArtsTrainJobForm struct { | |||
| BootFile string `form:"boot_file" binding:"Required"` | |||
| WorkServerNumber int `form:"work_server_number" binding:"Required"` | |||
| EngineID int `form:"engine_id" binding:"Required"` | |||
| SpecID int `form:"spec_id" binding:"Required"` | |||
| Flavor string `form:"flavor" binding:"Required"` | |||
| PoolID string `form:"pool_id" binding:"Required"` | |||
| Flavor string `form:"flavor" binding:"Required"` | |||
| Description string `form:"description"` | |||
| } | |||
| @@ -37,6 +37,7 @@ const ( | |||
| "]}" | |||
| CodePath = "/code/" | |||
| OutputPath = "/output/" | |||
| LogPath = "/log/" | |||
| JobPath = "/job/" | |||
| ) | |||
| @@ -49,8 +50,8 @@ type GenerateTrainJobReq struct { | |||
| DataUrl string | |||
| TrainUrl string | |||
| FlavorCode string | |||
| LogUrl string | |||
| PoolID string | |||
| SpecID int64 | |||
| WorkServerNumber int | |||
| EngineID int64 | |||
| } | |||
| @@ -137,8 +138,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { | |||
| DataUrl: req.DataUrl, | |||
| EngineID: req.EngineID, | |||
| TrainUrl: req.TrainUrl, | |||
| LogUrl: req.LogUrl, | |||
| PoolID: req.PoolID, | |||
| SpecID: req.SpecID, | |||
| Flavor: models.Flavor{ | |||
| Code: req.FlavorCode, | |||
| }, | |||
| @@ -86,6 +86,7 @@ func getToken() error { | |||
| } | |||
| TOKEN = res.Header().Get("X-Subject-Token") | |||
| log.Info(TOKEN) | |||
| return nil | |||
| } | |||
| @@ -311,7 +312,9 @@ sendjob: | |||
| return nil, fmt.Errorf("resty create train-job: %s", err) | |||
| } | |||
| log.Info("", res.StatusCode(), res.Request.Body) | |||
| //log.Info("%d", res.StatusCode()) | |||
| //req, _ := json.Marshal(createJobParams) | |||
| //log.Info("%s", req) | |||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
| retry++ | |||
| @@ -320,7 +323,7 @@ sendjob: | |||
| } | |||
| if res.StatusCode() != http.StatusOK { | |||
| log.Error("createTrainJob failed(%d)", res.StatusCode()) | |||
| log.Error("createTrainJob failed", res.StatusCode(), res.RawResponse.Body, result.ErrorCode, result.ErrorMsg) | |||
| return &result, fmt.Errorf("createTrainJob failed(%d)", res.StatusCode()) | |||
| } | |||
| @@ -347,7 +350,7 @@ sendjob: | |||
| Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs) | |||
| if err != nil { | |||
| return nil, fmt.Errorf("resty GetJob: %v", err) | |||
| return nil, fmt.Errorf("resty GetResourceSpecs: %v", err) | |||
| } | |||
| if res.StatusCode() == http.StatusUnauthorized && retry < 1 { | |||
| @@ -356,8 +359,6 @@ sendjob: | |||
| goto sendjob | |||
| } | |||
| log.Info("", res.StatusCode(), res.RawResponse.Body) | |||
| if res.StatusCode() != http.StatusOK { | |||
| log.Error("GetResourceSpecs failed(%d)", res.StatusCode()) | |||
| return &result, fmt.Errorf("GetResourceSpecs failed(%d)", res.StatusCode()) | |||
| @@ -326,15 +326,6 @@ func TrainJobNew(ctx *context.Context) { | |||
| } | |||
| ctx.Data["flavor_infos"] = flavorInfos.Info | |||
| res, err := modelarts.GetResourceSpecs() | |||
| if err != nil { | |||
| log.Error("GetResourceSpecs failed: %v", err) | |||
| ctx.ServerError("GetResourceSpecs failed:", err) | |||
| return | |||
| } | |||
| log.Info("", res.SpecTotalCount) | |||
| ctx.HTML(200, tplModelArtsTrainJobNew) | |||
| } | |||
| @@ -348,11 +339,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| bootFile := form.BootFile | |||
| flavorCode := form.Flavor | |||
| poolID := form.PoolID | |||
| specID := form.SpecID | |||
| repo := ctx.Repo.Repository | |||
| codeLocalPath := setting.JobPath + jobName + modelarts.CodePath | |||
| codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath | |||
| outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath | |||
| logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath | |||
| dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" | |||
| if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { | |||
| @@ -363,8 +354,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| //todo: upload code (send to file_server to do this work?) | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { | |||
| log.Error("Failed to obsMkdir: %s (%v)", repo.FullName(), err) | |||
| ctx.RenderWithErr("Failed to obsMkdir", tplModelArtsTrainJobNew, &form) | |||
| log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) | |||
| ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { | |||
| log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) | |||
| ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) | |||
| return | |||
| } | |||
| @@ -382,10 +379,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) | |||
| BootFile: codeObsPath + bootFile, | |||
| TrainUrl: outputObsPath, | |||
| FlavorCode: flavorCode, | |||
| PoolID: poolID, | |||
| WorkServerNumber: workServerNumber, | |||
| EngineID: int64(engineID), | |||
| SpecID: int64(specID), | |||
| LogUrl: logObsPath, | |||
| PoolID: poolID, | |||
| } | |||
| err := modelarts.GenerateTrainJob(ctx, req) | |||