diff --git a/models/cloudbrain.go b/models/cloudbrain.go index 89b7abd4f..6b184f098 100755 --- a/models/cloudbrain.go +++ b/models/cloudbrain.go @@ -482,20 +482,20 @@ type Config struct { BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 Parameter []Parameter `json:"parameter"` DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL - DatasetID string `json:"dataset_id"` - DataVersionID string `json:"dataset_version_id"` - DataSource []DataSource `json:"data_source"` - SpecID int64 `json:"spec_id"` + //DatasetID string `json:"dataset_id"` + //DataVersionID string `json:"dataset_version_id"` + //DataSource []DataSource `json:"data_source"` + //SpecID int64 `json:"spec_id"` EngineID int64 `json:"engine_id"` - ModelID int64 `json:"model_id"` + //ModelID int64 `json:"model_id"` TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL LogUrl string `json:"log_url"` - UserImageUrl string `json:"user_image_url"` - UserCommand string `json:"user_command"` + //UserImageUrl string `json:"user_image_url"` + //UserCommand string `json:"user_command"` CreateVersion bool `json:"create_version"` Volumes []Volumes `json:"volumes"` Flavor Flavor `json:"flavor"` - PoolID string `json:"pool_id"` + PoolID string `json:"pool_id"` } type Parameter struct { diff --git a/modules/auth/modelarts.go b/modules/auth/modelarts.go index 951065059..306b6b963 100755 --- a/modules/auth/modelarts.go +++ b/modules/auth/modelarts.go @@ -21,9 +21,8 @@ type CreateModelArtsTrainJobForm struct { BootFile string `form:"boot_file" binding:"Required"` WorkServerNumber int `form:"work_server_number" binding:"Required"` EngineID int `form:"engine_id" binding:"Required"` - SpecID int `form:"spec_id" binding:"Required"` - Flavor string `form:"flavor" binding:"Required"` PoolID string `form:"pool_id" binding:"Required"` + Flavor string `form:"flavor" binding:"Required"` Description string `form:"description"` } diff --git a/modules/modelarts/modelarts.go b/modules/modelarts/modelarts.go index 8793f9b77..8f4ad945e 100755 --- a/modules/modelarts/modelarts.go +++ b/modules/modelarts/modelarts.go @@ -37,6 +37,7 @@ const ( "]}" CodePath = "/code/" OutputPath = "/output/" + LogPath = "/log/" JobPath = "/job/" ) @@ -49,8 +50,8 @@ type GenerateTrainJobReq struct { DataUrl string TrainUrl string FlavorCode string + LogUrl string PoolID string - SpecID int64 WorkServerNumber int EngineID int64 } @@ -137,8 +138,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error { DataUrl: req.DataUrl, EngineID: req.EngineID, TrainUrl: req.TrainUrl, + LogUrl: req.LogUrl, PoolID: req.PoolID, - SpecID: req.SpecID, Flavor: models.Flavor{ Code: req.FlavorCode, }, diff --git a/modules/modelarts/resty.go b/modules/modelarts/resty.go index bb514d0ba..2587eaa5d 100755 --- a/modules/modelarts/resty.go +++ b/modules/modelarts/resty.go @@ -86,6 +86,7 @@ func getToken() error { } TOKEN = res.Header().Get("X-Subject-Token") + log.Info(TOKEN) return nil } @@ -311,7 +312,9 @@ sendjob: return nil, fmt.Errorf("resty create train-job: %s", err) } - log.Info("", res.StatusCode(), res.Request.Body) + //log.Info("%d", res.StatusCode()) + //req, _ := json.Marshal(createJobParams) + //log.Info("%s", req) if res.StatusCode() == http.StatusUnauthorized && retry < 1 { retry++ @@ -320,7 +323,7 @@ sendjob: } if res.StatusCode() != http.StatusOK { - log.Error("createTrainJob failed(%d)", res.StatusCode()) + log.Error("createTrainJob failed", res.StatusCode(), res.RawResponse.Body, result.ErrorCode, result.ErrorMsg) return &result, fmt.Errorf("createTrainJob failed(%d)", res.StatusCode()) } @@ -347,7 +350,7 @@ sendjob: Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs) if err != nil { - return nil, fmt.Errorf("resty GetJob: %v", err) + return nil, fmt.Errorf("resty GetResourceSpecs: %v", err) } if res.StatusCode() == http.StatusUnauthorized && retry < 1 { @@ -356,8 +359,6 @@ sendjob: goto sendjob } - log.Info("", res.StatusCode(), res.RawResponse.Body) - if res.StatusCode() != http.StatusOK { log.Error("GetResourceSpecs failed(%d)", res.StatusCode()) return &result, fmt.Errorf("GetResourceSpecs failed(%d)", res.StatusCode()) diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index e6bc5b49d..67925d163 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -326,15 +326,6 @@ func TrainJobNew(ctx *context.Context) { } ctx.Data["flavor_infos"] = flavorInfos.Info - res, err := modelarts.GetResourceSpecs() - if err != nil { - log.Error("GetResourceSpecs failed: %v", err) - ctx.ServerError("GetResourceSpecs failed:", err) - return - } - - log.Info("", res.SpecTotalCount) - ctx.HTML(200, tplModelArtsTrainJobNew) } @@ -348,11 +339,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) bootFile := form.BootFile flavorCode := form.Flavor poolID := form.PoolID - specID := form.SpecID repo := ctx.Repo.Repository codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath + logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { @@ -363,8 +354,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) //todo: upload code (send to file_server todo this work?) if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { - log.Error("Failed to obsMkdir: %s (%v)", repo.FullName(), err) - ctx.RenderWithErr("Failed to obsMkdir", tplModelArtsTrainJobNew, &form) + log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) + ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form) + return + } + + if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil { + log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) + ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form) return } @@ -382,10 +379,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm) BootFile: codeObsPath + bootFile, TrainUrl: outputObsPath, FlavorCode: flavorCode, - PoolID: poolID, WorkServerNumber: workServerNumber, EngineID: int64(engineID), - SpecID: int64(specID), + LogUrl: logObsPath, + PoolID: poolID, } err := modelarts.GenerateTrainJob(ctx, req)