Browse Source

create job

tags/v1.21.12.1
lewis 4 years ago
parent
commit
af5313ab67
5 changed files with 29 additions and 31 deletions
  1. +8
    -8
      models/cloudbrain.go
  2. +1
    -2
      modules/auth/modelarts.go
  3. +3
    -2
      modules/modelarts/modelarts.go
  4. +6
    -5
      modules/modelarts/resty.go
  5. +11
    -14
      routers/repo/modelarts.go

+ 8
- 8
models/cloudbrain.go View File

@@ -482,20 +482,20 @@ type Config struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下 BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"` Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
DatasetID string `json:"dataset_id"`
DataVersionID string `json:"dataset_version_id"`
DataSource []DataSource `json:"data_source"`
SpecID int64 `json:"spec_id"`
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"` EngineID int64 `json:"engine_id"`
ModelID int64 `json:"model_id"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"` LogUrl string `json:"log_url"`
UserImageUrl string `json:"user_image_url"`
UserCommand string `json:"user_command"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
CreateVersion bool `json:"create_version"` CreateVersion bool `json:"create_version"`
Volumes []Volumes `json:"volumes"` Volumes []Volumes `json:"volumes"`
Flavor Flavor `json:"flavor"` Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
PoolID string `json:"pool_id"`
} }


type Parameter struct { type Parameter struct {


+ 1
- 2
modules/auth/modelarts.go View File

@@ -21,9 +21,8 @@ type CreateModelArtsTrainJobForm struct {
BootFile string `form:"boot_file" binding:"Required"` BootFile string `form:"boot_file" binding:"Required"`
WorkServerNumber int `form:"work_server_number" binding:"Required"` WorkServerNumber int `form:"work_server_number" binding:"Required"`
EngineID int `form:"engine_id" binding:"Required"` EngineID int `form:"engine_id" binding:"Required"`
SpecID int `form:"spec_id" binding:"Required"`
Flavor string `form:"flavor" binding:"Required"`
PoolID string `form:"pool_id" binding:"Required"` PoolID string `form:"pool_id" binding:"Required"`
Flavor string `form:"flavor" binding:"Required"`
Description string `form:"description"` Description string `form:"description"`
} }




+ 3
- 2
modules/modelarts/modelarts.go View File

@@ -37,6 +37,7 @@ const (
"]}" "]}"
CodePath = "/code/" CodePath = "/code/"
OutputPath = "/output/" OutputPath = "/output/"
LogPath = "/log/"
JobPath = "/job/" JobPath = "/job/"
) )


@@ -49,8 +50,8 @@ type GenerateTrainJobReq struct {
DataUrl string DataUrl string
TrainUrl string TrainUrl string
FlavorCode string FlavorCode string
LogUrl string
PoolID string PoolID string
SpecID int64
WorkServerNumber int WorkServerNumber int
EngineID int64 EngineID int64
} }
@@ -137,8 +138,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error {
DataUrl: req.DataUrl, DataUrl: req.DataUrl,
EngineID: req.EngineID, EngineID: req.EngineID,
TrainUrl: req.TrainUrl, TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID, PoolID: req.PoolID,
SpecID: req.SpecID,
Flavor: models.Flavor{ Flavor: models.Flavor{
Code: req.FlavorCode, Code: req.FlavorCode,
}, },


+ 6
- 5
modules/modelarts/resty.go View File

@@ -86,6 +86,7 @@ func getToken() error {
} }


TOKEN = res.Header().Get("X-Subject-Token") TOKEN = res.Header().Get("X-Subject-Token")
log.Info(TOKEN)


return nil return nil
} }
@@ -311,7 +312,9 @@ sendjob:
return nil, fmt.Errorf("resty create train-job: %s", err) return nil, fmt.Errorf("resty create train-job: %s", err)
} }


log.Info("", res.StatusCode(), res.Request.Body)
//log.Info("%d", res.StatusCode())
//req, _ := json.Marshal(createJobParams)
//log.Info("%s", req)


if res.StatusCode() == http.StatusUnauthorized && retry < 1 { if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++ retry++
@@ -320,7 +323,7 @@ sendjob:
} }


if res.StatusCode() != http.StatusOK { if res.StatusCode() != http.StatusOK {
log.Error("createTrainJob failed(%d)", res.StatusCode())
log.Error("createTrainJob failed", res.StatusCode(), res.RawResponse.Body, result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("createTrainJob failed(%d)", res.StatusCode()) return &result, fmt.Errorf("createTrainJob failed(%d)", res.StatusCode())
} }


@@ -347,7 +350,7 @@ sendjob:
Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs) Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs)


if err != nil { if err != nil {
return nil, fmt.Errorf("resty GetJob: %v", err)
return nil, fmt.Errorf("resty GetResourceSpecs: %v", err)
} }


if res.StatusCode() == http.StatusUnauthorized && retry < 1 { if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
@@ -356,8 +359,6 @@ sendjob:
goto sendjob goto sendjob
} }


log.Info("", res.StatusCode(), res.RawResponse.Body)

if res.StatusCode() != http.StatusOK { if res.StatusCode() != http.StatusOK {
log.Error("GetResourceSpecs failed(%d)", res.StatusCode()) log.Error("GetResourceSpecs failed(%d)", res.StatusCode())
return &result, fmt.Errorf("GetResourceSpecs failed(%d)", res.StatusCode()) return &result, fmt.Errorf("GetResourceSpecs failed(%d)", res.StatusCode())


+ 11
- 14
routers/repo/modelarts.go View File

@@ -326,15 +326,6 @@ func TrainJobNew(ctx *context.Context) {
} }
ctx.Data["flavor_infos"] = flavorInfos.Info ctx.Data["flavor_infos"] = flavorInfos.Info


res, err := modelarts.GetResourceSpecs()
if err != nil {
log.Error("GetResourceSpecs failed: %v", err)
ctx.ServerError("GetResourceSpecs failed:", err)
return
}

log.Info("", res.SpecTotalCount)

ctx.HTML(200, tplModelArtsTrainJobNew) ctx.HTML(200, tplModelArtsTrainJobNew)
} }


@@ -348,11 +339,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
bootFile := form.BootFile bootFile := form.BootFile
flavorCode := form.Flavor flavorCode := form.Flavor
poolID := form.PoolID poolID := form.PoolID
specID := form.SpecID
repo := ctx.Repo.Repository repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/" dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"


if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil { if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
@@ -363,8 +354,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)


//todo: upload code (send to file_server to do this work?) //todo: upload code (send to file_server to do this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir: %s (%v)", repo.FullName(), err)
ctx.RenderWithErr("Failed to obsMkdir", tplModelArtsTrainJobNew, &form)
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
return
}

if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
return return
} }


@@ -382,10 +379,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
BootFile: codeObsPath + bootFile, BootFile: codeObsPath + bootFile,
TrainUrl: outputObsPath, TrainUrl: outputObsPath,
FlavorCode: flavorCode, FlavorCode: flavorCode,
PoolID: poolID,
WorkServerNumber: workServerNumber, WorkServerNumber: workServerNumber,
EngineID: int64(engineID), EngineID: int64(engineID),
SpecID: int64(specID),
LogUrl: logObsPath,
PoolID: poolID,
} }


err := modelarts.GenerateTrainJob(ctx, req) err := modelarts.GenerateTrainJob(ctx, req)


Loading…
Cancel
Save