Browse Source

create job

tags/v1.21.12.1
lewis 4 years ago
parent
commit
af5313ab67
5 changed files with 29 additions and 31 deletions
  1. +8
    -8
      models/cloudbrain.go
  2. +1
    -2
      modules/auth/modelarts.go
  3. +3
    -2
      modules/modelarts/modelarts.go
  4. +6
    -5
      modules/modelarts/resty.go
  5. +11
    -14
      routers/repo/modelarts.go

+ 8
- 8
models/cloudbrain.go View File

@@ -482,20 +482,20 @@ type Config struct {
BootFileUrl string `json:"boot_file_url"` //训练作业的代码启动文件,需要在代码目录下
Parameter []Parameter `json:"parameter"`
DataUrl string `json:"data_url"` //训练作业需要的数据集OBS路径URL
DatasetID string `json:"dataset_id"`
DataVersionID string `json:"dataset_version_id"`
DataSource []DataSource `json:"data_source"`
SpecID int64 `json:"spec_id"`
//DatasetID string `json:"dataset_id"`
//DataVersionID string `json:"dataset_version_id"`
//DataSource []DataSource `json:"data_source"`
//SpecID int64 `json:"spec_id"`
EngineID int64 `json:"engine_id"`
ModelID int64 `json:"model_id"`
//ModelID int64 `json:"model_id"`
TrainUrl string `json:"train_url"` //训练作业的输出文件OBS路径URL
LogUrl string `json:"log_url"`
UserImageUrl string `json:"user_image_url"`
UserCommand string `json:"user_command"`
//UserImageUrl string `json:"user_image_url"`
//UserCommand string `json:"user_command"`
CreateVersion bool `json:"create_version"`
Volumes []Volumes `json:"volumes"`
Flavor Flavor `json:"flavor"`
PoolID string `json:"pool_id"`
PoolID string `json:"pool_id"`
}

type Parameter struct {


+ 1
- 2
modules/auth/modelarts.go View File

@@ -21,9 +21,8 @@ type CreateModelArtsTrainJobForm struct {
BootFile string `form:"boot_file" binding:"Required"`
WorkServerNumber int `form:"work_server_number" binding:"Required"`
EngineID int `form:"engine_id" binding:"Required"`
SpecID int `form:"spec_id" binding:"Required"`
Flavor string `form:"flavor" binding:"Required"`
PoolID string `form:"pool_id" binding:"Required"`
Flavor string `form:"flavor" binding:"Required"`
Description string `form:"description"`
}



+ 3
- 2
modules/modelarts/modelarts.go View File

@@ -37,6 +37,7 @@ const (
"]}"
CodePath = "/code/"
OutputPath = "/output/"
LogPath = "/log/"
JobPath = "/job/"
)

@@ -49,8 +50,8 @@ type GenerateTrainJobReq struct {
DataUrl string
TrainUrl string
FlavorCode string
LogUrl string
PoolID string
SpecID int64
WorkServerNumber int
EngineID int64
}
@@ -137,8 +138,8 @@ func GenerateTrainJob(ctx *context.Context, req *GenerateTrainJobReq) error {
DataUrl: req.DataUrl,
EngineID: req.EngineID,
TrainUrl: req.TrainUrl,
LogUrl: req.LogUrl,
PoolID: req.PoolID,
SpecID: req.SpecID,
Flavor: models.Flavor{
Code: req.FlavorCode,
},


+ 6
- 5
modules/modelarts/resty.go View File

@@ -86,6 +86,7 @@ func getToken() error {
}

TOKEN = res.Header().Get("X-Subject-Token")
log.Info(TOKEN)

return nil
}
@@ -311,7 +312,9 @@ sendjob:
return nil, fmt.Errorf("resty create train-job: %s", err)
}

log.Info("", res.StatusCode(), res.Request.Body)
//log.Info("%d", res.StatusCode())
//req, _ := json.Marshal(createJobParams)
//log.Info("%s", req)

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
retry++
@@ -320,7 +323,7 @@ sendjob:
}

if res.StatusCode() != http.StatusOK {
log.Error("createTrainJob failed(%d)", res.StatusCode())
log.Error("createTrainJob failed", res.StatusCode(), res.RawResponse.Body, result.ErrorCode, result.ErrorMsg)
return &result, fmt.Errorf("createTrainJob failed(%d)", res.StatusCode())
}

@@ -347,7 +350,7 @@ sendjob:
Get(HOST + "/v1/" + setting.ProjectID + urlResourceSpecs)

if err != nil {
return nil, fmt.Errorf("resty GetJob: %v", err)
return nil, fmt.Errorf("resty GetResourceSpecs: %v", err)
}

if res.StatusCode() == http.StatusUnauthorized && retry < 1 {
@@ -356,8 +359,6 @@ sendjob:
goto sendjob
}

log.Info("", res.StatusCode(), res.RawResponse.Body)

if res.StatusCode() != http.StatusOK {
log.Error("GetResourceSpecs failed(%d)", res.StatusCode())
return &result, fmt.Errorf("GetResourceSpecs failed(%d)", res.StatusCode())


+ 11
- 14
routers/repo/modelarts.go View File

@@ -326,15 +326,6 @@ func TrainJobNew(ctx *context.Context) {
}
ctx.Data["flavor_infos"] = flavorInfos.Info

res, err := modelarts.GetResourceSpecs()
if err != nil {
log.Error("GetResourceSpecs failed: %v", err)
ctx.ServerError("GetResourceSpecs failed:", err)
return
}

log.Info("", res.SpecTotalCount)

ctx.HTML(200, tplModelArtsTrainJobNew)
}

@@ -348,11 +339,11 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
bootFile := form.BootFile
flavorCode := form.Flavor
poolID := form.PoolID
specID := form.SpecID
repo := ctx.Repo.Repository
codeLocalPath := setting.JobPath + jobName + modelarts.CodePath
codeObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.CodePath
outputObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.OutputPath
logObsPath := "/" + setting.Bucket + modelarts.JobPath + jobName + modelarts.LogPath
dataPath := "/" + setting.Bucket + "/" + setting.BasePath + path.Join(uuid[0:1], uuid[1:2]) + "/" + uuid + "/"

if err := git.Clone(repo.RepoPath(), codeLocalPath, git.CloneRepoOptions{}); err != nil {
@@ -363,8 +354,14 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)

//todo: upload code (send to file_server todo this work?)
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir: %s (%v)", repo.FullName(), err)
ctx.RenderWithErr("Failed to obsMkdir", tplModelArtsTrainJobNew, &form)
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)
ctx.RenderWithErr("Failed to obsMkdir_output", tplModelArtsTrainJobNew, &form)
return
}

if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath); err != nil {
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
ctx.RenderWithErr("Failed to obsMkdir_log", tplModelArtsTrainJobNew, &form)
return
}

@@ -382,10 +379,10 @@ func TrainJobCreate(ctx *context.Context, form auth.CreateModelArtsTrainJobForm)
BootFile: codeObsPath + bootFile,
TrainUrl: outputObsPath,
FlavorCode: flavorCode,
PoolID: poolID,
WorkServerNumber: workServerNumber,
EngineID: int64(engineID),
SpecID: int64(specID),
LogUrl: logObsPath,
PoolID: poolID,
}

err := modelarts.GenerateTrainJob(ctx, req)


Loading…
Cancel
Save