Browse Source

提交代码

tags/v1.22.11.2^2
ychao_1983 3 years ago
parent
commit
f676bbb943
3 changed files with 65 additions and 50 deletions
  1. +17
    -2
      models/base_message.go
  2. +33
    -33
      services/cloudbrain/cloudbrainTask/inference.go
  3. +15
    -15
      services/cloudbrain/cloudbrainTask/train.go

+ 17
- 2
models/base_message.go View File

@@ -1,8 +1,8 @@
package models package models


type BaseMessage struct { type BaseMessage struct {
Code int `json:"code"`
Message string `json:"message"`
Code int
Message string
} }


var BaseOKMessage = BaseMessage{ var BaseOKMessage = BaseMessage{
@@ -14,3 +14,18 @@ func BaseErrorMessage(message string) BaseMessage {
1, message, 1, message,
} }
} }

// BaseMessageApi is a code/message response envelope whose fields are
// serialized to JSON under the lowercase keys "code" and "message".
// BaseOKMessageApi uses code 0 and BaseErrorMessageApi uses code 1.
type BaseMessageApi struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

// BaseOKMessageApi is a ready-made BaseMessageApi value with code 0 and an
// empty message.
var BaseOKMessageApi = BaseMessageApi{
	Code:    0,
	Message: "",
}

// BaseErrorMessageApi returns a BaseMessageApi with code 1 that carries the
// given message text.
func BaseErrorMessageApi(message string) BaseMessageApi {
	return BaseMessageApi{Code: 1, Message: message}
}

+ 33
- 33
services/cloudbrain/cloudbrainTask/inference.go View File

@@ -54,7 +54,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
if !isOk { if !isOk {
log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("repo.cloudbrain_samejob_err")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
return return
} }


@@ -63,7 +63,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
command, err := getInferenceJobCommand(option) command, err := getInferenceJobCommand(option)
if err != nil { if err != nil {
log.Error("getTrainJobCommand failed: %v", err) log.Error("getTrainJobCommand failed: %v", err)
ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }


@@ -71,40 +71,40 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
if err == nil { if err == nil {
if len(tasks) != 0 { if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"]) log.Error("the job name did already exist", ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage("the job name did already exist"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("the job name did already exist"))
return return
} }
} else { } else {
if !models.IsErrJobNotExist(err) { if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"]) log.Error("system error, %v", err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage("system error"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error"))
return return
} }
} }


if !jobNamePattern.MatchString(displayJobName) { if !jobNamePattern.MatchString(displayJobName) {


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("repo.cloudbrain_jobname_err")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_jobname_err")))
return return
} }


bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
if err != nil || !bootFileExist { if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err, ctx.Data["MsgID"]) log.Error("Get bootfile error:", err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("repo.cloudbrain_bootfile_err")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_bootfile_err")))
return return
} }


count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType) count, err := GetNotFinalStatusTaskCount(ctx.User.ID, models.TypeCloudBrainOne, jobType)
if err != nil { if err != nil {
log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"]) log.Error("GetCloudbrainCountByUserID failed:%v", err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage("system error"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error"))
return return
} else { } else {
if count >= 1 { if count >= 1 {
log.Error("the user already has running or waiting task", ctx.Data["MsgID"]) log.Error("the user already has running or waiting task", ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("repo.cloudbrain.morethanonejob")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain.morethanonejob")))
return return
} }
} }
@@ -115,7 +115,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ResultPath) errStr := loadCodeAndMakeModelPath(repo, codePath, branchName, jobName, cloudbrain.ResultPath)
if errStr != "" { if errStr != "" {


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr(errStr)))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr(errStr)))
return return
} }


@@ -125,7 +125,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
if err != nil { if err != nil {
log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"]) log.Error("GetDatasetInfo failed: %v", err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.error.dataset_select")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.error.dataset_select")))
return return
} }
spec, err := resource.GetAndCheckSpec(ctx.User.ID, option.SpecId, models.FindSpecsOptions{ spec, err := resource.GetAndCheckSpec(ctx.User.ID, option.SpecId, models.FindSpecsOptions{
@@ -135,12 +135,12 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
AiCenterCode: models.AICenterOfCloudBrainOne}) AiCenterCode: models.AICenterOfCloudBrainOne})
if err != nil || spec == nil { if err != nil || spec == nil {


ctx.JSON(http.StatusOK, models.BaseErrorMessage("Resource specification is not available"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Resource specification is not available"))
return return
} }
if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID) log.Error("point balance is not enough,userId=%d specId=%d", ctx.User.ID, spec.ID)
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("points.insufficient_points_balance")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
return return
} }
req := cloudbrain.GenerateCloudBrainTaskReq{ req := cloudbrain.GenerateCloudBrainTaskReq{
@@ -175,10 +175,10 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, option api.CreateTrainJo
jobId, err := cloudbrain.GenerateTask(req) jobId, err := cloudbrain.GenerateTask(req)
if err != nil { if err != nil {


ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }
ctx.JSON(http.StatusOK, models.BaseMessage{Code: 0, Message: jobId})
ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId})
} }


func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJobOption) { func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJobOption) {
@@ -212,7 +212,7 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
errStr := checkInferenceJobMultiNode(ctx.User.ID, option.WorkServerNumber) errStr := checkInferenceJobMultiNode(ctx.User.ID, option.WorkServerNumber)
if errStr != "" { if errStr != "" {


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr(errStr)))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr(errStr)))
return return
} }


@@ -221,7 +221,7 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if !isOk { if !isOk {
log.Error("lock processed failed:%v", err, ctx.Data["MsgID"]) log.Error("lock processed failed:%v", err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("repo.cloudbrain_samejob_err")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_samejob_err")))
return return
} }
defer lock.UnLock() defer lock.UnLock()
@@ -230,13 +230,13 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err != nil { if err != nil {
log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"]) log.Error("GetCloudbrainInferenceJobCountByUserID failed:%v", err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage("system error"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error"))
return return
} else { } else {
if count >= 1 { if count >= 1 {
log.Error("the user already has running or waiting inference task", ctx.Data["MsgID"]) log.Error("the user already has running or waiting inference task", ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage("you have already a running or waiting inference task, can not create more"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("you have already a running or waiting inference task, can not create more"))
return return
} }
} }
@@ -244,14 +244,14 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err := paramCheckCreateInferenceJob(option); err != nil { if err := paramCheckCreateInferenceJob(option); err != nil {
log.Error("paramCheckCreateInferenceJob failed:(%v)", err) log.Error("paramCheckCreateInferenceJob failed:(%v)", err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }


bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName) bootFileExist, err := ctx.Repo.FileExists(bootFile, branchName)
if err != nil || !bootFileExist { if err != nil || !bootFileExist {
log.Error("Get bootfile error:", err) log.Error("Get bootfile error:", err)
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("repo.cloudbrain_bootfile_err")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("repo.cloudbrain_bootfile_err")))
return return
} }


@@ -261,14 +261,14 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if len(tasks) != 0 { if len(tasks) != 0 {
log.Error("the job name did already exist", ctx.Data["MsgID"]) log.Error("the job name did already exist", ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage("the job name did already exist"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("the job name did already exist"))
return return
} }
} else { } else {
if !models.IsErrJobNotExist(err) { if !models.IsErrJobNotExist(err) {
log.Error("system error, %v", err, ctx.Data["MsgID"]) log.Error("system error, %v", err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage("system error"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("system error"))
return return
} }
} }
@@ -280,12 +280,12 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
AiCenterCode: models.AICenterOfCloudBrainTwo}) AiCenterCode: models.AICenterOfCloudBrainTwo})
if err != nil || spec == nil { if err != nil || spec == nil {


ctx.JSON(http.StatusOK, models.BaseErrorMessage("Resource specification not available"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Resource specification not available"))
return return
} }
if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) { if !account.IsPointBalanceEnough(ctx.User.ID, spec.UnitPrice) {
log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID) log.Error("point balance is not enough,userId=%d specId=%d ", ctx.User.ID, spec.ID)
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("points.insufficient_points_balance")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("points.insufficient_points_balance")))
return return
} }


@@ -301,7 +301,7 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err := downloadCode(repo, codeLocalPath, branchName); err != nil { if err := downloadCode(repo, codeLocalPath, branchName); err != nil {
log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err) log.Error("Create task failed, server timed out: %s (%v)", repo.FullName(), err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


@@ -309,19 +309,19 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath + VersionOutputPath + "/"); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.ResultPath + VersionOutputPath + "/"); err != nil {
log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err) log.Error("Failed to obsMkdir_result: %s (%v)", repo.FullName(), err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage("Failed to obsMkdir_result"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to obsMkdir_result"))
return return
} }


if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.LogPath + VersionOutputPath + "/"); err != nil {
log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err) log.Error("Failed to obsMkdir_log: %s (%v)", repo.FullName(), err)
ctx.JSON(http.StatusOK, models.BaseErrorMessage("Failed to obsMkdir_log"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Failed to obsMkdir_log"))
return return
} }


if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


@@ -338,7 +338,7 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid) datasUrlList, dataUrl, datasetNames, isMultiDataset, err := getDatasUrlListByUUIDS(uuid)
if err != nil { if err != nil {


ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }
dataPath := dataUrl dataPath := dataUrl
@@ -346,7 +346,7 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err != nil { if err != nil {
log.Error("Failed to Marshal: %v", err) log.Error("Failed to Marshal: %v", err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage("json error:"+err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("json error:"+err.Error()))
return return
} }
if isMultiDataset { if isMultiDataset {
@@ -362,7 +362,7 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err != nil { if err != nil {
log.Error("Failed to Unmarshal params: %s (%v)", params, err) log.Error("Failed to Unmarshal params: %s (%v)", params, err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage("运行参数错误"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("运行参数错误"))
return return
} }


@@ -421,10 +421,10 @@ func ModelArtsInferenceJobCreate(ctx *context.Context, option api.CreateTrainJob
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error()) log.Error("GenerateTrainJob failed:%v", err.Error())


ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }
ctx.JSON(http.StatusOK, models.BaseMessage{Code: 0, Message: jobId})
ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId})
} }


func getDatasUrlListByUUIDS(uuidStr string) ([]models.Datasurl, string, string, bool, error) { func getDatasUrlListByUUIDS(uuidStr string) ([]models.Datasurl, string, string, bool, error) {


+ 15
- 15
services/cloudbrain/cloudbrainTask/train.go View File

@@ -53,7 +53,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
defer lock.UnLock() defer lock.UnLock()
spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo) spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo)
if err != nil { if err != nil {
ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }


@@ -65,7 +65,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, option api.CreateTrainJobOpt


if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))


} }


@@ -73,21 +73,21 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
//upload code //upload code
if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil { if err := uploadCodeToMinio(codeLocalPath+"/", jobName, cloudbrain.CodeMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/" modelPath := setting.JobPath + jobName + cloudbrain.ModelMountPath + "/"
if err := mkModelPath(modelPath); err != nil { if err := mkModelPath(modelPath); err != nil {
log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("Failed to mkModelPath: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


//init model readme //init model readme
if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil { if err := uploadCodeToMinio(modelPath, jobName, cloudbrain.ModelMountPath+"/"); err != nil {
log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"]) log.Error("Failed to uploadCodeToMinio: %s (%v)", repo.FullName(), err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


@@ -109,7 +109,7 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, option.CkptName) command, err := generateCommand(repo.Name, grampus.ProcessorTypeGPU, codeMinioPath+cloudbrain.DefaultBranchName+".zip", datasetRemotePath, bootFile, params, setting.CBCodePathPrefix+jobName+cloudbrain.ModelMountPath+"/", allFileName, preTrainModelPath, option.CkptName)
if err != nil { if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage("Create task failed, internal error"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Create task failed, internal error"))
return return
} }


@@ -150,10 +150,10 @@ func GrampusTrainJobGpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
jobId, err := grampus.GenerateTrainJob(ctx, req) jobId, err := grampus.GenerateTrainJob(ctx, req)
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"]) log.Error("GenerateTrainJob failed:%v", err.Error(), ctx.Data["MsgID"])
ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }
ctx.JSON(http.StatusOK, models.BaseMessage{Code: 0, Message: jobId})
ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId})
} }


func checkParameters(ctx *context.Context, option api.CreateTrainJobOption, lock *redis_lock.DistributeLock, repo *models.Repository) (*models.Specification, map[string]models.DatasetInfo, string, error) { func checkParameters(ctx *context.Context, option api.CreateTrainJobOption, lock *redis_lock.DistributeLock, repo *models.Repository) (*models.Specification, map[string]models.DatasetInfo, string, error) {
@@ -258,7 +258,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
defer lock.UnLock() defer lock.UnLock()
spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo) spec, datasetInfos, datasetNames, err := checkParameters(ctx, option, lock, repo)
if err != nil { if err != nil {
ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }


@@ -271,7 +271,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil { if err := downloadZipCode(ctx, codeLocalPath, branchName); err != nil {
log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err) log.Error("downloadZipCode failed, server timed out: %s (%v)", repo.FullName(), err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


@@ -279,14 +279,14 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil { if err := obsMkdir(setting.CodePathPrefix + jobName + modelarts.OutputPath); err != nil {
log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err) log.Error("Failed to obsMkdir_output: %s (%v)", repo.FullName(), err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil { if err := uploadCodeToObs(codeLocalPath, jobName, ""); err != nil {
log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err) log.Error("Failed to uploadCodeToObs: %s (%v)", repo.FullName(), err)


ctx.JSON(http.StatusOK, models.BaseErrorMessage(ctx.Tr("cloudbrain.load_code_failed")))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(ctx.Tr("cloudbrain.load_code_failed")))
return return
} }


@@ -308,7 +308,7 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
if err != nil { if err != nil {
log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"]) log.Error("Failed to generateCommand: %s (%v)", displayJobName, err, ctx.Data["MsgID"])


ctx.JSON(http.StatusOK, models.BaseErrorMessage("Create task failed, internal error"))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi("Create task failed, internal error"))
return return
} }


@@ -352,10 +352,10 @@ func GrampusTrainJobNpuCreate(ctx *context.Context, option api.CreateTrainJobOpt
if err != nil { if err != nil {
log.Error("GenerateTrainJob failed:%v", err.Error()) log.Error("GenerateTrainJob failed:%v", err.Error())


ctx.JSON(http.StatusOK, models.BaseErrorMessage(err.Error()))
ctx.JSON(http.StatusOK, models.BaseErrorMessageApi(err.Error()))
return return
} }
ctx.JSON(http.StatusOK, models.BaseMessage{Code: 0, Message: jobId})
ctx.JSON(http.StatusOK, models.BaseMessageApi{Code: 0, Message: jobId})
} }


func obsMkdir(dir string) error { func obsMkdir(dir string) error {


Loading…
Cancel
Save