Browse Source

#2701

Fix notebook restart bug: re-check the task's resource specification and pass it to the restarted task
tags/v1.22.9.1^2^2
chenyifan01 3 years ago
parent
commit
19603f0fbf
3 changed files with 21 additions and 20 deletions
  1. +1
    -4
      modules/cloudbrain/cloudbrain.go
  2. +0
    -16
      routers/repo/cloudbrain.go
  3. +20
    -0
      routers/repo/modelarts.go

+ 1
- 4
modules/cloudbrain/cloudbrain.go View File

@@ -61,7 +61,6 @@ type GenerateCloudBrainTaskReq struct {
Snn4ImageNetPath string Snn4ImageNetPath string
BrainScorePath string BrainScorePath string
JobType string JobType string
GpuQueue string
Description string Description string
BranchName string BranchName string
BootFile string BootFile string
@@ -72,7 +71,6 @@ type GenerateCloudBrainTaskReq struct {
DatasetInfos map[string]models.DatasetInfo DatasetInfos map[string]models.DatasetInfo
BenchmarkTypeID int BenchmarkTypeID int
BenchmarkChildTypeID int BenchmarkChildTypeID int
ResourceSpecId int
ResultPath string ResultPath string
TrainUrl string TrainUrl string
ModelName string ModelName string
@@ -344,8 +342,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error {
Type: models.TypeCloudBrainOne, Type: models.TypeCloudBrainOne,
Uuid: req.Uuids, Uuid: req.Uuids,
Image: req.Image, Image: req.Image,
GpuQueue: req.GpuQueue,
ResourceSpecId: req.ResourceSpecId,
GpuQueue: req.Spec.QueueCode,
ComputeResource: models.GPUResource, ComputeResource: models.GPUResource,
BenchmarkTypeID: req.BenchmarkTypeID, BenchmarkTypeID: req.BenchmarkTypeID,
BenchmarkChildTypeID: req.BenchmarkChildTypeID, BenchmarkChildTypeID: req.BenchmarkChildTypeID,


+ 0
- 16
routers/repo/cloudbrain.go View File

@@ -191,9 +191,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
image := strings.TrimSpace(form.Image) image := strings.TrimSpace(form.Image)
uuids := form.Attachment uuids := form.Attachment
jobType := form.JobType jobType := form.JobType
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName branchName := form.BranchName
repo := ctx.Repo.Repository repo := ctx.Repo.Repository
tpl := tplCloudBrainNew tpl := tplCloudBrainNew
@@ -311,7 +309,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType, JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description, Description: form.Description,
BranchName: branchName, BranchName: branchName,
BootFile: form.BootFile, BootFile: form.BootFile,
@@ -319,7 +316,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) {
CommitID: commitID, CommitID: commitID,
BenchmarkTypeID: 0, BenchmarkTypeID: 0,
BenchmarkChildTypeID: 0, BenchmarkChildTypeID: 0,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec, Spec: spec,
} }
@@ -369,9 +365,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
image := strings.TrimSpace(form.Image) image := strings.TrimSpace(form.Image)
uuid := form.Attachment uuid := form.Attachment
jobType := string(models.JobTypeInference) jobType := string(models.JobTypeInference)
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := form.BranchName branchName := form.BranchName
labelName := form.LabelName labelName := form.LabelName
repo := ctx.Repo.Repository repo := ctx.Repo.Repository
@@ -469,13 +463,11 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType, JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description, Description: form.Description,
BranchName: branchName, BranchName: branchName,
BootFile: form.BootFile, BootFile: form.BootFile,
Params: form.Params, Params: form.Params,
CommitID: commitID, CommitID: commitID,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
ModelName: form.ModelName, ModelName: form.ModelName,
ModelVersion: form.ModelVersion, ModelVersion: form.ModelVersion,
@@ -2201,10 +2193,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
displayJobName := form.DisplayJobName displayJobName := form.DisplayJobName
jobName := util.ConvertDisplayJobNameToJobName(displayJobName) jobName := util.ConvertDisplayJobNameToJobName(displayJobName)
image := strings.TrimSpace(form.Image) image := strings.TrimSpace(form.Image)
gpuQueue := form.GpuType
command := cloudbrain.CommandBenchmark command := cloudbrain.CommandBenchmark
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := cloudbrain.BenchMarkResourceID
benchmarkTypeID := form.BenchmarkTypeID benchmarkTypeID := form.BenchmarkTypeID
benchmarkChildTypeID := form.BenchmarkChildTypeID benchmarkChildTypeID := form.BenchmarkChildTypeID


@@ -2352,7 +2342,6 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: string(models.JobTypeBenchmark), JobType: string(models.JobTypeBenchmark),
GpuQueue: gpuQueue,
Description: form.Description, Description: form.Description,
BranchName: cloudbrain.DefaultBranchName, BranchName: cloudbrain.DefaultBranchName,
BootFile: "", BootFile: "",
@@ -2360,7 +2349,6 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo
CommitID: "", CommitID: "",
BenchmarkTypeID: benchmarkTypeID, BenchmarkTypeID: benchmarkTypeID,
BenchmarkChildTypeID: benchmarkChildTypeID, BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec, Spec: spec,
} }
@@ -2382,9 +2370,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
image := form.Image image := form.Image
uuid := form.Attachment uuid := form.Attachment
jobType := form.JobType jobType := form.JobType
gpuQueue := form.GpuType
codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath
resourceSpecId := form.ResourceSpecId
branchName := cloudbrain.DefaultBranchName branchName := cloudbrain.DefaultBranchName
repo := ctx.Repo.Repository repo := ctx.Repo.Repository


@@ -2491,7 +2477,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"),
BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"),
JobType: jobType, JobType: jobType,
GpuQueue: gpuQueue,
Description: form.Description, Description: form.Description,
BranchName: branchName, BranchName: branchName,
BootFile: form.BootFile, BootFile: form.BootFile,
@@ -2499,7 +2484,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm)
CommitID: "", CommitID: "",
BenchmarkTypeID: 0, BenchmarkTypeID: 0,
BenchmarkChildTypeID: benchmarkChildTypeID, BenchmarkChildTypeID: benchmarkChildTypeID,
ResourceSpecId: resourceSpecId,
ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"),
Spec: spec, Spec: spec,
} }


+ 20
- 0
routers/repo/modelarts.go View File

@@ -432,6 +432,7 @@ func NotebookManage(ctx *context.Context) {
var resultCode = "0" var resultCode = "0"
var errorMsg = "" var errorMsg = ""
var status = "" var status = ""
var spec *models.Specification


for { for {
task, err := models.GetCloudbrainByID(ID) task, err := models.GetCloudbrainByID(ID)
@@ -489,6 +490,24 @@ func NotebookManage(ctx *context.Context) {
break break
} }
} }
oldSpec, err := resource.GetCloudbrainSpec(task.ID)
if err != nil {
log.Error("NotebookManage GetCloudbrainSpec error.%v", err)
resultCode = "-1"
errorMsg = "Resource specification not available"
break
}
spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{
JobType: models.JobType(task.JobType),
ComputeResource: models.NPU,
Cluster: models.OpenICluster,
AiCenterCode: models.AICenterOfCloudBrainTwo})
if err != nil || spec == nil {
log.Error("NotebookManage GetAndCheckSpec error.task.id = %d", task.ID)
resultCode = "-1"
errorMsg = "Resource specification not support any more"
break
}


action = models.ActionStart action = models.ActionStart
} else { } else {
@@ -532,6 +551,7 @@ func NotebookManage(ctx *context.Context) {
UpdatedUnix: createTime, UpdatedUnix: createTime,
FlavorCode: task.FlavorCode, FlavorCode: task.FlavorCode,
FlavorName: task.FlavorName, FlavorName: task.FlavorName,
Spec: spec,
} }


err = models.RestartCloudbrain(task, newTask) err = models.RestartCloudbrain(task, newTask)


Loading…
Cancel
Save