From 19603f0fbf1ad9ab492b4aff2c0b45fd8f53a11a Mon Sep 17 00:00:00 2001 From: chenyifan01 Date: Wed, 24 Aug 2022 17:47:08 +0800 Subject: [PATCH] #2701 fix bug of restart --- modules/cloudbrain/cloudbrain.go | 5 +---- routers/repo/cloudbrain.go | 16 ---------------- routers/repo/modelarts.go | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/modules/cloudbrain/cloudbrain.go b/modules/cloudbrain/cloudbrain.go index 90ed21e4d..d6edfc706 100755 --- a/modules/cloudbrain/cloudbrain.go +++ b/modules/cloudbrain/cloudbrain.go @@ -61,7 +61,6 @@ type GenerateCloudBrainTaskReq struct { Snn4ImageNetPath string BrainScorePath string JobType string - GpuQueue string Description string BranchName string BootFile string @@ -72,7 +71,6 @@ type GenerateCloudBrainTaskReq struct { DatasetInfos map[string]models.DatasetInfo BenchmarkTypeID int BenchmarkChildTypeID int - ResourceSpecId int ResultPath string TrainUrl string ModelName string @@ -344,8 +342,7 @@ func GenerateTask(req GenerateCloudBrainTaskReq) error { Type: models.TypeCloudBrainOne, Uuid: req.Uuids, Image: req.Image, - GpuQueue: req.GpuQueue, - ResourceSpecId: req.ResourceSpecId, + GpuQueue: req.Spec.QueueCode, ComputeResource: models.GPUResource, BenchmarkTypeID: req.BenchmarkTypeID, BenchmarkChildTypeID: req.BenchmarkChildTypeID, diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index 7e2de1219..6d10beddf 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -191,9 +191,7 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { image := strings.TrimSpace(form.Image) uuids := form.Attachment jobType := form.JobType - gpuQueue := form.GpuType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath - resourceSpecId := form.ResourceSpecId branchName := form.BranchName repo := ctx.Repo.Repository tpl := tplCloudBrainNew @@ -311,7 +309,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), JobType: jobType, - GpuQueue: gpuQueue, Description: form.Description, BranchName: branchName, BootFile: form.BootFile, @@ -319,7 +316,6 @@ func CloudBrainCreate(ctx *context.Context, form auth.CreateCloudBrainForm) { CommitID: commitID, BenchmarkTypeID: 0, BenchmarkChildTypeID: 0, - ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), Spec: spec, } @@ -369,9 +365,7 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra image := strings.TrimSpace(form.Image) uuid := form.Attachment jobType := string(models.JobTypeInference) - gpuQueue := form.GpuType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath - resourceSpecId := form.ResourceSpecId branchName := form.BranchName labelName := form.LabelName repo := ctx.Repo.Repository @@ -469,13 +463,11 @@ func CloudBrainInferenceJobCreate(ctx *context.Context, form auth.CreateCloudBra Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), JobType: jobType, - GpuQueue: gpuQueue, Description: form.Description, BranchName: branchName, BootFile: form.BootFile, Params: form.Params, CommitID: commitID, - ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), ModelName: form.ModelName, ModelVersion: form.ModelVersion, @@ -2201,10 +2193,8 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo displayJobName := form.DisplayJobName jobName := util.ConvertDisplayJobNameToJobName(displayJobName) image := strings.TrimSpace(form.Image) - gpuQueue := form.GpuType command := cloudbrain.CommandBenchmark codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath - resourceSpecId := cloudbrain.BenchMarkResourceID benchmarkTypeID := form.BenchmarkTypeID benchmarkChildTypeID := form.BenchmarkChildTypeID @@ -2352,7 +2342,6 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), JobType: string(models.JobTypeBenchmark), - GpuQueue: gpuQueue, Description: form.Description, BranchName: cloudbrain.DefaultBranchName, BootFile: "", @@ -2360,7 +2349,6 @@ func BenchMarkAlgorithmCreate(ctx *context.Context, form auth.CreateCloudBrainFo CommitID: "", BenchmarkTypeID: benchmarkTypeID, BenchmarkChildTypeID: benchmarkChildTypeID, - ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), Spec: spec, } @@ -2382,9 +2370,7 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) image := form.Image uuid := form.Attachment jobType := form.JobType - gpuQueue := form.GpuType codePath := setting.JobPath + jobName + cloudbrain.CodeMountPath - resourceSpecId := form.ResourceSpecId branchName := cloudbrain.DefaultBranchName repo := ctx.Repo.Repository @@ -2491,7 +2477,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) Snn4ImageNetPath: storage.GetMinioPath(jobName, cloudbrain.Snn4imagenetMountPath+"/"), BrainScorePath: storage.GetMinioPath(jobName, cloudbrain.BrainScoreMountPath+"/"), JobType: jobType, - GpuQueue: gpuQueue, Description: form.Description, BranchName: branchName, BootFile: form.BootFile, @@ -2499,7 +2484,6 @@ func ModelBenchmarkCreate(ctx *context.Context, form auth.CreateCloudBrainForm) CommitID: "", BenchmarkTypeID: 0, BenchmarkChildTypeID: benchmarkChildTypeID, - ResourceSpecId: resourceSpecId, ResultPath: storage.GetMinioPath(jobName, cloudbrain.ResultPath+"/"), Spec: spec, } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index fefb13808..f72bd40b3 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -432,6 +432,7 @@ func NotebookManage(ctx *context.Context) { var resultCode = "0" var errorMsg = "" var status = "" + var spec *models.Specification for { task, err := models.GetCloudbrainByID(ID) @@ -489,6 +490,24 @@ func NotebookManage(ctx *context.Context) { break } } + oldSpec, err := resource.GetCloudbrainSpec(task.ID) + if err != nil { + log.Error("NotebookManage GetCloudbrainSpec error.%v", err) + resultCode = "-1" + errorMsg = "Resource specification not available" + break + } + spec, err = resource.GetAndCheckSpec(ctx.User.ID, oldSpec.ID, models.FindSpecsOptions{ + JobType: models.JobType(task.JobType), + ComputeResource: models.NPU, + Cluster: models.OpenICluster, + AiCenterCode: models.AICenterOfCloudBrainTwo}) + if err != nil || spec == nil { + log.Error("NotebookManage GetAndCheckSpec error.task.id = %d", task.ID) + resultCode = "-1" + errorMsg = "Resource specification not support any more" + break + } action = models.ActionStart } else { @@ -532,6 +551,7 @@ func NotebookManage(ctx *context.Context) { UpdatedUnix: createTime, FlavorCode: task.FlavorCode, FlavorName: task.FlavorName, + Spec: spec, } err = models.RestartCloudbrain(task, newTask)