From a3ee7b0660bbdb4aad9f66c59406f2b9d6b32bd9 Mon Sep 17 00:00:00 2001 From: ychao_1983 Date: Fri, 9 Dec 2022 09:18:45 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E4=BA=A4=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/grampus/grampus.go | 4 ++-- options/locale/locale_en-US.ini | 3 +++ options/locale/locale_zh-CN.ini | 4 ++++ routers/repo/cloudbrain.go | 16 +++++++++++++++- routers/repo/grampus.go | 6 +++++- routers/repo/modelarts.go | 7 ++++++- 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/modules/grampus/grampus.go b/modules/grampus/grampus.go index c8a5e2253..5854ba051 100755 --- a/modules/grampus/grampus.go +++ b/modules/grampus/grampus.go @@ -29,7 +29,7 @@ const ( BucketRemote = "grampus" RemoteModelPath = "/output/" + models.ModelSuffix autoStopDurationMs = 4 * 60 * 60 * 1000 - CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" + CommandGpuDebug = "mkdir -p /dataset;%s! 
[ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" ) var ( @@ -206,7 +206,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job ReadOnly: false, ContainerPath: cloudbrain.CodeMountPath, } - req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand) + req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) log.Info("debug command:" + req.Command) } diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index cb5d686af..aa6df19de 100755 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1341,6 +1341,9 @@ modelconvert.manage.create_error2=Only one running model transformation task can modelconvert.manage.model_not_exist=The model in the task does not exist or has been deleted. modelconvert.manage.no_operate_right=You have no right to do the operation. +debug.manage.model_not_exist=The model in the task does not exist or has been deleted, please create a new debug job. +debug.manage.dataset_not_exist=Some of the datasets in the task do not exist or have been deleted, please create a new debug job. 
+ grampus.train_job.ai_center = AI Center grampus.dataset_path_rule = The code is storaged in /cache/code;the dataset is storaged in /cache/dataset;and please put your model into /cache/output, then you can download it online。 grampus.gpu_dataset_path_rule = The code is storaged in /tmp/code;the dataset is storaged in /tmp/dataset;and please put your model into /tmp/output, then you can download it online。 diff --git a/options/locale/locale_zh-CN.ini b/options/locale/locale_zh-CN.ini index dcad1345d..319695119 100755 --- a/options/locale/locale_zh-CN.ini +++ b/options/locale/locale_zh-CN.ini @@ -1355,6 +1355,10 @@ modelconvert.manage.create_error2=只能创建一个正在运行的模型转换 modelconvert.manage.model_not_exist=任务中选择的模型不存在或者已被删除。 modelconvert.manage.no_operate_right=您没有操作权限。 + +debug.manage.model_not_exist=任务中选择的模型不存在或者已被删除,请新建调试任务。 +debug.manage.dataset_not_exist=任务中选择的部分数据集不存在或者已被删除,请新建调试任务。 + grampus.train_job.ai_center=智算中心 grampus.dataset_path_rule = 训练脚本存储在/cache/code中,数据集存储在/cache/dataset中,训练输出请存储在/cache/output中以供后续下载。 grampus.gpu_dataset_path_rule = 训练脚本存储在/tmp/code中,数据集存储在/tmp/dataset中,训练输出请存储在/tmp/output中以供后续下载。 diff --git a/routers/repo/cloudbrain.go b/routers/repo/cloudbrain.go index a4acd80bd..905c25a64 100755 --- a/routers/repo/cloudbrain.go +++ b/routers/repo/cloudbrain.go @@ -706,7 +706,13 @@ func CloudBrainRestart(ctx *context.Context) { } if !HasModelFile(task) { resultCode = "-1" - errorMsg = ctx.Tr("repo.modelconvert.manage.model_not_exist") + errorMsg = ctx.Tr("repo.debug.manage.model_not_exist") + break + } + + if hasDatasetDeleted(task) { + resultCode = "-1" + errorMsg = ctx.Tr("repo.debug.manage.dataset_not_exist") break } @@ -729,6 +735,14 @@ func CloudBrainRestart(ctx *context.Context) { }) } +func hasDatasetDeleted(task *models.Cloudbrain) bool { + if task.Uuid == "" { + return false + } + uuids := strings.Split(task.Uuid, ";") + attachs, _ := models.GetAttachmentsByUUIDs(uuids) + return len(attachs) < len(uuids) +} func HasModelFile(task 
*models.Cloudbrain) bool { if task.PreTrainModelUrl == "" { diff --git a/routers/repo/grampus.go b/routers/repo/grampus.go index 7e26ab93a..3011ccd79 100755 --- a/routers/repo/grampus.go +++ b/routers/repo/grampus.go @@ -1670,7 +1670,11 @@ func GrampusNotebookRestart(ctx *context.Context) { } if !HasModelFile(task) { //使用预训练模型训练 - errorMsg = ctx.Tr("repo.modelconvert.manage.model_not_exist") + errorMsg = ctx.Tr("repo.debug.manage.model_not_exist") + break + } + if hasDatasetDeleted(task) { + errorMsg = ctx.Tr("repo.debug.manage.dataset_not_exist") break } diff --git a/routers/repo/modelarts.go b/routers/repo/modelarts.go index 679fdc463..828590564 100755 --- a/routers/repo/modelarts.go +++ b/routers/repo/modelarts.go @@ -525,7 +525,12 @@ func NotebookRestart(ctx *context.Context) { break } if !HasModelFile(task) { //使用预训练模型训练 - errorMsg = ctx.Tr("repo.modelconvert.manage.model_not_exist") + errorMsg = ctx.Tr("repo.debug.manage.model_not_exist") + break + } + + if hasDatasetDeleted(task) { + errorMsg = ctx.Tr("repo.debug.manage.dataset_not_exist") break }