| @@ -29,7 +29,7 @@ const ( | |||||
| BucketRemote = "grampus" | BucketRemote = "grampus" | ||||
| RemoteModelPath = "/output/" + models.ModelSuffix | RemoteModelPath = "/output/" + models.ModelSuffix | ||||
| autoStopDurationMs = 4 * 60 * 60 * 1000 | autoStopDurationMs = 4 * 60 * 60 * 1000 | ||||
| CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" | |||||
| CommandGpuDebug = "mkdir -p /dataset;%s! [ -x \"$(command -v jupyter)\" ] && pip install jupyterlab==3 -i https://pypi.tuna.tsinghua.edu.cn/simple;jupyter lab --ServerApp.shutdown_no_activity_timeout=%s --TerminalManager.cull_inactive_timeout=%s --TerminalManager.cull_interval=%s --MappingKernelManager.cull_idle_timeout=%s --MappingKernelManager.cull_interval=%s --MappingKernelManager.cull_connected=True --MappingKernelManager.cull_busy=True --no-browser --ip=0.0.0.0 --allow-root --notebook-dir='/code' --port=$OCTOPUS_NOTEBOOK_PORT --LabApp.token='' --LabApp.allow_origin='*' --LabApp.base_url=$OCTOPUS_NOTEBOOK_BASE_URL;" | |||||
| ) | ) | ||||
| var ( | var ( | ||||
| @@ -206,7 +206,7 @@ func GenerateNotebookJob(ctx *context.Context, req *GenerateNotebookJobReq) (job | |||||
| ReadOnly: false, | ReadOnly: false, | ||||
| ContainerPath: cloudbrain.CodeMountPath, | ContainerPath: cloudbrain.CodeMountPath, | ||||
| } | } | ||||
| req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand) | |||||
| req.Command = fmt.Sprintf(CommandGpuDebug, cpCommand, setting.CullIdleTimeout, setting.CullIdleTimeout, setting.CullInterval, setting.CullIdleTimeout, setting.CullInterval) | |||||
| log.Info("debug command:" + req.Command) | log.Info("debug command:" + req.Command) | ||||
| } | } | ||||
| @@ -1341,6 +1341,9 @@ modelconvert.manage.create_error2=Only one running model transformation task can | |||||
| modelconvert.manage.model_not_exist=The model in the task does not exist or has been deleted. | modelconvert.manage.model_not_exist=The model in the task does not exist or has been deleted. | ||||
| modelconvert.manage.no_operate_right=You have no right to do the operation. | modelconvert.manage.no_operate_right=You have no right to do the operation. | ||||
| debug.manage.model_not_exist=The model in the task does not exist or has been deleted, please create a new debug job. | |||||
| debug.manage.dataset_not_exist=Some of the datasets in the task do not exist or have been deleted, please create a new debug job. | |||||
| grampus.train_job.ai_center = AI Center | grampus.train_job.ai_center = AI Center | ||||
| grampus.dataset_path_rule = The code is stored in /cache/code;the dataset is stored in /cache/dataset;and please put your model into /cache/output, then you can download it online。 | grampus.dataset_path_rule = The code is stored in /cache/code;the dataset is stored in /cache/dataset;and please put your model into /cache/output, then you can download it online。 | ||||
| grampus.gpu_dataset_path_rule = The code is stored in /tmp/code;the dataset is stored in /tmp/dataset;and please put your model into /tmp/output, then you can download it online。 | grampus.gpu_dataset_path_rule = The code is stored in /tmp/code;the dataset is stored in /tmp/dataset;and please put your model into /tmp/output, then you can download it online。 | ||||
| @@ -1355,6 +1355,10 @@ modelconvert.manage.create_error2=只能创建一个正在运行的模型转换 | |||||
| modelconvert.manage.model_not_exist=任务中选择的模型不存在或者已被删除。 | modelconvert.manage.model_not_exist=任务中选择的模型不存在或者已被删除。 | ||||
| modelconvert.manage.no_operate_right=您没有操作权限。 | modelconvert.manage.no_operate_right=您没有操作权限。 | ||||
| debug.manage.model_not_exist=任务中选择的模型不存在或者已被删除,请新建调试任务。 | |||||
| debug.manage.dataset_not_exist=任务中选择的部分数据集不存在或者已被删除,请新建调试任务。 | |||||
| grampus.train_job.ai_center=智算中心 | grampus.train_job.ai_center=智算中心 | ||||
| grampus.dataset_path_rule = 训练脚本存储在/cache/code中,数据集存储在/cache/dataset中,训练输出请存储在/cache/output中以供后续下载。 | grampus.dataset_path_rule = 训练脚本存储在/cache/code中,数据集存储在/cache/dataset中,训练输出请存储在/cache/output中以供后续下载。 | ||||
| grampus.gpu_dataset_path_rule = 训练脚本存储在/tmp/code中,数据集存储在/tmp/dataset中,训练输出请存储在/tmp/output中以供后续下载。 | grampus.gpu_dataset_path_rule = 训练脚本存储在/tmp/code中,数据集存储在/tmp/dataset中,训练输出请存储在/tmp/output中以供后续下载。 | ||||
| @@ -706,7 +706,13 @@ func CloudBrainRestart(ctx *context.Context) { | |||||
| } | } | ||||
| if !HasModelFile(task) { | if !HasModelFile(task) { | ||||
| resultCode = "-1" | resultCode = "-1" | ||||
| errorMsg = ctx.Tr("repo.modelconvert.manage.model_not_exist") | |||||
| errorMsg = ctx.Tr("repo.debug.manage.model_not_exist") | |||||
| break | |||||
| } | |||||
| if hasDatasetDeleted(task) { | |||||
| resultCode = "-1" | |||||
| errorMsg = ctx.Tr("repo.debug.manage.dataset_not_exist") | |||||
| break | break | ||||
| } | } | ||||
| @@ -729,6 +735,14 @@ func CloudBrainRestart(ctx *context.Context) { | |||||
| }) | }) | ||||
| } | } | ||||
| func hasDatasetDeleted(task *models.Cloudbrain) bool { | |||||
| if task.Uuid == "" { | |||||
| return false | |||||
| } | |||||
| uuids := strings.Split(task.Uuid, ";") | |||||
| attachs, _ := models.GetAttachmentsByUUIDs(uuids) | |||||
| return len(attachs) < len(uuids) | |||||
| } | |||||
| func HasModelFile(task *models.Cloudbrain) bool { | func HasModelFile(task *models.Cloudbrain) bool { | ||||
| if task.PreTrainModelUrl == "" { | if task.PreTrainModelUrl == "" { | ||||
| @@ -1670,7 +1670,11 @@ func GrampusNotebookRestart(ctx *context.Context) { | |||||
| } | } | ||||
| if !HasModelFile(task) { //使用预训练模型训练 | if !HasModelFile(task) { //使用预训练模型训练 | ||||
| errorMsg = ctx.Tr("repo.modelconvert.manage.model_not_exist") | |||||
| errorMsg = ctx.Tr("repo.debug.manage.model_not_exist") | |||||
| break | |||||
| } | |||||
| if hasDatasetDeleted(task) { | |||||
| errorMsg = ctx.Tr("repo.debug.manage.dataset_not_exist") | |||||
| break | break | ||||
| } | } | ||||
| @@ -525,7 +525,12 @@ func NotebookRestart(ctx *context.Context) { | |||||
| break | break | ||||
| } | } | ||||
| if !HasModelFile(task) { //使用预训练模型训练 | if !HasModelFile(task) { //使用预训练模型训练 | ||||
| errorMsg = ctx.Tr("repo.modelconvert.manage.model_not_exist") | |||||
| errorMsg = ctx.Tr("repo.debug.manage.model_not_exist") | |||||
| break | |||||
| } | |||||
| if hasDatasetDeleted(task) { | |||||
| errorMsg = ctx.Tr("repo.debug.manage.dataset_not_exist") | |||||
| break | break | ||||
| } | } | ||||