From 69f765d45a6e819288b88875f0e927677eb10a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B9=8B=E6=B1=9F=E5=A4=A9=E6=9E=A2?= Date: Wed, 30 Jun 2021 14:45:20 +0800 Subject: [PATCH] update data process --- dataset-util/.gitignore | 9 + dataset-util/README.md | 82 +- dataset-util/pom.xml | 81 +- .../datasetutil/DatasetUtilApplication.java | 98 +- .../datasetutil/common/aspect/LogAspect.java | 76 -- .../common/base/MagicNumConstant.java | 2 + .../common/config/ImageConfig.java | 63 ++ .../common/config/MinioConfig.java | 12 +- .../common/constant/AnnotateTypeEnum.java | 93 ++ .../common/constant/BusinessConstant.java | 62 ++ .../common/constant/DatatypeEnum.java | 88 ++ .../common/enums/DatatypeEnum.java | 58 ++ .../common/enums/PresetDatasetEnum.java | 77 ++ .../common/util/EsConfiguration.java | 91 ++ .../datasetutil/common/util/FileUtil.java | 43 + .../common/util/GeneratorKeyUtil.java | 59 +- .../common/util/HandleFileUtil.java | 24 + .../dubhe/datasetutil/common/util/IOUtil.java | 45 + .../datasetutil/common/util/LogUtil.java | 10 +- .../datasetutil/common/util/MinioUtil.java | 112 ++- .../util/MyPreciseShardingAlgorithm.java | 29 +- .../common/util/ProcessBarUtil.java | 32 + .../datasetutil/common/util/ThreadUtils.java | 20 +- .../dao/DataFileAnnotationMapper.java | 46 + .../dubhe/datasetutil/dao/DataFileMapper.java | 33 +- .../datasetutil/dao/DataLabelMapper.java | 21 + .../datasetutil/dao/DataSequenceMapper.java | 17 +- .../dao/DataVersionFileMapper.java | 44 +- .../dao/DatasetDataLabelMapper.java | 9 + .../dubhe/datasetutil/dao/DatasetMapper.java | 28 + .../datasetutil/dao/DatasetVersionMapper.java | 11 + .../datasetutil/domain/dto/AnnotationDTO.java | 40 + .../domain/dto/EsTransportDTO.java | 95 ++ .../domain/dto/FileAnnotationDTO.java | 46 + .../dubhe/datasetutil/domain/dto/IdAlloc.java | 44 +- .../datasetutil/domain/entity/DataFile.java | 79 +- .../domain/entity/DataFileAnnotation.java | 88 ++ .../domain/entity/DataLabelGroup.java | 5 + .../{dto => entity}/DataVersionFile.java | 21 +- .../datasetutil/domain/entity/Dataset.java | 4 + .../domain/entity/DatasetVersion.java | 53 + .../datasetutil/domain/entity/LogInfo.java | 2 +- .../handle/CustomDatasetImportHandle.java | 171 ++++ .../handle/DatasetImageUploadHandle.java | 287 +++++- .../handle/DatasetImportHandle.java | 419 ++++++-- .../handle/PresetDatasetImportHandle.java | 938 ++++++++++++++++++ .../service/DataFileAnnotationService.java | 43 + .../datasetutil/service/DataFileService.java | 13 + .../datasetutil/service/DataLabelService.java | 18 + .../service/DataSequenceService.java | 11 +- .../service/DataVersionFileService.java | 17 +- .../service/DatasetDataLabelService.java | 7 + .../datasetutil/service/DatasetService.java | 49 + .../service/DatasetVersionService.java | 15 + .../impl/DataFileAnnotationServiceImpl.java | 53 + .../service/impl/DataFileServiceImpl.java | 30 + .../service/impl/DataLabelServiceImpl.java | 37 +- .../service/impl/DataSequenceServiceImpl.java | 26 +- .../impl/DataVersionFileServiceImpl.java | 39 +- .../impl/DatasetDataLabelServiceImpl.java | 13 + .../service/impl/DatasetServiceImpl.java | 70 ++ .../impl/DatasetVersionServiceImpl.java | 30 + .../src/main/resources/application-dev.yml | 32 - .../src/main/resources/application-prod.yml | 41 + .../src/main/resources/application-test.yml | 32 - .../src/main/resources/application.yml | 65 +- ...spring-dev.xml => logback-spring-prod.xml} | 0 .../main/resources/logback-spring-test.xml | 248 ----- .../mapper/DataFileAnnotationMapper.xml | 13 + 
.../main/resources/mapper/DataFileMapper.xml | 4 +- .../main/resources/mapper/DataLabelMapper.xml | 14 + .../mapper/DataVersionFileMapper.xml | 6 +- 72 files changed, 3943 insertions(+), 750 deletions(-) delete mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/aspect/LogAspect.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/config/ImageConfig.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/AnnotateTypeEnum.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/DatatypeEnum.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/DatatypeEnum.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/PresetDatasetEnum.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/util/EsConfiguration.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/util/FileUtil.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/util/IOUtil.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ProcessBarUtil.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileAnnotationMapper.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetVersionMapper.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/AnnotationDTO.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFileAnnotation.java rename dataset-util/src/main/java/org/dubhe/datasetutil/domain/{dto => entity}/DataVersionFile.java (84%) create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DatasetVersion.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/handle/CustomDatasetImportHandle.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/handle/PresetDatasetImportHandle.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileAnnotationService.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetVersionService.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileAnnotationServiceImpl.java create mode 100644 dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetVersionServiceImpl.java delete mode 100644 dataset-util/src/main/resources/application-dev.yml create mode 100644 dataset-util/src/main/resources/application-prod.yml delete mode 100644 dataset-util/src/main/resources/application-test.yml rename dataset-util/src/main/resources/{logback-spring-dev.xml => logback-spring-prod.xml} (100%) delete mode 100644 dataset-util/src/main/resources/logback-spring-test.xml create mode 100644 dataset-util/src/main/resources/mapper/DataFileAnnotationMapper.xml diff --git a/dataset-util/.gitignore b/dataset-util/.gitignore index df214c3..7cce71f 100644 --- a/dataset-util/.gitignore +++ b/dataset-util/.gitignore @@ -4,3 +4,12 @@ logs HELP.md target/ + +# vscode +.classpath +.settings/org.eclipse.core.resources.prefs +.settings/org.eclipse.m2e.core.prefs +.settings/org.eclipse.jdt.apt.core.prefs +.settings/org.eclipse.jdt.core.prefs +.project +.factorypath diff --git 
a/dataset-util/README.md b/dataset-util/README.md
index 124d4d9..1e4c283 100644
--- a/dataset-util/README.md
+++ b/dataset-util/README.md
@@ -1,50 +1,58 @@
-# 之江天枢-数据集导入脚本
+# 之江天枢 - 数据集导入脚本

-**之江天枢一站式人工智能开源平台**(简称:**之江天枢**),为了实现其他平台已标注完成的数据集在「一站式开发平台」上进行开发,我们增加了数据集导入的功能,实现对数据集的全流程功能操作。
+**之江天枢一站式人工智能开源平台**(简称:**之江天枢**),为了实现其他平台已标注完成的数据集在「一站式开发平台」上进行开发,我们增加了数据集导入功能,用来导入本地已存在的数据集文件。

-## 源码部署
+## 环境依赖

 安装如下软件环境。
 - OpenJDK:1.8+

 ## 下载脚本
-- 数据集模板:http://tianshu.org.cn/static/upload/file/dubhe-dataset-template.zip
-- 上传数据集脚本:http://tianshu.org.cn/static/upload/file/upload_dataset.zip
+- 数据集导入模板:http://tianshu.org.cn/static/upload/file/dubhe-dataset-template.zip
+- 数据集导入脚本:http://tianshu.org.cn/static/upload/file/upload_dataset.zip

-## 脚本使用说明:
+## 创建数据集:

-- 登录天枢系统深度学习平台,在数据管理菜单下的数据集管理中创建数据集。获取数据集ID
-- 需要自行准备图片文件、标注文件、标签文件
+- 首先需要参考[部署文档](http://docs.dubhe.ai/docs/setup/deploy-guide)成功部署「一站式平台」
+- 准备好本地待导入数据集文件,包括图片、标注和标签文件,文件格式参考 [目录说明](http://docs.dubhe.ai/docs/module/dataset/import-dataset#%E7%9B%AE%E5%BD%95%E8%AF%B4%E6%98%8E)
+- 登录天枢深度学习平台,在「数据管理」模块下创建数据集,[使用文档](http://docs.dubhe.ai/docs/module/dataset/create-dataset)

 ## 运行脚本:

-1.解压下载的zip文件,需要自行配置数据源、MinIO相关配置
+1.下载导入脚本压缩包(upload_dataset),解压之后,`application-{env}.yml` 为脚本配置文件,默认 `env` 环境为 `dev`,需要自行配置数据源、MinIO 相关配置。

-2.运行脚本Windows 运行 run.bat; macOS/Linux 系统运行 run.sh
+2.运行脚本,Windows 下执行 `run.bat`,macOS/Linux 下执行 `run.sh`。

-注:可自行配置'application-{env}.xml'文件,执行命令后面添加 'run.bat {env}'即可执行对应的application-{env}.xml;然后按提示进行操作
+3.根据不同环境需求,可自行配置 `application-{env}.yml` 文件,执行 `run.bat {env}` 即可加载对应的 `application-{env}.yml` 配置文件;注意在运行脚本前需要保证该配置文件已存在。

-3.输入数据集ID
+4.根据提示输入数据集 ID。

-4.输入待导入数据集绝对路径
+5.根据提示输入待导入数据集绝对路径。

+6.导入成功。

 ## 目录结构:

-```
-标签文件: label_{name}.json ({name} 代表标签组名,可自行定义; 只读标签组文件夹下的第一个标签组文件,标签文件仅支持:.json 支持大小写;文件内容为JSON字符串)
-图片文件目录: origin (图片文件需要有后缀名,支持四种格式:.jpg,.png,.bmp,.jpeg 支持大小写)
-标注文件目录: annotation (标注文件需要有后缀名,仅支持格式:.json 支持大小写; 文件内容为JSON字符串)
-```
+[目录说明](http://docs.dubhe.ai/img/data/import-data9.png)
+
+- 图片目录:origin (图片支持四种格式:.jpg,.png,.bmp,.jpeg)
+- 标注目录:annotation (标注文件仅支持 .json 格式)
+- 标签文件:文件格式为 `label_{name}.json`,其中 `name` 为「标签组」名称,且不能与已有标签组名称重复
+
 ## 文件格式

-- 标签文件内容样例:
+### 标签文件:
+
+> 格式如下:
```
name: 名称
color: 颜色(16进制编码)
```
+
 详细示例:
```
[{
}]
```

-- 标注文件内容样例:
+### 标注文件:
+
+1. 图片分类
+
+> 格式如下:
```
- name: 名称
+ name: 对应标签名称
+ score:置信分数(0-1)
```
+详细示例:
+```
+[{"name":"wheaten_terrier","score":1}]
+```
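
(补充示例)下面是一段演示性 Java 代码,展示如何按上述「图片分类」格式生成一个标注文件;示例假设沿用本工程已有的 fastjson 依赖,其中类名 ClassificationAnnotationDemo、标签名 wheaten_terrier 与输出路径 annotation/demo_image.json 均为假设值,仅作格式示意:

```
import com.alibaba.fastjson.JSON;

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

public class ClassificationAnnotationDemo {
    public static void main(String[] args) throws Exception {
        // name 对应标签文件中已定义的标签名称,score 为 0-1 之间的置信分数
        Map<String, Object> annotation = new LinkedHashMap<>();
        annotation.put("name", "wheaten_terrier");
        annotation.put("score", 1);
        // 标注文件内容为 JSON 数组;文件名需与 origin 目录下的图片同名,后缀为 .json
        String json = JSON.toJSONString(Collections.singletonList(annotation));
        Files.write(Paths.get("annotation/demo_image.json"), json.getBytes(StandardCharsets.UTF_8));
    }
}
```

2. 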
目标检测 + +> 格式如下: +``` + name: 对应标签名称 bbox: 标注位置 - score:分数 + score:置信分数(0-1) ``` 详细示例: ``` [{ - "name": "行人", - "bbox": [321.6755762696266, 171.32076993584633, 185.67924201488495, 145.02639323472977], - "score": 0.6922634840011597 + "name": "行人", + "bbox": [321.6755762696266, 171.32076993584633, 185.67924201488495, 145.02639323472977], + "score": 0.6922634840011597 }, { - "name": "自行车", - "bbox": [40.88740050792694, 22.707078605890274, 451.21362805366516, 326.0102793574333], - "score": 0.6069411635398865 + "name": "自行车", + "bbox": [40.88740050792694, 22.707078605890274, 451.21362805366516, 326.0102793574333], + "score": 0.6069411635398865 }] ``` diff --git a/dataset-util/pom.xml b/dataset-util/pom.xml index 171c082..896397d 100644 --- a/dataset-util/pom.xml +++ b/dataset-util/pom.xml @@ -4,7 +4,7 @@ 4.0.0 org.dubhe dataset-util - 0.0.1-SNAPSHOT + 0.1.0-SNAPSHOT dataset-util 数据处理模块工具 @@ -48,19 +48,49 @@ io.minio minio - 7.0.2 + 8.2.1 + + + stax + stax-api + + + stax + stax + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + - - com.xiaoleilu - hutool-all - 3.0.1 - - org.apache.shardingsphere sharding-jdbc-spring-boot-starter - 4.0.0-RC1 + 4.1.1 + + + org.apache.shardingsphere + shardingsphere-sql-parser-oracle + + + org.apache.shardingsphere + shardingsphere-sql-parser-postgresql + + + org.apache.shardingsphere + shardingsphere-sql-parser-sqlserver + + @@ -74,12 +104,6 @@ 1.3.2 - - org.aspectj - aspectjweaver - 1.8.9 - - org.bgee.log4jdbc-log4j2 @@ -90,10 +114,29 @@ cn.hutool - hutool-all + hutool-core 5.0.6 + + com.twelvemonkeys.imageio + imageio-jpeg + 3.4.1 + + + me.tongfei + progressbar + 0.9.1 + + + org.springframework.boot + spring-boot-starter-aop + + + + org.elasticsearch.client + transport + @@ -104,6 +147,12 @@ ${spring-boot.version} pom import + + + com.zaxxer + HikariCP + + diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/DatasetUtilApplication.java b/dataset-util/src/main/java/org/dubhe/datasetutil/DatasetUtilApplication.java index 958b170..9cbbb4c 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/DatasetUtilApplication.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/DatasetUtilApplication.java @@ -17,15 +17,19 @@ package org.dubhe.datasetutil; import lombok.extern.slf4j.Slf4j; +import org.dubhe.datasetutil.common.util.PrintUtils; import org.dubhe.datasetutil.common.util.SpringContextHolder; +import org.dubhe.datasetutil.handle.CustomDatasetImportHandle; import org.dubhe.datasetutil.handle.DatasetImageUploadHandle; import org.dubhe.datasetutil.handle.DatasetImportHandle; -import org.dubhe.datasetutil.common.util.PrintUtils; +import org.dubhe.datasetutil.handle.PresetDatasetImportHandle; import org.mybatis.spring.annotation.MapperScan; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.context.ApplicationContext; +import java.time.Duration; +import java.time.LocalDateTime; import java.util.Scanner; /** @@ -43,7 +47,7 @@ public class DatasetUtilApplication { * @param args 入参 */ public static void main(String[] args) { - ApplicationContext applicationContext = SpringApplication.run(DatasetUtilApplication.class, args); + ApplicationContext applicationContext = SpringApplication.run(org.dubhe.datasetutil.DatasetUtilApplication.class, args); SpringContextHolder springContextHolder = new SpringContextHolder(); 
springContextHolder.setApplicationContext(applicationContext);
         execute(applicationContext);
     }
@@ -57,18 +61,19 @@ public class DatasetUtilApplication {
     public static void execute(ApplicationContext applicationContext) {
         while (true) {
             Scanner scanner = new Scanner(System.in);
-            log.warn("###################请输入需要执行的任务#############");
-            log.warn("# 输入1.执行上传图片                             ");
-            log.warn("# 输入2.执行导入数据集                            ");
-            log.warn("# 输入命令 :exit 退出                            ");
-            log.warn("################################################");
+            System.out.println(" ");
+            System.out.println("###请输入需要执行的任务###");
+            System.out.println("# 输入1:上传文件         ");
+            System.out.println("# 输入2:导入数据集       ");
+            System.out.println("# 输入exit:退出          ");
+            System.out.println("##########################");
             String a = scanner.nextLine();
             switch (a) {
                 case "1":
                     uploadDatasetImage(scanner, applicationContext);
                     break;
                 case "2":
-                    importDataset(scanner, applicationContext);
+                    executeImportDataset(applicationContext);
                     break;
                 case "exit":
                 default:
@@ -78,6 +83,47 @@
         }
     }
+
+    /**
+     * 选择导入数据集的类型并分发执行
+     *
+     * @param applicationContext 请求上下文
+     */
+    public static void executeImportDataset(ApplicationContext applicationContext) {
+        Boolean importFlag = true;
+        while (importFlag) {
+            Scanner scanner = new Scanner(System.in);
+            System.out.println(" ");
+            System.out.println("###请输入导入数据集类型###");
+            System.out.println("# 输入1: 导入普通数据集   ");
+            System.out.println("# 输入2: 导入预置数据集   ");
+            System.out.println("# 输入3: 导入自定义数据集 ");
+            System.out.println("# 输入命令:exit 返回     ");
+            System.out.println("##########################");
+
+            switch (scanner.nextLine()) {
+                case "1":
+                    importDataset(scanner, applicationContext);
+                    break;
+                case "2":
+                    importPresetDataset(scanner, applicationContext);
+                    break;
+                case "3":
+                    importCustomDataset(scanner, applicationContext);
+                    break;
+                case "exit":
+                default:
+                    importFlag = false;
+                    break;
+            }
+        }
+    }
+
+    /**
+     * 导入预置数据集
+     *
+     * @param scanner 输入控制台
+     * @param applicationContext 请求上下文
+     */
+    private static void importPresetDataset(Scanner scanner, ApplicationContext applicationContext) {
+        PresetDatasetImportHandle datasetImportHandle = (PresetDatasetImportHandle) applicationContext.getBean("presetDatasetImportHandle");
+        datasetImportHandle.importPresetDataset(scanner);
+    }
+
     /**
      * 导入图片
      *
@@ -85,17 +131,13 @@
      * @param scanner 输入控制台
      * @param applicationContext 请求上下文
      */
     public static void uploadDatasetImage(Scanner scanner, ApplicationContext applicationContext) {
-        log.warn("# 请输入数据集ID #");
-        String datasetIdStr = scanner.nextLine();
-        Long datasetId = Long.parseLong(datasetIdStr);
-        log.warn("# 请输入要上传的图片地址 #");
-        String filePath = scanner.nextLine();
         DatasetImageUploadHandle datasetImageUploadHandle = (DatasetImageUploadHandle) applicationContext.getBean("datasetImageUploadHandle");
         try {
-            datasetImageUploadHandle.execute(filePath, datasetId);
+            datasetImageUploadHandle.importPicture(scanner);
         } catch (Exception e) {
-            log.error("", e);
-            log.error("# 数据集上传失败,请重新尝试.....");
+            log.error("");
+            PrintUtils.printLine(" Error:" + e.getMessage(), PrintUtils.RED);
+            log.error("");
         }
     }
@@ -116,4 +158,28 @@
     }
+
+    /**
+     * 导入自定义数据集
+     *
+     * @param scanner 输入控制台
+     * @param applicationContext 请求上下文
+     */
+    public static void importCustomDataset(Scanner scanner, ApplicationContext applicationContext) {
+        System.out.println(" ");
+        System.out.println("# 请输入数据集ID #");
+        String datasetIdStr = scanner.nextLine();
+        Long datasetId = Long.parseLong(datasetIdStr);
+        System.out.println(" ");
+        System.out.println("# 请输入待上传本地文件的绝对路径 #");
+        String filePath = scanner.nextLine();
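+        // 补充注释:从 Spring 上下文取出自定义数据集导入处理器,入参依次为数据集 ID 与本地文件绝对路径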
CustomDatasetImportHandle customDatasetImportHandle = (CustomDatasetImportHandle) applicationContext.getBean("customDatasetImportHandle"); + try { + customDatasetImportHandle.execute(new Object[]{datasetId, filePath}); + } catch (Exception e) { + log.error(""); + PrintUtils.printLine(" Error:" + e.getMessage(), PrintUtils.RED); + log.error(""); + } + } + } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/aspect/LogAspect.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/aspect/LogAspect.java deleted file mode 100644 index 60a3acc..0000000 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/aspect/LogAspect.java +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Copyright 2020 Zhejiang Lab. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.dubhe.datasetutil.common.aspect; - -import lombok.extern.slf4j.Slf4j; -import org.aspectj.lang.JoinPoint; -import org.aspectj.lang.ProceedingJoinPoint; -import org.aspectj.lang.annotation.Around; -import org.aspectj.lang.annotation.Aspect; -import org.aspectj.lang.annotation.Pointcut; -import org.dubhe.datasetutil.common.enums.LogEnum; -import org.dubhe.datasetutil.common.util.LogUtil; -import org.slf4j.MDC; -import org.springframework.stereotype.Component; -import org.springframework.util.StringUtils; - -import java.util.UUID; - -/** - * @description 日志切面 - * @date 2020-04-10 - */ -@Component -@Aspect -@Slf4j -public class LogAspect { - - public static final String TRACE_ID = "traceId"; - - @Pointcut("execution(* org.dubhe..service..*.*(..))) ") - public void serviceAspect() { - } - - @Pointcut(" serviceAspect() ") - public void aroundAspect() { - } - - @Around("aroundAspect()") - public Object around(JoinPoint joinPoint) throws Throwable { - if (StringUtils.isEmpty(MDC.get(TRACE_ID))) { - MDC.put(TRACE_ID, UUID.randomUUID().toString()); - } - return combineLogInfo(joinPoint); - } - - /** - * 根据连接点返回结果 - * - * @param joinPoint 连接点 - * @return Object 返回结果 - */ - private Object combineLogInfo(JoinPoint joinPoint) throws Throwable { - Object[] param = joinPoint.getArgs(); - LogUtil.info(LogEnum.REST_REQ, "uri:{},input:{},==>begin", joinPoint.getSignature(), param); - long start = System.currentTimeMillis(); - Object result = ((ProceedingJoinPoint) joinPoint).proceed(); - long end = System.currentTimeMillis(); - LogUtil.info(LogEnum.REST_REQ, "uri:{},output:{},proc_time:{},<==end", joinPoint.getSignature().toString(), - result, end - start); - return result; - } - -} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/base/MagicNumConstant.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/base/MagicNumConstant.java index 01472cb..dee8aa9 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/base/MagicNumConstant.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/base/MagicNumConstant.java @@ -47,6 +47,7 @@ public final class MagicNumConstant { public static final int ONE_HUNDRED = 100; public static final int 
ONE_HUNDRED_TWENTY_EIGHT = 128;
     public static final int TWO_HUNDRED = 200;
+    public static final int FOUR_HUNDRED = 400;
     public static final int FIVE_HUNDRED = 500;
     public static final int FIVE_HUNDRED_AND_SIXTEEN = 516;
     public static final int ONE_THOUSAND = 1000;
@@ -89,6 +90,7 @@
     public static final long TWELVE_LONG = 12L;
     public static final long SIXTY_LONG = 60L;
     public static final long FIFTY_LONG = 50L;
+    public static final long HUNDRED_LONG = 100L;
     public static final long THOUSAND_LONG = 1000L;
     public static final long TEN_THOUSAND_LONG = 10000L;
     public static final long ONE_ZERO_ONE_ZERO_ONE_ZERO_LONG = 101010L;
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/ImageConfig.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/ImageConfig.java
new file mode 100644
index 0000000..d4cfd63
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/ImageConfig.java
@@ -0,0 +1,63 @@
+/**
+ * Copyright 2020 Zhejiang Lab. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+package org.dubhe.datasetutil.common.config;
+
+import lombok.Data;
+import org.dubhe.datasetutil.common.constant.BusinessConstant;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.stereotype.Component;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * @description 图片格式配置文件
+ * @date 2020-10-29
+ */
+@Data
+@Component
+@ConfigurationProperties(prefix = "suffix")
+public class ImageConfig {
+    /**
+     * 图片格式字符串
+     */
+    private String imageFormat;
+
+    /**
+     * 文本格式字符串
+     */
+    private String txtFormat;
+
+    /**
+     * 构建图片格式集合
+     *
+     * @return List<String>
+     */
+    public List<String> buildImageFormat() {
+        return Arrays.asList(imageFormat.split(BusinessConstant.COMMA));
+    }
+
+    /**
+     * 构建文本格式集合
+     *
+     * @return List<String>
+     */
+    public List<String> buildTxtFormat() {
+        return Arrays.asList(txtFormat.split(BusinessConstant.COMMA));
+    }
+
+}
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/MinioConfig.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/MinioConfig.java
index ac938ad..00a629b 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/MinioConfig.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/config/MinioConfig.java
@@ -17,8 +17,6 @@
 package org.dubhe.datasetutil.common.config;

 import io.minio.MinioClient;
-import io.minio.errors.InvalidEndpointException;
-import io.minio.errors.InvalidPortException;
 import lombok.Data;
 import org.springframework.boot.context.properties.ConfigurationProperties;
 import org.springframework.context.annotation.Bean;
@@ -45,14 +43,20 @@ public class MinioConfig {

     private String bucketName;

+    private String nfsRootPath;
+
+    private String serverUserName;
+
+    private double blockingCoefficient;
+
     /**
      * 获取Minio客户端信息
      *
      * @return Minio客户端信息
      */
     @Bean
     public MinioClient
getMinioClient() throws InvalidEndpointException, InvalidPortException { - return new MinioClient(endpoint, port, accessKey, secretKey,secure); + public MinioClient getMinioClient() { + return MinioClient.builder().endpoint("http://" + endpoint + ":" + port).credentials(accessKey, secretKey).build(); } } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/AnnotateTypeEnum.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/AnnotateTypeEnum.java new file mode 100644 index 0000000..ee9118e --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/AnnotateTypeEnum.java @@ -0,0 +1,93 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.datasetutil.common.constant; + +import lombok.Getter; + +/** + * @description 标注类型枚举类 + * @date 2020-05-21 + */ +@Getter +public enum AnnotateTypeEnum { + + /** + * 图像分类 + */ + CLASSIFICATION(2, "图像分类"), + /** + * 目标检测 + */ + OBJECT_DETECTION(1, "目标检测"), + /** + * 目标跟踪 + */ + OBJECT_TRACK(5, "目标跟踪"), + /** + * 语义分割 + */ + SEMANTIC_CUP(7, "语义分割"), + /** + * 文本分类 + */ + TEXT_CLASSIFICATION(6, "文本分类"), + /** + * 自定义导入 + */ + AUTO_IMPORT(100, "自定义导入"); + + + AnnotateTypeEnum(Integer value, String msg) { + this.value = value; + this.msg = msg; + } + + private Integer value; + private String msg; + + /** + * 标注类型校验 用户web端接口调用时参数校验 + * + * @param value 标注类型Integer值 + * @return 参数校验结果 + */ + public static boolean isValid(Integer value) { + for (AnnotateTypeEnum annotateTypeEnum : AnnotateTypeEnum.values()) { + if (annotateTypeEnum.value.equals(value)) { + return true; + } + } + return false; + } + + /** + * 根据标注类型获取类型code值 + * + * @param annotate 标注类型 + * @return 类型code值 + */ + public static Integer getConvertAnnotateType(String annotate) { + for (AnnotateTypeEnum annotateTypeEnum : AnnotateTypeEnum.values()) { + if (annotateTypeEnum.msg.equals(annotate)) { + return annotateTypeEnum.value; + } + } + return null; + } + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/BusinessConstant.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/BusinessConstant.java index 9c4d1d9..e6952fa 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/BusinessConstant.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/BusinessConstant.java @@ -35,6 +35,12 @@ public class BusinessConstant { * 分表业务编码 - 文件版本关系表 */ public static final String DATA_VERSION_FILE = "DATA_VERSION_FILE"; + + /** + * 分表业务编码 - 文件版本标签关系表 + */ + public static final String DATA_FILE_ANNOTATION = "DATA_FILE_ANNOTATION"; + /** * 图片文件路径 */ @@ -43,6 +49,26 @@ public class BusinessConstant { * 标注文件路径 */ public static final String ANNOTATION = "annotation"; + /** + * 版本文件路径 + */ + public static final String VERSION_FILE = "versionFile"; + /** + * 视频文件路径 + */ + public static final String VIDEO = 
"video"; + /** + * 版本文件V0001路径 + */ + public static final String V0001 = "V0001"; + /** + * 版本文件ofrecord路径 + */ + public static final String OFRECORD = "ofrecord"; + /** + * 版本文件train路径 + */ + public static final String TRAIN = "train"; /** * 标签文件路径 */ @@ -55,11 +81,20 @@ public class BusinessConstant { * 后缀. */ public static final String SPOT = "."; + /** + * 逗号, + */ + public static final String COMMA = ","; /** * JSON后缀名 */ public static final String SUFFIX_JSON = ".JSON"; + + /** + * SQL后缀名 + */ + public static final String SUFFIX_SQL = ".sql"; /** * minio根目录 */ @@ -83,4 +118,31 @@ public class BusinessConstant { */ public static final String Y = "Y"; + public static final String DEFAULT_VERSION = "V0001"; + + /** + * 版本文件表 + */ + public static final String DATA_DATASET_VERSION_FILE = "data_dataset_version_file"; + /** + * 数据集文件表 + */ + public static final String DATASET_FILE = "data_file"; + + /** + * 文本摘要 + */ + public static final String ABSTRACT = "abstract_"; + + /** + * 表后缀 + */ + public static final String TABLE_SUFFIX = "_1"; + + /** + * 删除服务器无效文件(大文件) + * 示例:rsync --delete-before -d /空目录 /需要回收的源目录 + */ + public static final String DEL_COMMAND = "ssh %s@%s \"mkdir -p %s; rsync --delete-before -d %s %s; rmdir %s %s\""; + } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/DatatypeEnum.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/DatatypeEnum.java new file mode 100644 index 0000000..4e3dec5 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/constant/DatatypeEnum.java @@ -0,0 +1,88 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.datasetutil.common.constant; + +import lombok.Getter; + +/** + * @description 数据类型 + * @date 2020-05-21 + */ +@Getter +public enum DatatypeEnum { + + /** + * 图片 + */ + IMAGE(0, "图片"), + /** + * 视频 + */ + VIDEO(1, "视频"), + /** + * 文本 + */ + TEXT(2, "文本"), + /** + * 自定义导入 + */ + AUTO_IMPORT(100, "自定义导入"); + + DatatypeEnum(Integer value, String msg) { + this.value = value; + this.msg = msg; + } + + private Integer value; + private String msg; + + /** + * 数据类型校验 用户web端接口调用时参数校验 + * + * @param value 数据类型 + * @return 参数校验结果 + */ + public static boolean isValid(Integer value) { + for (DatatypeEnum datatypeEnum : DatatypeEnum.values()) { + if (datatypeEnum.value.equals(value)) { + return true; + } + } + return false; + } + + /** + * 获取数据类型枚举 + * + * @param value 获取数据类型枚举值 + * @return 数据类型枚举 + */ + public static DatatypeEnum getEnumValue(Integer value) { + switch (value) { + case 0: + return IMAGE; + case 1: + return VIDEO; + case 2: + return TEXT; + default: + return IMAGE; + } + } + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/DatatypeEnum.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/DatatypeEnum.java new file mode 100644 index 0000000..6590412 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/DatatypeEnum.java @@ -0,0 +1,58 @@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.datasetutil.common.enums; + +import lombok.Getter; + +/** + * @description 数据类型 + * @date 2020-11-23 + */ +@Getter +public enum DatatypeEnum { + + /** + * 图片 + */ + IMAGE(0, "图片"), + /** + * 视频 + */ + VIDEO(1, "视频"), + /** + * 文本 + */ + TXT(2, "文本"); + + DatatypeEnum(Integer value, String msg) { + this.value = value; + this.msg = msg; + } + + /** + * 数据类型 + */ + private Integer value; + + /** + * 数据描述 + */ + private String msg; + + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/PresetDatasetEnum.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/PresetDatasetEnum.java new file mode 100644 index 0000000..b0a1c5a --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/enums/PresetDatasetEnum.java @@ -0,0 +1,77 @@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.datasetutil.common.enums; + +import lombok.Getter; +import lombok.ToString; + +/** + * @Description 预置数据集类型枚举 + * @Date 2020-11-03 + */ +@ToString +@Getter +public enum PresetDatasetEnum { + /** + * COCO2017-val + */ + COCO2017Val("1", "COCO2017-val"), + + /** + * Caltech-256 + */ + Caltech256("2", "Caltech-256"), + + /** + * COCO2017-train + */ + COCO2017Train("3", "COCO2017-train"), + + /** + * Object-Tracking + */ + ObjectTracking("4", "Object-Tracking"), + + /** + * Data-Augment + */ + DataAugment("5", "Data-Augment"), + + /** + * IMDB_DATASET + */ + ImdbDataset("101", "NLP_IMDB"), + + ; + + /** + * 预置数据集类型 + */ + private String type; + + /** + * 操作类型备注 + */ + private String desc; + + PresetDatasetEnum(String type, String desc) { + this.type = type; + this.desc = desc; + } + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/EsConfiguration.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/EsConfiguration.java new file mode 100644 index 0000000..cc64fe5 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/EsConfiguration.java @@ -0,0 +1,91 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================= + */ + +package org.dubhe.datasetutil.common.util; + +import org.elasticsearch.action.bulk.BackoffPolicy; +import org.elasticsearch.action.bulk.BulkProcessor; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.transport.client.PreBuiltTransportClient; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import java.net.InetAddress; +import java.net.UnknownHostException; + +/** + * @description ES批量同步数据配置 + * @date 2021-03-24 + */ +@Configuration +public class EsConfiguration { + + /** + * es服务地址 + */ + @Value("${es.host}") + private String esServerHost; + + /** + * es同步端口 + */ + @Value("${es.transportPort}") + private String estransportPort; + + /** + * 集群名称 + */ + @Value("${es.clusterName}") + private String clusterName; + + @Bean(name = "bulkProcessor") + public BulkProcessor bulkProcessor() throws UnknownHostException { + Settings settings = Settings.builder().put("cluster.name", clusterName).build(); + Client client = new PreBuiltTransportClient(settings) + .addTransportAddress(new TransportAddress(InetAddress.getByName(esServerHost), Integer.parseInt(estransportPort))); + return BulkProcessor.builder(client, new BulkProcessor.Listener() { + @Override + public void beforeBulk(long l, BulkRequest bulkRequest) { + + } + + @Override + public void afterBulk(long l, BulkRequest bulkRequest, BulkResponse bulkResponse) { + + } + + @Override + public void afterBulk(long l, BulkRequest bulkRequest, Throwable throwable) { + + } + + }).setBulkActions(1000) + .setBulkSize(new ByteSizeValue(5, ByteSizeUnit.MB)) + .setFlushInterval(TimeValue.timeValueSeconds(5)) + .setConcurrentRequests(1) + .setBackoffPolicy(BackoffPolicy.exponentialBackoff(TimeValue.timeValueMillis(100), 3)) + .build(); + } +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/FileUtil.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/FileUtil.java new file mode 100644 index 0000000..f57f469 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/FileUtil.java @@ -0,0 +1,43 @@ +package org.dubhe.datasetutil.common.util; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +/** + * @description 文件处理工具 + * @date 2021-03-23 + */ +public class FileUtil { + + /** + * 遍历文件 + * + * @param path 文件路径 + */ + public static List traverseFolder(String path) { + List filePaths = new ArrayList<>(); + // 实例化file对象,指明要操作的文件路径 + File file = new File(path); + // 判断是否有文件 + if (file.exists()) { + // 获取该目录下的所有文件或者文件目录的File数组 + File[] files = file.listFiles(); + // 判断文件是否为空 + if (files != null && files.length > 0) { + // 利用foreach 进行循环遍历 + for (File f : files) { + // 判断是文件还是文件夹 + if (f.isDirectory()) { + // 递归调用 + filePaths.addAll(traverseFolder(f.getPath())); + } else { + filePaths.add(f.getAbsolutePath()); + } + } + } + } + return filePaths; + } + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/GeneratorKeyUtil.java 
b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/GeneratorKeyUtil.java
index 5aadb7f..5adebc7 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/GeneratorKeyUtil.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/GeneratorKeyUtil.java
@@ -18,16 +18,14 @@
 package org.dubhe.datasetutil.common.util;

 import cn.hutool.core.util.ObjectUtil;
 import org.dubhe.datasetutil.common.base.MagicNumConstant;
-import org.dubhe.datasetutil.common.enums.LogEnum;
 import org.dubhe.datasetutil.common.exception.DataSequenceException;
 import org.dubhe.datasetutil.domain.dto.IdAlloc;
 import org.dubhe.datasetutil.domain.entity.DataSequence;
 import org.dubhe.datasetutil.service.DataSequenceService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
-import org.springframework.transaction.annotation.Transactional;
 import org.springframework.util.StringUtils;
-
+import java.util.Queue;
 import java.util.concurrent.ConcurrentHashMap;

 /**
@@ -49,8 +47,7 @@ public class GeneratorKeyUtil {
      * @param number 数量
-     * @return Long 起始位置
+     * @return Queue<Long> 分配到的 ID 队列
      */
-    @Transactional(rollbackFor = Exception.class)
-    public synchronized Long getSequenceByBusinessCode(String businessCode, int number) {
+    public synchronized Queue<Long> getSequenceByBusinessCode(String businessCode, int number) {
         if (StringUtils.isEmpty(businessCode)) {
             throw new DataSequenceException("业务编码不可为空");
         }
@@ -63,56 +60,26 @@
             idAllocConcurrentHashMap.put(businessCode, idAlloc);
         }
-        if (idAlloc.getUsedNumber() == MagicNumConstant.ZERO) {
-            DataSequence dataSequence = getDataSequence(businessCode);
-            updateDataSequence(businessCode);
-            idAlloc.setStartNumber(dataSequence.getStart());
-            idAlloc.setEndNumber(dataSequence.getStart() + dataSequence.getStep() - MagicNumConstant.ONE);
-            idAlloc.setUsedNumber(idAlloc.getEndNumber() - idAlloc.getStartNumber() + MagicNumConstant.ONE);
-        }
-        if (idAlloc.getUsedNumber() <= number) {
+        if (idAlloc.getUnUsed() < number) {
+            //执行扩容操作
             expansionUsedNumber(businessCode, number);
         }
-        long returnStartNumber = idAlloc.getStartNumber();
-        idAlloc.setStartNumber(idAlloc.getStartNumber() + number);
-        idAlloc.setUsedNumber(idAlloc.getUsedNumber() - number);
-        return returnStartNumber;
-    }
-
-    /**
-     * 根据业务编码获取配置信息
-     * @param businessCode 业务编码
-     * @return DataSequence 数据索引
-     */
-    private DataSequence getDataSequence(String businessCode) {
-        DataSequence dataSequence = dataSequenceService.getSequence(businessCode);
-        if (dataSequence == null || dataSequence.getStart() == null || dataSequence.getStep() == null) {
-            throw new DataSequenceException("配置出错,请检查data_sequence表配置");
-        }
-        return dataSequence;
+        //获取ids
+        return idAlloc.poll(number);
     }
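+    // 使用示意(补充注释):调用方按批次申请一段连续 ID,再逐个出队使用,例如:
+    // Queue<Long> ids = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE, fileList.size());
+    // 其中 BusinessConstant.DATA_FILE 与 fileList 为假设的示例;队列余量不足时由下方 expansionUsedNumber 扩容。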
-    /**
-     * 根据业务编码更新起始值
-     * @param businessCode 业务编码
-     */
-    private void updateDataSequence(String businessCode) {
-        dataSequenceService.updateSequenceStart(businessCode);
-    }
-
     /**
-     * 多次扩容
+     * 扩容
      * @param businessCode 业务编码
      * @param number 数量
      */
-    private void expansionUsedNumber(String businessCode, int number) {
+    protected void expansionUsedNumber(String businessCode, int number) {
         IdAlloc idAlloc = idAllocConcurrentHashMap.get(businessCode);
-        updateDataSequence(businessCode);
-        DataSequence dataSequenceNew = getDataSequence(businessCode);
-        idAlloc.setEndNumber(idAlloc.getEndNumber() + dataSequenceNew.getStep());
-        idAlloc.setUsedNumber(idAlloc.getEndNumber() - idAlloc.getStartNumber() + MagicNumConstant.ONE);
-        if (idAlloc.getUsedNumber() <= number) {
+        DataSequence dataSequenceNew = dataSequenceService.expansionUsedNumber(businessCode);
+        idAlloc.add(dataSequenceNew);
+        if (idAlloc.getUnUsed() < number) {
             expansionUsedNumber(businessCode, number);
         }
     }
-}
+
+}
\ No newline at end of file
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/HandleFileUtil.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/HandleFileUtil.java
index baafdb9..9a41042 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/HandleFileUtil.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/HandleFileUtil.java
@@ -19,9 +19,12 @@
 package org.dubhe.datasetutil.common.util;

 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.LineIterator;
 import org.dubhe.datasetutil.common.constant.BusinessConstant;
+import org.springframework.util.ObjectUtils;

 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;

 /**
  * @description 文件工具类
@@ -45,6 +48,27 @@
     }

+    /**
+     * 读取文件内容
+     *
+     * @param file 文件对象
+     * @return List<String> 文件内容
+     */
+    public static List<String> readFileInfo(File file) throws IOException {
+
+        List<String> datasetList = new ArrayList<>();
+        LineIterator fileContext = FileUtils.lineIterator(file, "UTF-8");
+        while (fileContext.hasNext()) {
+            String line = fileContext.nextLine();
+            if (!ObjectUtils.isEmpty(line)) {
+                datasetList.add(line);
+            }
+        }
+        return datasetList;
+    }
+
+
     /**
      * 获取文件名后缀名
      *
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/IOUtil.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/IOUtil.java
new file mode 100644
index 0000000..a4d63ba
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/IOUtil.java
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2020 Zhejiang Lab. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+package org.dubhe.datasetutil.common.util;
+
+import org.dubhe.datasetutil.common.exception.ImportDatasetException;
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * @description IO流操作工具类
+ * @date 2020-11-14
+ */
+public class IOUtil {
+
+    /**
+     * 循环地依次关闭流
+     *
+     * @param closeableList 要被关闭的流集合
+     */
+    public static void close(Closeable... closeableList) {
+        for (Closeable closeable : closeableList) {
+            try {
+                if (closeable != null) {
+                    closeable.close();
+                }
+            } catch (IOException e) {
+                throw new ImportDatasetException(" 流关闭异常 ");
+            }
+        }
+    }
+}
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/LogUtil.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/LogUtil.java
index 1b429f9..c7e4e0f 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/LogUtil.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/LogUtil.java
@@ -22,15 +22,12 @@
 import com.alibaba.fastjson.JSON;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.exception.ExceptionUtils;
-import org.dubhe.datasetutil.common.aspect.LogAspect;
 import org.dubhe.datasetutil.common.enums.LogEnum;
 import org.dubhe.datasetutil.domain.entity.LogInfo;
 import org.slf4j.MDC;
 import org.slf4j.MarkerFactory;
 import org.slf4j.helpers.MessageFormatter;
-
 import java.util.Arrays;
-import java.util.UUID;

 /**
  * @description 日志工具类
@@ -179,14 +176,9 @@
             logType = LogEnum.SYS_ERR;
         }

-        // 获取trace_id
-        if (StringUtils.isEmpty(MDC.get(LogAspect.TRACE_ID))) {
-            MDC.put(LogAspect.TRACE_ID, UUID.randomUUID().toString());
-        }
         // 设置logInfo的level,type,traceId属性
         logInfo.setLevel(level.levelStr)
-                .setType(logType.toString())
-                .setTraceId(MDC.get(LogAspect.TRACE_ID));
+                .setType(logType.toString());

         //自定义日志级别
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MinioUtil.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MinioUtil.java
index f5bb17e..6219c99 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MinioUtil.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MinioUtil.java
@@ -16,14 +16,20 @@
 */
 package org.dubhe.datasetutil.common.util;

-import io.minio.MinioClient;
-import io.minio.PutObjectOptions;
+import cn.hutool.core.io.FileUtil;
+import cn.hutool.core.io.IoUtil;
+import io.minio.*;
+import org.dubhe.datasetutil.common.base.MagicNumConstant;
 import org.dubhe.datasetutil.common.config.MinioConfig;
+import org.dubhe.datasetutil.common.constant.BusinessConstant;
 import org.dubhe.datasetutil.common.enums.LogEnum;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;

+import java.io.File;
+import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.Charset;

 /**
  * @description Minio工具类
@@ -41,24 +47,110 @@
     /**
      * 上传文件
      *
-     * @param objectName 对象名称
-     * @param inputStream 文件流
+     * @param sourceFilePath 原文件绝对路径
+     * @param targetFilePath 目标文件路径
+     * @throws Exception 上传异常
+     */
+    public void upLoadFile(String sourceFilePath, String targetFilePath) throws Exception {
+        LogUtil.info(LogEnum.BIZ_DATASET, "源文件目录: 【" + sourceFilePath + "】" + " 目标目录: 【" + targetFilePath + "】");
+        try {
+            ObjectWriteResponse objectWriteResponse = minioClient.uploadObject(UploadObjectArgs
+                    .builder()
+                    .bucket(minioConfig.getBucketName())
+                    .object(targetFilePath)
+                    .filename(sourceFilePath)
+                    .contentType(contentType(FileUtil.getName(sourceFilePath)))
+                    .build()
+            );
+        } catch (IOException e) {
+            LogUtil.error(LogEnum.BIZ_DATASET, "上传文件失败: {} ", e);
+        }
+    }
+
+
+    /**
+     * 上传文件 (文件流消费后直接关闭)
+     *
+     * @param targetFilePath 目标文件路径
+     * @param filePath 原文件绝对路径
      * @throws Exception 上传异常
      */
-    public void upLoadFile(String objectName, InputStream inputStream) throws Exception {
-        LogUtil.info(LogEnum.BIZ_DATASET,"文件上传名称为: 【" +
objectName + "】"); - PutObjectOptions options = new PutObjectOptions(inputStream.available(), -1); - minioClient.putObject(minioConfig.getBucketName(), objectName, inputStream, options); + public void upLoadFileByInputStream(String targetFilePath, String filePath) throws Exception { + try { + minioClient.uploadObject(UploadObjectArgs + .builder() + .bucket(minioConfig.getBucketName()) + .object(targetFilePath) + .filename(filePath) + .contentType( + contentType( + FileUtil.getName(filePath) + ) + ).build() + ); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_DATASET, "上传文件失败: {} ", e); + } } /** * 获取文件URL - * + * * @param objectName 对象名称 * @return String 文件路径 */ public String getUrl(String objectName) { - return minioConfig.getBucketName() + "/" + objectName; + return minioConfig.getBucketName() + BusinessConstant.FILE_SEPARATOR + objectName; + } + + /** + * 读取文件 + * + * @param bucketName 桶 + * @param fullFilePath 文件存储的全路径,包括文件名,非'/'开头. e.g. dataset/12/annotation/test.txt + * @return String + */ + public String readString(String bucketName, String fullFilePath) { + try (InputStream is = minioClient.getObject(GetObjectArgs + .builder() + .bucket(bucketName) + .object(fullFilePath) + .build() + )) { + return IoUtil.read(is, Charset.defaultCharset()); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_DATASET, "读取文本content失败: {} ", e); + return null; + } + } + + private String contentType(String fileName) { + if (fileName.endsWith("xml")) { + return "text/xml"; + } else if (fileName.endsWith("jpg") || fileName.endsWith("jpe") || fileName.endsWith("jpeg")) { + return "image/jpg"; + } else if (fileName.endsWith("png")) { + return "image/png"; + } else if (fileName.endsWith("pic")) { + return "image/pict"; + } else if (fileName.endsWith("avi")) { + return "video/x-msvideo"; + } else if (fileName.endsWith("mp4")) { + return "video/mp4"; + } else if (fileName.endsWith("ogg")) { + return "video/ogg"; + } else if (fileName.endsWith("webm")) { + return "video/webm"; + } else if (fileName.endsWith("HTML") || fileName.endsWith("html")) { + return "text/html"; + } else if (fileName.endsWith("DOCX") || fileName.endsWith("docx") || fileName.endsWith("DOC") + || fileName.endsWith("doc")) { + return "application/msword"; + } else if (fileName.endsWith("XML") || fileName.endsWith("xml")) { + return "text/xml"; + } else if (fileName.endsWith("pdf")) { + return "application/pdf"; + } + return "image/jpeg"; } } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MyPreciseShardingAlgorithm.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MyPreciseShardingAlgorithm.java index fb0c9e4..727246d 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MyPreciseShardingAlgorithm.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/MyPreciseShardingAlgorithm.java @@ -16,6 +16,7 @@ */ package org.dubhe.datasetutil.common.util; +import lombok.extern.slf4j.Slf4j; import org.apache.shardingsphere.api.sharding.standard.PreciseShardingAlgorithm; import org.apache.shardingsphere.api.sharding.standard.PreciseShardingValue; import org.dubhe.datasetutil.common.base.MagicNumConstant; @@ -24,16 +25,21 @@ import org.dubhe.datasetutil.service.DataSequenceService; import org.springframework.beans.factory.annotation.Autowired; import java.util.Collection; +import java.util.HashSet; +import java.util.Set; /** * @description 数据分片 * @date 2020-09-21 */ -public class MyPreciseShardingAlgorithm implements PreciseShardingAlgorithm{ +@Slf4j +public class 
MyPreciseShardingAlgorithm implements PreciseShardingAlgorithm<Long> {

     @Autowired
     private DataSequenceService dataSequenceService;

+    private static Set<String> tableNames = new HashSet<>();
+
     /**
      * 数据表分片
      *
@@ -45,10 +51,17 @@
      */
     public String doSharding(Collection<String> collection, PreciseShardingValue<Long> preciseShardingValue) {
         long startIndex = MagicNumConstant.ONE;
         long endIndex = MagicNumConstant.FIFTY;
-        dataSequenceService = SpringContextHolder.getBean(DataSequenceService.class);
-        String tableName = preciseShardingValue.getLogicTableName()+ BusinessConstant.UNDERLINE + preciseSharding(preciseShardingValue.getValue(),startIndex ,endIndex);
-        if(!dataSequenceService.checkTableExist(tableName)){
-            dataSequenceService.createTable(tableName);
+        String tableName = preciseShardingValue.getLogicTableName() + BusinessConstant.UNDERLINE + preciseSharding(preciseShardingValue.getValue(), startIndex, endIndex);
+        if (!tableNames.contains(tableName)) {
+            dataSequenceService = SpringContextHolder.getBean(DataSequenceService.class);
+            if (!dataSequenceService.checkTableExist(tableName)) {
+                try {
+                    dataSequenceService.createTable(tableName);
+                } catch (Exception e) {
+                    log.error("table name repeat {}", tableName);
+                }
+            }
+            tableNames.add(tableName);
         }
         return tableName;
     }
@@ -61,11 +74,11 @@
      * @return 分表返回值
      */
-    public long preciseSharding(long indexId,long startIndex,long endIndex){
-        if(indexId > endIndex){
+    public long preciseSharding(long indexId, long startIndex, long endIndex) {
+        if (indexId > endIndex) {
             startIndex = startIndex + BusinessConstant.INTERVAL_NUMBER;
             endIndex = endIndex + BusinessConstant.INTERVAL_NUMBER;
-            return preciseSharding(indexId,startIndex,endIndex);
+            return preciseSharding(indexId, startIndex, endIndex);
         }
         return endIndex / BusinessConstant.INTERVAL_NUMBER;
     }
 }
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ProcessBarUtil.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ProcessBarUtil.java
new file mode 100644
index 0000000..09cb944
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ProcessBarUtil.java
@@ -0,0 +1,32 @@
+package org.dubhe.datasetutil.common.util;
+
+import me.tongfei.progressbar.ProgressBar;
+
+/**
+ * @description 进度条工具类
+ * @date 2021-03-23
+ */
+public class ProcessBarUtil {
+
+    public static ProgressBar pb = null;
+
+    /**
+     * 初始化进度条工具
+     *
+     * @param task 任务名称
+     * @param maxValue 进度条最大值
+     */
+    public static void initProcess(String task, Long maxValue) {
+        pb = new ProgressBar(task, maxValue);
+    }
+
+    /**
+     * 更新进度条
+     *
+     * @param step 前进步长
+     */
+    public static void processBar01(Long step) {
+        pb.stepBy(step);
+    }
+
+}
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ThreadUtils.java b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ThreadUtils.java
index 8e58cfc..a811d9c 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ThreadUtils.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/common/util/ThreadUtils.java
@@ -18,6 +18,9 @@
 package org.dubhe.datasetutil.common.util;

 import lombok.extern.slf4j.Slf4j;
+import org.dubhe.datasetutil.common.base.MagicNumConstant;
+import org.dubhe.datasetutil.common.config.MinioConfig;
+import org.springframework.util.CollectionUtils;

 import java.util.List;
 import java.util.concurrent.*;
@@ -29,7 +32,8 @@
 @Slf4j
 public class ThreadUtils {

-    private ThreadUtils(){}
+    private ThreadUtils() {
+    }
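+    // 补充说明(示例注释):getNeedThreadNumber() 按「CPU 核数 / (1 - 阻塞系数)」估算线程数,
+    // 例如 8 核且 blockingCoefficient = 0.8 时,线程数约为 8 / 0.2 = 40;阻塞系数改由 MinioConfig 配置提供。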
     /**
      * 根据需要处理的数量创建线程数
      *
      * @param listSize 集合数量
      * @return int 数量
      */
     public static int createThread(int listSize) {
-        return listSize / getNeedThreadNumber() == 0 ? 1 : listSize / getNeedThreadNumber();
+        return listSize / getNeedThreadNumber() == MagicNumConstant.ZERO ? MagicNumConstant.ONE : listSize / getNeedThreadNumber();
     }
@@ -49,24 +53,26 @@
      */
     public static int getNeedThreadNumber() {
         final int numOfCores = Runtime.getRuntime().availableProcessors();
-        final double blockingCoefficient = 0.8;
-        return (int) (numOfCores / (1 - blockingCoefficient));
+        MinioConfig minioConfig = (MinioConfig) SpringContextHolder.getBean("minioConfig");
+        final double blockingCoefficient = minioConfig.getBlockingCoefficient();
+        return (int) (numOfCores / (MagicNumConstant.ONE - blockingCoefficient));
     }

     /**
      * 按要求分多线程执行
      *
-     * @param partitions 分线程集合
+     * @param partitions  分线程集合
      * @throws Exception 线程执行异常
      */
     public static void runMultiThread(List<Callable<Integer>> partitions) throws Exception {
         final ExecutorService executorService = Executors.newFixedThreadPool(ThreadUtils.getNeedThreadNumber());
         final List<Future<Integer>> valueOfStocks = executorService.invokeAll(partitions);
-        Integer endCount = 0;
+        Integer endCount = MagicNumConstant.ZERO;
         for (final Future<Integer> value : valueOfStocks) {
             endCount += value.get();
         }
-        log.warn("#-------------处理结束,成功处理文件 【" + endCount + "】个-------------#");
+        executorService.shutdown();
+        Thread.sleep(1000);
     }
 }
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileAnnotationMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileAnnotationMapper.java
new file mode 100644
index 0000000..c9a2a88
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileAnnotationMapper.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2020 Zhejiang Lab. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.datasetutil.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Delete; +import org.apache.ibatis.annotations.Param; +import org.dubhe.datasetutil.domain.entity.DataFileAnnotation; + +import java.util.List; + +/** + * @description nlp文件 服务实现类 + * @date 2021-01-07 + */ +public interface DataFileAnnotationMapper extends BaseMapper { + + /** + * 批量保存nlp中间表 + * + * @param dataFileAnnotations nlp集合 + */ + void saveDataFileAnnotation(@Param("dataFileAnnotations") List dataFileAnnotations); + + /** + * 删除数据集文件标注数据通过数据集ID + * + * @param datasetId 数据集ID + */ + @Delete("delete from data_file_annotation where dataset_id = #{datasetId}") + void delDataFileAnnotationById(long datasetId); +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileMapper.java index 0229e77..fface8f 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileMapper.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataFileMapper.java @@ -17,7 +17,7 @@ package org.dubhe.datasetutil.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; -import org.apache.ibatis.annotations.Param; +import org.apache.ibatis.annotations.*; import org.dubhe.datasetutil.domain.entity.DataFile; import java.util.List; @@ -33,4 +33,35 @@ public interface DataFileMapper extends BaseMapper { * @param listDataFile 文件数据集合 */ void saveBatchDataFile(@Param("listDataFile") List listDataFile); + + + /** + * 创建新表 data_file_1 + */ + @Update("CREATE TABLE data_file_1 LIKE data_file") + void createNewTableOne(); + + + /** + * 创建新表 data_file_2 + */ + @Update("CREATE TABLE data_file_2 LIKE data_file") + void createNewTableTwo(); + + /** + * 根据表名获取表数量 + * + * @param tableName 表名称 + * @return 表数量 + */ + @Select("select count(*) from information_schema.TABLES where table_name = #{tableName}") + int selectCountByTableName(@Param("tableName") String tableName); + + /** + * 删除数据集文件通过数据集ID + * + * @param datasetId 数据集ID + */ + @Delete("delete from data_file where dataset_id = #{datasetId}") + void deleteFileByDatasetId(@Param("datasetId") long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataLabelMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataLabelMapper.java index 1040584..2173cc9 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataLabelMapper.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataLabelMapper.java @@ -17,6 +17,7 @@ package org.dubhe.datasetutil.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Delete; import org.apache.ibatis.annotations.Param; import org.dubhe.datasetutil.domain.entity.DataLabel; @@ -33,4 +34,24 @@ public interface DataLabelMapper extends BaseMapper { * @param listDataLabel 标签数据 */ void saveBatchDataLabel(@Param("listDataLabel") List listDataLabel); + + + + /** + * 根据预置标签组获取预置标签 + * + * @param groupIds 预置标签组IDS + * @return 预置标签 key: 预置标签名称 value:预置标签ID + */ + List getPresetLabelList(@Param("groupIds") List groupIds); + + /** + * 删除标签 + * + * @param datasetId + */ + @Delete("delete from data_label where id IN (\n" + + " select * from ( select label_id from data_dataset_label where dataset_id = #{datasetId}) t\n" + + ")") + void deleteLabelByDatasetId(@Param("datasetId") long datasetId); } diff --git 
a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataSequenceMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataSequenceMapper.java index abf5d00..fa02294 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataSequenceMapper.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataSequenceMapper.java @@ -34,7 +34,16 @@ public interface DataSequenceMapper extends BaseMapper { * @return DataSequence 根据业务编码得到的序列 */ @Select("select id, business_code ,start, step from data_sequence where business_code = #{businessCode}") - DataSequence selectByBusiness(String businessCode); + DataSequence selectByBusiness(@Param("businessCode") String businessCode); + + /** + * 根据ID查询 + * + * @param id 序列ID + * @return DataSequence 根据业务编码得到的序列 + */ + @Select("select id, business_code ,start, step from data_sequence where id = #{id} for update") + DataSequence selectDataSequenceById(@Param("id") Long id); /** * 根据业务编码更新序列起始值 @@ -43,7 +52,7 @@ public interface DataSequenceMapper extends BaseMapper { * @return DataSequence 根据业务编码更新序列起始值 */ @Update("update data_sequence set start = start + step where business_code = #{businessCode} ") - int updateStartByBusinessCode(String businessCode); + int updateStartByBusinessCode(@Param("businessCode") String businessCode); /** * 查询存在表的记录数 @@ -60,7 +69,7 @@ public interface DataSequenceMapper extends BaseMapper { * @param tableName 类型名称 * @param oldTableName 旧类型名称 */ - @Update({"CREATE TABLE ${tableName} AS select * from ${oldTableName} "}) + @Update({"CREATE TABLE ${tableName} like ${oldTableName}"}) void createNewTable(@Param("tableName") String tableName, @Param("oldTableName") String oldTableName); -} +} \ No newline at end of file diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataVersionFileMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataVersionFileMapper.java index d8c107d..6c69301 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataVersionFileMapper.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DataVersionFileMapper.java @@ -17,8 +17,8 @@ package org.dubhe.datasetutil.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; -import org.apache.ibatis.annotations.Param; -import org.dubhe.datasetutil.domain.dto.DataVersionFile; +import org.apache.ibatis.annotations.*; +import org.dubhe.datasetutil.domain.entity.DataVersionFile; import java.util.List; @@ -34,4 +34,44 @@ public interface DataVersionFileMapper extends BaseMapper { * @param listDataVersionFile 数据集文件中间表数据集合 */ void saveBatchDataFileVersion(@Param("listDataVersionFile") List listDataVersionFile); + + + /** + * 创建新表 data_dataset_version_file_1 + */ + @Update("create table data_dataset_version_file_1 like data_dataset_version_file") + void createNewTableOne(); + + + /** + * 创建新表 data_dataset_version_file_2 + */ + @Update("create table data_dataset_version_file_2 like data_dataset_version_file") + void createNewTableTwo(); + + + /** + * 根据表名获取表数量 + * + * @param tableName 表名称 + * @return 表数量 + */ + @Select("select count(*) from information_schema.TABLES where table_name = #{tableName}") + int selectCountByTableName(@Param("tableName") String tableName); + + /** + * 删除数据集版本文件通过数据集ID + * + * @param datasetId 数据集ID + */ + @Delete("delete from data_dataset_version_file where dataset_id = #{datasetId}") + void deleteVersionFileByDatasetId(@Param("datasetId") long datasetId); + + /** + * 删除数据集版本通过数据集ID + * + * @param datasetId 数据集ID + */ + @Delete("delete from data_dataset_version where 
dataset_id = #{datasetId}") + void deleteVersionByDatasetId(@Param("datasetId") long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetDataLabelMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetDataLabelMapper.java index cff8aa6..5752655 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetDataLabelMapper.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetDataLabelMapper.java @@ -17,6 +17,7 @@ package org.dubhe.datasetutil.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Delete; import org.apache.ibatis.annotations.Param; import org.dubhe.datasetutil.domain.entity.DatasetDataLabel; @@ -34,4 +35,12 @@ public interface DatasetDataLabelMapper extends BaseMapper { * @param datasetDataLabelList 数据集与标签 */ void saveBatchDatasetDataLabel(@Param("datasetDataLabelList") List datasetDataLabelList); + + /** + * 删除数据集标签关系通过数据集ID + * + * @param datasetId 数据集ID + */ + @Delete("delete from data_dataset_label where dataset_id = #{datasetId}") + void deleteDatasetLabelByDatasetId(@Param("datasetId") long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetMapper.java index f21cc5b..2bceff7 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetMapper.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetMapper.java @@ -17,6 +17,8 @@ package org.dubhe.datasetutil.dao; import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.apache.ibatis.annotations.Delete; +import org.apache.ibatis.annotations.Insert; import org.apache.ibatis.annotations.Param; import org.apache.ibatis.annotations.Select; import org.dubhe.datasetutil.domain.entity.Dataset; @@ -53,4 +55,30 @@ public interface DatasetMapper extends BaseMapper { */ @Select("select count(1) from data_file where dataset_id = #{datasetId}") int findDataFileById(@Param("datasetId") Long datasetId); + + /** + * 根据数据集ID查询数据集 + * + * @param datasetId 数据集id + * @return Dataset 根据数据集ID得到数据集 + */ + @Select("select * from data_dataset where id = #{datasetId}") + Dataset findDatasetByIdNormal(@Param("datasetId") Long datasetId); + + + /** + * 新增数据集 + * + * @param insertSql sql语句 + */ + @Insert("${insertSql}") + void saveBatch(@Param("insertSql") String insertSql); + + /** + * 删除数据集通过数据集ID + * + * @param datasetId 数据集ID + */ + @Delete("delete from data_dataset where id = #{datasetId}") + void deleteDatasetById(@Param("datasetId") long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetVersionMapper.java b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetVersionMapper.java new file mode 100644 index 0000000..171df6a --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/dao/DatasetVersionMapper.java @@ -0,0 +1,11 @@ +package org.dubhe.datasetutil.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import org.dubhe.datasetutil.domain.entity.DatasetVersion; + +/** + * @description TODO + * @date 2021-03-23 + */ +public interface DatasetVersionMapper extends BaseMapper { +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/AnnotationDTO.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/AnnotationDTO.java new file mode 100644 index 0000000..488dd45 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/AnnotationDTO.java @@ -0,0 +1,40 @@ 
+/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.datasetutil.domain.dto; + +import lombok.Data; + +import java.io.Serializable; + +/** + * @description 标注DTO + * @date 2021-04-14 + */ +@Data +public class AnnotationDTO implements Serializable { + + /** + * 标签ID + */ + private Long categoryId; + + /** + * 预估分 + */ + private Double score; +} + diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java new file mode 100644 index 0000000..639a3ba --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java @@ -0,0 +1,95 @@ +/** + * Copyright 2020 Tianshu AI Platform. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ + +package org.dubhe.datasetutil.domain.dto; + +import lombok.Data; + +import java.sql.Timestamp; + +/** + * @description ES数据同步DTO + * @date 2020-03-24 + */ +@Data +public class EsTransportDTO { + + /** + * 状态 + */ + private Integer annotationStatus; + + /** + * 文件名称 + */ + private String fileName; + + /** + * 文件url + */ + private String url; + + /** + * 创建人ID + */ + private Long createUserId; + + /** + * 创建时间 + */ + private Timestamp createTime; + + /** + * 更新人ID + */ + private Long updateUserId; + + /** + * 更新时间 + */ + private Timestamp updateTime; + + /** + * 文件类型 + */ + private Integer fileType; + + /** + * 增强类型 + */ + private Integer enhanceType; + + /** + * 用户ID + */ + private Long originUserId; + + /** + * 预测值 + */ + private Double prediction; + + /** + * 文件ID + */ + private Long id; + + /** + * 标签ID + */ + private Long labelId; +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java new file mode 100644 index 0000000..6c7b793 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
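// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: EsTransportDTO above carries the fields
// that the import handlers later flatten into an Elasticsearch document. A
// sketch of that mapping, assuming the same ES 6.x-style BulkProcessor and
// IndexRequest(index, "_doc", id) API this patch itself uses elsewhere:
class EsBulkSketch {
    static void index(org.elasticsearch.action.bulk.BulkProcessor bulkProcessor,
                      String esIndex, EsTransportDTO dto) {
        java.util.Map<String, String> source = new java.util.HashMap<>();
        source.put("name", dto.getFileName());
        source.put("status", dto.getAnnotationStatus() == null ? null : dto.getAnnotationStatus().toString());
        // ...remaining fields, null-guarded the same way as in the handlers below
        bulkProcessor.add(new org.elasticsearch.action.index.IndexRequest(esIndex, "_doc", dto.getId().toString()).source(source));
    }
}
// ---------------------------------------------------------------------------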
+ * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.datasetutil.domain.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serializable; + +/** + * @description 文件标注DTO + * @date 2020-01-07 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class FileAnnotationDTO implements Serializable { + + /** + * 标签ID + */ + private Long categoryId; + + /** + * 分数 + */ + private String score; + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java index 87ae460..2960d7e 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java @@ -17,7 +17,10 @@ package org.dubhe.datasetutil.domain.dto; import lombok.Data; -import org.dubhe.datasetutil.common.base.MagicNumConstant; +import org.dubhe.datasetutil.domain.entity.DataSequence; + +import java.util.LinkedList; +import java.util.Queue; /** * @description ID策略实体 @@ -26,25 +29,34 @@ import org.dubhe.datasetutil.common.base.MagicNumConstant; @Data public class IdAlloc { - /** - * 起始位置 - */ - private long startNumber; + private Queue<Long> ids; - /** - * 结束位置 - */ - private long endNumber; + private Long unUsed; + + public IdAlloc() { + ids = new LinkedList<>(); + unUsed = 0L; + } /** - * 可用数量 + * 补充ID + * + * @param dataSequence 待补充的序列段(从 start 起,共 step 个) */ - private long usedNumber; + public void add(DataSequence dataSequence) { + for (long i = dataSequence.getStart(); i < dataSequence.getStart() + dataSequence.getStep(); i++) { + ids.add(i); + unUsed++; + } + } - public IdAlloc() { - this.startNumber = MagicNumConstant.ZERO; - this.endNumber = MagicNumConstant.ZERO; - this.usedNumber = MagicNumConstant.ZERO; + public Queue<Long> poll(int number) { + Queue<Long> result = new LinkedList<>(); + for (int i = 0; i < number; i++) { + result.add(ids.poll()); + unUsed--; + } + return result; } -} +} \ No newline at end of file diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFile.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFile.java index 19a553b..1d32e1a 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFile.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFile.java @@ -16,10 +16,16 @@ */ package org.dubhe.datasetutil.domain.entity; -import com.baomidou.mybatisplus.annotation.IdType; -import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.EqualsAndHashCode; +import org.apache.commons.lang3.StringUtils; import org.dubhe.datasetutil.common.base.BaseEntity; +import org.dubhe.datasetutil.common.base.MagicNumConstant; +import org.dubhe.datasetutil.common.constant.BusinessConstant; +import org.dubhe.datasetutil.common.constant.FileStateCodeConstant; import java.awt.image.BufferedImage; import java.io.Serializable; @@ -28,13 +34,16 @@ import java.io.Serializable; * @description 文件类 * @date 2020-09-17 */ +@AllArgsConstructor +@EqualsAndHashCode(callSuper = false) +@Builder +@TableName("data_file") @Data public class DataFile extends BaseEntity implements Serializable { /** * id */ - @TableId(type
= IdType.AUTO) private Long id; /** @@ -92,7 +101,8 @@ public class DataFile extends BaseEntity implements Serializable { */ private Long originUserId; - public DataFile() {} + public DataFile() { + } /** * 插入文件表 @@ -101,18 +111,69 @@ public class DataFile extends BaseEntity implements Serializable { * @param datasetId 数据集id * @param url 文件路径 * @param createUserId 创建人id - * @param read 文件宽高 * @return DataFile file对象 - */ - public DataFile(String name, Long datasetId, String url, Long createUserId, BufferedImage read) { - this.name = name.substring(0, name.lastIndexOf(".")); + */ + public DataFile(String name, Long datasetId, String url, Long createUserId, int status) { + this.name = name; this.datasetId = datasetId; this.url = url; - this.status = 101; + this.status = status; this.setDeleted(false); this.originUserId = createUserId; + } + + + /** + * 插入文件表 + * + * @param name 文件名字 + * @param datasetId 数据集id + * @param url 文件路径 + * @param createUserId 创建人id + * @param status 状态 + * @param fileType 文件类型 + * @param pid 父文件ID + * @param originUserId 资源拥有者ID + * @return DataFile file对象 + */ + public DataFile(String name, Long datasetId, String url, Long createUserId, int status, int fileType, long pid, long originUserId) { + this.name = name; + this.datasetId = datasetId; + this.url = url; + this.status = status; + this.setDeleted(false); + this.setCreateUserId(createUserId); + this.fileType = fileType; + this.pid = pid; + this.originUserId = originUserId; + } + + /** + * 插入文件表 + * + * @param name 文件名字 + * @param datasetId 数据集id + * @param url 文件路径 + * @param createUserId 创建人id + * @param read 文件宽高 + * @param status 状态 + * @param fileType 文件类型 + * @param pid 父文件ID + * @param originUserId 资源拥有者ID + * @return DataFile file对象 + */ + public DataFile(String name, Long datasetId, String url, Long createUserId, BufferedImage read, int status, int fileType, long pid, long originUserId) { + this.name = name; + this.datasetId = datasetId; + this.url = url; + this.status = status; + this.setDeleted(false); + this.setCreateUserId(createUserId); this.width = read.getWidth(); this.height = read.getHeight(); + this.fileType = fileType; + this.pid = pid; + this.originUserId = originUserId; } } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFileAnnotation.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFileAnnotation.java new file mode 100644 index 0000000..365bfc7 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataFileAnnotation.java @@ -0,0 +1,88 @@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
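// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: IdAlloc.poll(number) above calls
// ids.poll() unconditionally, so if fewer than `number` ids have been added the
// queue yields null entries and unUsed goes negative. A guarded variant
// (sketch; assumes the caller refills the pool via add(...) when it runs dry):
class GuardedIdAllocSketch {
    private final java.util.Queue<Long> ids = new java.util.LinkedList<>();
    private long unUsed = 0L;

    java.util.Queue<Long> poll(int number) {
        if (number > unUsed) {
            throw new IllegalStateException("id pool exhausted: need " + number + ", have " + unUsed);
        }
        java.util.Queue<Long> result = new java.util.LinkedList<>();
        for (int i = 0; i < number; i++) {
            result.add(ids.poll());
            unUsed--;
        }
        return result;
    }
}
// ---------------------------------------------------------------------------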
+ * ============================================================= + */ +package org.dubhe.datasetutil.domain.entity; + +import com.baomidou.mybatisplus.annotation.IdType; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.*; +import org.dubhe.datasetutil.common.base.BaseEntity; + +import java.io.Serializable; + +/** + * @description nlp中间表 + * @date 2020-01-07 + */ +@Data +@NoArgsConstructor +@AllArgsConstructor +@EqualsAndHashCode(callSuper = false) +@Builder +@TableName("data_file_annotation") +public class DataFileAnnotation extends BaseEntity implements Serializable { + + /** + * id + */ + private Long id; + + /** + * 数据集ID + */ + private Long datasetId; + + /** + * 标签ID + */ + private Long labelId; + + /** + * 数据集版本文件ID + */ + private Long versionFileId; + + /** + * 预测值(值=实际值*100) + */ + private Double prediction; + + /** + * 文件名称 + */ + private String fileName; + + /** + * 插入nlp中间表 + * + * @param datasetId 数据集id + * @param labelId 标签id + * @param versionFileId 数据集版本文件id + * @param prediction 预测值 + * @param createUserId 创建人id + * @param fileName 文件名称 + */ + public DataFileAnnotation(Long datasetId, Long labelId, Long versionFileId, Double prediction, Long createUserId, String fileName) { + this.datasetId = datasetId; + this.labelId = labelId; + this.versionFileId = versionFileId; + this.prediction = prediction; + this.setCreateUserId(createUserId); + this.fileName = fileName; + } +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataLabelGroup.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataLabelGroup.java index 4bc6120..04771ad 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataLabelGroup.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataLabelGroup.java @@ -67,4 +67,9 @@ public class DataLabelGroup extends BaseEntity implements Serializable { @TableField(value = "origin_user_id") private Long originUserId; + /** + * 标签组数据类型(0:视觉,1:文本) + */ + private Integer labelGroupType; + } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/DataVersionFile.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataVersionFile.java similarity index 84% rename from dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/DataVersionFile.java rename to dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataVersionFile.java index a7c89ec..6e57973 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/DataVersionFile.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DataVersionFile.java @@ -14,11 +14,10 @@ * limitations under the License.
* ============================================================= */ -package org.dubhe.datasetutil.domain.dto; +package org.dubhe.datasetutil.domain.entity; -import com.baomidou.mybatisplus.annotation.IdType; -import com.baomidou.mybatisplus.annotation.TableId; -import lombok.Data; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.*; import org.dubhe.datasetutil.common.base.BaseEntity; import java.io.Serializable; @@ -28,13 +27,16 @@ import java.io.Serializable; * @description 数据集文件关系类 * @date 2020-9-17 */ +@AllArgsConstructor +@EqualsAndHashCode(callSuper = false) +@Builder +@TableName("data_dataset_version_file") @Data public class DataVersionFile extends BaseEntity implements Serializable { /** * id */ - @TableId(type = IdType.AUTO) private Long id; /** @@ -72,6 +74,12 @@ public class DataVersionFile extends BaseEntity implements Serializable { */ private Integer changed; + /** + * 文件名称 + */ + private String fileName; + + public DataVersionFile() { } @@ -84,10 +92,11 @@ public class DataVersionFile extends BaseEntity implements Serializable { * @param status 状态 * @return DataVersionFile 数据集版本文件表 */ - public DataVersionFile(Long datasetId, Long fileId,Integer annotationStatus,Integer status) { + public DataVersionFile(Long datasetId, Long fileId,Integer annotationStatus,Integer status,String fileName) { this.datasetId = datasetId; this.fileId = fileId; this.annotationStatus = annotationStatus; this.status = status; + this.fileName = fileName; } } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/Dataset.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/Dataset.java index 5abc7bc..296e2d2 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/Dataset.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/Dataset.java @@ -140,6 +140,10 @@ public class Dataset extends BaseEntity implements Serializable { */ private Long originUserId; + + private Long labelGroupId; + + public Dataset() {} } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DatasetVersion.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DatasetVersion.java new file mode 100644 index 0000000..380c577 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/DatasetVersion.java @@ -0,0 +1,53 @@ +package org.dubhe.datasetutil.domain.entity; + +import com.baomidou.mybatisplus.annotation.*; +import lombok.Data; +import org.dubhe.datasetutil.common.base.BaseEntity; + +import java.sql.Timestamp; +import java.util.Date; + +/** + * @description TODO + * @date 2021-03-23 + */ +@Data +@TableName("data_dataset_version") +public class DatasetVersion extends BaseEntity { + + @TableId(type = IdType.AUTO) + private Long id; + + private Long datasetId; + + private Long teamId; + + private String versionName; + + private String versionNote; + + private String versionSource; + + private String versionUrl; + + private Integer dataConversion; + + @TableField(value = "deleted", fill = FieldFill.INSERT) + private Boolean deleted = false; + + private Long originUserId; + + public DatasetVersion() {} + + public DatasetVersion(Long datasetId, String versionName, String versionNote) { + this.datasetId = datasetId; + this.versionName = versionName; + this.setCreateUserId(0L); + this.setCreateTime(new Timestamp(System.currentTimeMillis())); + this.versionUrl = "dataset/"+datasetId +"/versionFile/"+versionName; + this.dataConversion = 2; + this.originUserId = 0L; + this.versionNote 
= versionNote; + } + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/LogInfo.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/LogInfo.java index 69924c5..9d599fe 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/LogInfo.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/entity/LogInfo.java @@ -17,8 +17,8 @@ package org.dubhe.datasetutil.domain.entity; +import cn.hutool.core.date.DateUtil; import com.alibaba.fastjson.annotation.JSONField; -import com.xiaoleilu.hutool.date.DateUtil; import lombok.Data; import lombok.experimental.Accessors; diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/handle/CustomDatasetImportHandle.java b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/CustomDatasetImportHandle.java new file mode 100644 index 0000000..f324ee9 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/CustomDatasetImportHandle.java @@ -0,0 +1,171 @@ +package org.dubhe.datasetutil.handle; + +import lombok.extern.slf4j.Slf4j; +import org.dubhe.datasetutil.common.base.MagicNumConstant; +import org.dubhe.datasetutil.common.constant.AnnotateTypeEnum; +import org.dubhe.datasetutil.common.constant.BusinessConstant; +import org.dubhe.datasetutil.common.constant.DataStateCodeConstant; +import org.dubhe.datasetutil.common.constant.DatatypeEnum; +import org.dubhe.datasetutil.common.exception.BusinessException; +import org.dubhe.datasetutil.common.util.*; +import org.dubhe.datasetutil.domain.entity.Dataset; +import org.dubhe.datasetutil.service.DatasetService; +import org.dubhe.datasetutil.service.DatasetVersionService; +import org.springframework.aop.framework.AopContext; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.EnableAspectJAutoProxy; +import org.springframework.stereotype.Component; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.FileFilter; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Scanner; +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * @description 自定义数据集导入 + * @date 2021-03-23 + */ +@Slf4j +@Component +@EnableAspectJAutoProxy(exposeProxy = true) +public class CustomDatasetImportHandle { + + @Autowired + DatasetService datasetService; + @Autowired + DatasetVersionService datasetVersionService; + @Autowired + MinioUtil minioUtil; + + /** + * 自定义数据集导入 + * 1.修改数据集状态为已完成 + * 2.创建版本数据 + * 3.文件导入到版本目录 + * + * @param args 参数 (1)数据集ID (2)文件路径 + */ + public void execute(Object[] args) throws Exception { + valid(args); + ((CustomDatasetImportHandle) AopContext.currentProxy()).sqlExecute(args); + fileExecute(args); + log.warn(""); + PrintUtils.printLine(" Success: 执行成功 ", PrintUtils.GREEN); + log.warn(""); + System.out.println("# 是否结束? 
Y / N #"); + Scanner scannerExit = new Scanner(System.in); + if (BusinessConstant.Y.toLowerCase().equals(scannerExit.nextLine().toLowerCase())) { + System.exit(MagicNumConstant.ZERO); + } + } + + /** + * 数据库处理 + * 1.修改数据集状态为已完成 + * 2.增加数据集版本数据 + * 已存在的情况下,不重复添加 + * + * @param args 参数 (1)数据集ID (2)文件路径 + */ + @Transactional(rollbackFor = Exception.class) + public void sqlExecute(Object[] args) { + Dataset dataset = datasetService.findDatasetById((long)args[0]); + if (Objects.isNull(dataset)) { + throw new BusinessException("数据集不存在"); + } + //更新数据集状态为已完成 + if (!DataStateCodeConstant.ANNOTATION_COMPLETE_STATE.equals(dataset.getStatus())) { + dataset.setStatus(DataStateCodeConstant.ANNOTATION_COMPLETE_STATE); + dataset.setCurrentVersionName(BusinessConstant.DEFAULT_VERSION); + datasetService.updateDataset(dataset); + } + //生成版本信息 只会生成V0001 + if (Objects.isNull(datasetVersionService.getByDatasetIdAndVersionNum(dataset.getId(), BusinessConstant.DEFAULT_VERSION))) { + datasetVersionService.insertVersion(dataset.getId(), BusinessConstant.DEFAULT_VERSION, "自定义"); + } + + } + + /** + * 遍历用户文件夹上传所有问题 + * + * @param args 参数 (1)数据集ID (2)文件路径 + */ + public void fileExecute(Object[] args) throws Exception { + List filePaths = FileUtil.traverseFolder((String) args[1]); + List> partitions = new ArrayList<>(); + int oneSize = ThreadUtils.createThread(filePaths.size()); + List need = new ArrayList<>(); + Integer integer = new Integer(0); + //初始化进度条 + ProcessBarUtil.initProcess("自定义导入", (long) filePaths.size()); + for (String filePath : filePaths) { + need.add(filePath); + if (need.size() == oneSize || integer.intValue() == filePaths.size() - 1) { + List now = new ArrayList<>(need); + need.clear(); + partitions.add(() -> run(now, args)); + } + integer ++; + } + ThreadUtils.runMultiThread(partitions); + } + + public Integer run(List filePaths, Object[] args) { + log.info("#-------------开始处理,时间[" + DateUtil.getNowStr() + "]-------------#"); + log.info("#-------------文件数量[" + filePaths.size() + "]------------------------"); + Integer success = 0; + for (String str : filePaths) { + try { + String objectName = "dataset/" + (long) args[0] + "/versionFile/V0001" + str.replace((String)args[1], "").replaceAll("\\\\", "/"); + minioUtil.upLoadFileByInputStream(objectName, str); + ProcessBarUtil.processBar01(1L); + success ++; + } catch (Exception e) { + log.error(str + "upload error {}", e); + } + } + return success; + } + + /** + * 数据校验 + * 1.参数校验 + * 2.参数对应数据集是否存在 + * 3.用户输入目录下是否有问题 + * 4.数据集标注类型和数据类型是否正确 + * + * @param args 参数 (1)数据集ID (2)文件路径 + */ + public void valid(Object[] args) { + if (args == null || args.length != 2) { + throw new BusinessException("参数数量不匹配"); + } + Long datasetId = null; + try { + datasetId = (long) args[0]; + } catch (Exception e) { + log.error("数据集ID输入不正确, {}", e); + throw new BusinessException("数据集ID输入不正确"); + } + Dataset dataset = datasetService.findDatasetById(datasetId); + if (Objects.isNull(dataset)) { + throw new BusinessException("输入数据集不存在"); + } + if (!AnnotateTypeEnum.AUTO_IMPORT.getValue().equals(dataset.getAnnotateType()) || !DatatypeEnum.AUTO_IMPORT.getValue().equals(dataset.getDataType())) { + throw new BusinessException("请确认该数据及的标注类型以及数据类型都是自定义导入"); + } + String filePath = (String) args[1]; + if (!cn.hutool.core.io.FileUtil.exist(filePath) || !cn.hutool.core.io.FileUtil.isDirectory(filePath)) { + throw new BusinessException("请确保您输入的数据集路径是否正确"); + } + } + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImageUploadHandle.java 
b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImageUploadHandle.java index 312d5e5..6309fe4 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImageUploadHandle.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImageUploadHandle.java @@ -16,29 +16,39 @@ */ package org.dubhe.datasetutil.handle; -import com.xiaoleilu.hutool.io.FileUtil; +import cn.hutool.core.io.FileUtil; +import com.google.common.collect.Lists; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.dubhe.datasetutil.common.base.MagicNumConstant; +import org.dubhe.datasetutil.common.config.ImageConfig; import org.dubhe.datasetutil.common.constant.BusinessConstant; -import org.dubhe.datasetutil.common.util.DateUtil; -import org.dubhe.datasetutil.common.util.GeneratorKeyUtil; -import org.dubhe.datasetutil.common.util.MinioUtil; -import org.dubhe.datasetutil.common.util.ThreadUtils; +import org.dubhe.datasetutil.common.enums.LogEnum; +import org.dubhe.datasetutil.common.constant.DataStateCodeConstant; +import org.dubhe.datasetutil.common.constant.FileStateCodeConstant; +import org.dubhe.datasetutil.common.enums.DatatypeEnum; +import org.dubhe.datasetutil.common.exception.ImportDatasetException; +import org.dubhe.datasetutil.common.util.*; import org.dubhe.datasetutil.domain.entity.DataFile; -import org.dubhe.datasetutil.domain.dto.DataVersionFile; +import org.dubhe.datasetutil.domain.entity.DataVersionFile; import org.dubhe.datasetutil.domain.entity.Dataset; import org.dubhe.datasetutil.service.DataFileService; import org.dubhe.datasetutil.service.DataVersionFileService; import org.dubhe.datasetutil.service.DatasetService; +import org.elasticsearch.action.bulk.BulkProcessor; +import org.elasticsearch.action.index.IndexRequest; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; +import org.springframework.util.CollectionUtils; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; -import java.util.ArrayList; -import java.util.List; +import java.util.*; import java.util.concurrent.Callable; -import java.util.concurrent.atomic.AtomicInteger; /** * @description 上传图片工具类 @@ -48,6 +58,12 @@ import java.util.concurrent.atomic.AtomicInteger; @Component public class DatasetImageUploadHandle { + /** + * esSearch索引 + */ + @Value("${es.index}") + private String esIndex; + @Autowired private MinioUtil minioUtil; @@ -63,6 +79,34 @@ public class DatasetImageUploadHandle { @Autowired private GeneratorKeyUtil generatorKeyUtil; + @Autowired + private ImageConfig imageConfig; + + @Autowired + private BulkProcessor bulkProcessor; + + /** + * 缺陷图片集合(多线程写入,使用线程安全列表) + */ + public final List<String> defectsFile = Collections.synchronizedList(new ArrayList<>()); + + /** + * 上传图片 + * + * @param scanner 输入 + */ + public void importPicture(Scanner scanner) throws Exception { + Dataset dataset = verificationDatasetId(scanner); + String imagePath = verificationFilePath(scanner, dataset); + try { + execute(imagePath, dataset.getId()); + } catch (Exception e) { + log.error(""); + PrintUtils.printLine(" Error:" + e.getMessage(), PrintUtils.RED); + log.error(""); + } + } + /** * 启动线程 * @@ -70,27 +114,89 @@ public class DatasetImageUploadHandle { * @param datasetId 数据集Id */ public void execute(String imagePath, Long datasetId) throws Exception { -
log.info("#-------------开始处理,时间[" + DateUtil.getNowStr() + "]-------------#"); List fileNames = FileUtil.listFileNames(imagePath); - log.info("#-------------文件数量[" + fileNames.size() + "]------------------------"); + log.warn("需要处理文件:【" + fileNames.size() + "】个文件"); String fileBaseDir = BusinessConstant.MINIO_ROOT_PATH + BusinessConstant.FILE_SEPARATOR + datasetId + BusinessConstant.FILE_SEPARATOR + BusinessConstant.IMAGE_ORIGIN + BusinessConstant.FILE_SEPARATOR; - List> partitions = new ArrayList<>(); int oneSize = ThreadUtils.createThread(fileNames.size()); + int batchNumber = MagicNumConstant.ZERO; + //初始化进度条 + ProcessBarUtil.initProcess("图片导入", (long) fileNames.size()); + if (fileNames.size() > MagicNumConstant.TEN_THOUSAND) { + log.warn("........系统处理中........."); + List> partitionList = Lists.partition(fileNames, MagicNumConstant.FIVE_THOUSAND); + for (List imageFileNameList1 : partitionList) { + batchNumber++; + dealFileList(imageFileNameList1, oneSize, fileBaseDir, imagePath, datasetId, batchNumber); + } + } else { + log.warn("........系统处理中........."); + batchNumber++; + dealFileList(fileNames, oneSize, fileBaseDir, imagePath, datasetId, batchNumber); + } + log.warn(""); + PrintUtils.printLine(" Success: 执行成功 ", PrintUtils.GREEN); + log.warn(""); + System.out.println("# 是否结束? Y / N #"); + Scanner scannerExit = new Scanner(System.in); + if (BusinessConstant.Y.toLowerCase().equals(scannerExit.nextLine().toLowerCase())) { + System.exit(MagicNumConstant.ZERO); + } + } + + + /** + * @param fileNames 图片集合 + * @param oneSize 每次处理次数 + * @param fileBaseDir 文件根目录 + * @param imagePath 图片文件路径 + * @param datasetId 数据集ID + * @throws Exception + */ + public void dealFileList(List fileNames, int oneSize, String fileBaseDir, String imagePath, Long datasetId, int batchNumber) throws Exception { + int dealSize = MagicNumConstant.ZERO; + Dataset dataset = datasetService.queryDatasetById(datasetId); List need = new ArrayList<>(); - AtomicInteger atomicInteger = new AtomicInteger(0); - for (String fileName : fileNames) { - need.add(fileName); - if (need.size() == oneSize || atomicInteger.intValue() == fileNames.size() - 1) { - List now = new ArrayList<>(need); - need.clear(); - partitions.add(() -> run(datasetId, now, fileBaseDir, imagePath)); + List> partitions = new ArrayList<>(); + //初始化进度条 + for (int i = 0; i < fileNames.size(); i++) { + String suffixFileName = fileNames.get(i).substring(fileNames.get(i).lastIndexOf(BusinessConstant.SPOT)); + if(dataset.getDataType().equals(DatatypeEnum.TXT.getValue())){ + if (imageConfig.getTxtFormat().contains(suffixFileName.toLowerCase())) { + need.add(fileNames.get(i)); + if (need.size() == oneSize || i == fileNames.size() - MagicNumConstant.ONE) { + List now = new ArrayList<>(need); + dealSize += now.size(); + need.clear(); + partitions.add(() -> run(datasetId, now, fileBaseDir, imagePath)); + } + } + } else { + if (imageConfig.getImageFormat().contains(suffixFileName.toLowerCase())) { + need.add(fileNames.get(i)); + if (need.size() == oneSize || i == fileNames.size() - MagicNumConstant.ONE) { + List now = new ArrayList<>(need); + dealSize += now.size(); + need.clear(); + partitions.add(() -> run(datasetId, now, fileBaseDir, imagePath)); + } + } } - atomicInteger.getAndIncrement(); } ThreadUtils.runMultiThread(partitions); + if (!CollectionUtils.isEmpty(defectsFile)) { + log.error(""); + log.warn("#-------------系统共排查出缺陷文件【" + defectsFile.size() + "】个-------------#"); + log.error(""); + log.warn("缺陷文件列表 " + defectsFile.toString() + ""); + log.error(""); + 
defectsFile.clear(); + } + + } + /** * 插入数据库数据 * @@ -98,43 +204,156 @@ public class DatasetImageUploadHandle { * @param fileNames 文件Name * @param fileBaseDir 文件路径 * @param imagePath 文件地址 - * @return java.lang.Integer 成功数量 + * @return Integer 成功数量 */ public Integer run(Long datasetId, List fileNames, String fileBaseDir, String imagePath) { - Integer success = 0; + Integer success = MagicNumConstant.ZERO; Dataset dataset = datasetService.findCreateUserIdById(datasetId); List dataFiles = new ArrayList<>(); List dataVersionFiles = new ArrayList<>(); for (int i = 0; i < fileNames.size(); i++) { try { - minioUtil.upLoadFile(fileBaseDir + fileNames.get(i), FileUtil.getInputStream(imagePath + BusinessConstant.FILE_SEPARATOR + fileNames.get(i))); - BufferedImage read = ImageIO.read(new File(imagePath + BusinessConstant.FILE_SEPARATOR + fileNames.get(i))); - success++; - dataFiles.add(new DataFile(fileNames.get(i), datasetId, minioUtil.getUrl(fileBaseDir + fileNames.get(i)), dataset.getCreateUserId(), read)); + String fileName = StringUtils.substring(fileNames.get(i), MagicNumConstant.ZERO, fileNames.get(i).lastIndexOf(BusinessConstant.SPOT)) + System.nanoTime(); + String suffixFileName = fileNames.get(i).substring(fileNames.get(i).lastIndexOf(BusinessConstant.SPOT)); + minioUtil.upLoadFile(imagePath + BusinessConstant.FILE_SEPARATOR + fileNames.get(i), fileBaseDir + fileName + suffixFileName); + DataFile dataFile = new DataFile(fileName, datasetId, minioUtil.getUrl(fileBaseDir + fileName + suffixFileName), + dataset.getCreateUserId(), FileStateCodeConstant.NOT_ANNOTATION_FILE_STATE); - if (dataFiles.size() % 500 == 0 || i == fileNames.size() - 1) { - long startDataFileIndex = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE, dataFiles.size()); + if (dataset.getDataType().compareTo(DatatypeEnum.IMAGE.getValue()) == 0) { + BufferedImage read; + try { + read = ImageIO.read(new File(imagePath + BusinessConstant.FILE_SEPARATOR + fileNames.get(i))); + } catch (ArrayIndexOutOfBoundsException e) { + defectsFile.add(fileNames.get(i)); + throw new ImportDatasetException("该图片文件内部错误 " + fileNames.get(i) + ",请重新审核后再去上传此图片,当前已经跳过此图片"); + } + dataFile.setWidth(read.getWidth()); + dataFile.setHeight(read.getHeight()); + } + success++; + dataFiles.add(dataFile); + if (dataFiles.size() % MagicNumConstant.FIVE_HUNDRED == MagicNumConstant.ZERO || i == fileNames.size() - MagicNumConstant.ONE) { + Queue dataFileIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE, dataFiles.size()); for (DataFile dataFileEntity : dataFiles) { - dataFileEntity.setId(startDataFileIndex++); + dataFileEntity.setId(dataFileIds.poll()); } - dataFileService.saveBatchDataFile(dataFiles); for (DataFile file : dataFiles) { - dataVersionFiles.add(new DataVersionFile(datasetId, file.getId(), 101, 0)); + dataVersionFiles.add(new DataVersionFile(datasetId, file.getId(), DataStateCodeConstant.NOT_ANNOTATION_STATE, MagicNumConstant.ZERO, file.getName())); + if(dataset.getDataType().equals(DatatypeEnum.TXT.getValue())){ + try{ + String bucketName = StringUtils.substringBefore(file.getUrl(),"/"); + String fullFilePath = StringUtils.substringAfter(file.getUrl(), "/"); + String content = minioUtil.readString(bucketName, fullFilePath); + Map jsonMap = new HashMap<>(); + jsonMap.put("content",content); + jsonMap.put("name", file.getName()); + jsonMap.put("status",FileStateCodeConstant.NOT_ANNOTATION_FILE_STATE.toString()); + jsonMap.put("datasetId",dataset.getId().toString()); + 
jsonMap.put("createUserId",file.getCreateUserId()==null?null:file.getCreateUserId().toString()); + jsonMap.put("createTime",file.getCreateTime()==null?null:file.getCreateTime().toString()); + jsonMap.put("updateUserId",file.getUpdateUserId()==null?null:file.getUpdateUserId().toString()); + jsonMap.put("updateTime",file.getUpdateTime()==null?null:file.getUpdateTime().toString()); + jsonMap.put("fileType",file.getFileType()==null?null:file.getFileType().toString()); + jsonMap.put("enhanceType",file.getEnhanceType()==null?null:file.getEnhanceType().toString()); + jsonMap.put("originUserId",file.getOriginUserId().toString()); + jsonMap.put("versionName", StringUtils.isEmpty(dataset.getCurrentVersionName())?"V0000" : dataset.getCurrentVersionName()); + bulkProcessor.add(new IndexRequest(esIndex, "_doc", file.getId().toString()).source(jsonMap)); + } catch (Exception e){ + LogUtil.error(LogEnum.BIZ_DATASET, "上传es失败: {} ", e); + } + } + } + if(dataset.getDataType().equals(DatatypeEnum.TXT.getValue())){ + bulkProcessor.flush(); } - long startDataFileVersionIndex = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_VERSION_FILE, dataVersionFiles.size()); + Queue dataFileVersionIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_VERSION_FILE, dataVersionFiles.size()); for (DataVersionFile dataVersionFile : dataVersionFiles) { - dataVersionFile.setId(startDataFileVersionIndex++); + dataVersionFile.setId(dataFileVersionIds.poll()); } dataVersionFileService.saveBatchDataFileVersion(dataVersionFiles); + ProcessBarUtil.processBar01((long) dataVersionFiles.size()); dataVersionFiles.clear(); dataFiles.clear(); } } catch (Exception e) { - log.error("{}", e); + log.error(fileNames.get(i) + "{}", e); + log.error("运行异常: {}", e.getMessage()); } } return success; } + + /** + * 校验数据集ID + * + * @param scanner 控制台输入参数 + * @return Dataset 数据集 + */ + public Dataset verificationDatasetId(Scanner scanner) { + boolean flag = false; + Dataset dataset = new Dataset(); + while (!flag) { + System.out.println(" "); + System.out.println("# 请输入数据集ID #"); + String datasetIdStr = scanner.nextLine(); + long datasetId; + try { + datasetId = Long.parseLong(datasetIdStr.trim()); + } catch (Exception e) { + log.error(""); + PrintUtils.printLine(" Error: 数据集ID非法,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } + dataset = datasetService.findDatasetByIdNormal(datasetId); + if (dataset == null) { + log.error(""); + PrintUtils.printLine(" Error: 数据集ID不存在,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } else { + flag = true; + } + } + return dataset; + } + + /** + * 校验文件路径及格式 + * + * @param scanner 输入控制台 + * @param dataset 数据集 + * @return String 字符串 + */ + public String verificationFilePath(Scanner scanner,Dataset dataset) { + boolean flag = false; + String filePath = ""; + while (!flag) { + System.out.println(" "); + System.out.println("# 请输入待上传本地文件的绝对路径 #"); + filePath = scanner.nextLine(); + File file = new File(filePath.trim()); + if (!file.exists()) { + log.error(""); + PrintUtils.printLine(" 【" + filePath + "】文件路径不存在,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } + File fileNames = new File(filePath); + File[] imageFiles = fileNames.listFiles(); + if (imageFiles == null || imageFiles.length == MagicNumConstant.ZERO) { + log.error(""); + PrintUtils.printLine(" 【" + filePath + "】目录下不存在文件 ", PrintUtils.RED); + log.error(""); + continue; + } else { + flag = true; + } + } + + return filePath; + } + } diff --git 
a/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImportHandle.java b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImportHandle.java index f456043..ab5e8c7 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImportHandle.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/DatasetImportHandle.java @@ -1,12 +1,12 @@ /** * Copyright 2020 Zhejiang Lab. All Rights Reserved. - * + *

* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + *

* http://www.apache.org/licenses/LICENSE-2.0 - * + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -16,35 +16,45 @@ */ package org.dubhe.datasetutil.handle; +import cn.hutool.core.io.FileUtil; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.TypeReference; -import com.xiaoleilu.hutool.io.FileUtil; +import com.google.common.collect.Lists; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dubhe.datasetutil.common.base.MagicNumConstant; +import org.dubhe.datasetutil.common.config.ImageConfig; import org.dubhe.datasetutil.common.config.MinioConfig; +import org.dubhe.datasetutil.common.constant.AnnotateTypeEnum; import org.dubhe.datasetutil.common.constant.BusinessConstant; import org.dubhe.datasetutil.common.constant.FileStateCodeConstant; +import org.dubhe.datasetutil.common.enums.DatatypeEnum; +import org.dubhe.datasetutil.common.enums.LogEnum; import org.dubhe.datasetutil.common.exception.ImportDatasetException; import org.dubhe.datasetutil.common.util.*; +import org.dubhe.datasetutil.domain.dto.AnnotationDTO; import org.dubhe.datasetutil.domain.entity.*; -import org.dubhe.datasetutil.domain.dto.DataVersionFile; +import org.dubhe.datasetutil.domain.entity.DataVersionFile; import org.dubhe.datasetutil.service.*; +import org.elasticsearch.action.bulk.BulkProcessor; +import org.elasticsearch.action.index.IndexRequest; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Component; - +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.CollectionUtils; +import javax.annotation.Resource; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; +import java.io.BufferedReader; import java.io.File; +import java.io.FileReader; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.*; import java.util.concurrent.Callable; -import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; /** @@ -55,6 +65,12 @@ import java.util.stream.Collectors; @Component public class DatasetImportHandle { + /** + * esSearch索引 + */ + @Value("${es.index}") + private String esIndex; + @Autowired private DatasetService datasetService; @@ -76,6 +92,9 @@ public class DatasetImportHandle { @Autowired private DataVersionFileService dataVersionFileService; + @Autowired + private DataFileAnnotationService dataFileAnnotationService; + @Autowired private MinioUtil minioUtil; @@ -85,10 +104,11 @@ public class DatasetImportHandle { @Autowired private GeneratorKeyUtil generatorKeyUtil; - /** - * 可支持的图片格式集合 - */ - private static final List SUFFIX_LIST = new ArrayList<>(); + @Autowired + private ImageConfig imageConfig; + + @Resource + private BulkProcessor bulkProcessor; /** * 标注文件中JSON的key @@ -99,11 +119,6 @@ public class DatasetImportHandle { * 加载静态集合数据 */ static { - SUFFIX_LIST.add(".jpg"); - SUFFIX_LIST.add(".png"); - SUFFIX_LIST.add(".bmp"); - SUFFIX_LIST.add(".jpeg"); - annotationFileContextKey.add("score"); annotationFileContextKey.add("area"); annotationFileContextKey.add("name"); @@ -120,19 +135,21 @@ public class DatasetImportHandle { public void importDataset(Scanner scanner) throws Exception { 
Dataset dataset = verificationDatasetId(scanner); String filePath = verificationFilePath(scanner); - File labelJsonFile = verificationFile(filePath); + File labelJsonFile = verificationFile(filePath, dataset); DataLabelGroup dataLabelGroup = saveDataLabelGroup(HandleFileUtil.getLabelGroupName(labelJsonFile.getName()), dataset); List dataLabelList = readLabelContext(labelJsonFile); saveDataLabel(dataset, dataLabelList, dataLabelGroup.getId()); + log.info("........数据校验完成,即将执行下一步操作,请勿关闭窗口................."); executeUploadAndSave(dataLabelList, filePath, dataset); + dataset.setLabelGroupId(dataLabelGroup.getId()); datasetService.updateDatasetStatus(dataset); log.warn(""); PrintUtils.printLine(" Success: 执行成功 ", PrintUtils.GREEN); log.warn(""); - log.warn("# 是否结束? Y / N #"); + System.out.println("# 是否结束? Y / N #"); Scanner scannerExit = new Scanner(System.in); if (BusinessConstant.Y.toLowerCase().equals(scannerExit.nextLine().toLowerCase())) { - System.exit(0); + System.exit(MagicNumConstant.ZERO); } } @@ -140,14 +157,15 @@ public class DatasetImportHandle { * 检查文件结构 、类型 * * @param globalFilePath 文件路径 + * @param dataset 数据集 * @return file 标签文件 */ - public File verificationFile(String globalFilePath) throws IOException { + public File verificationFile(String globalFilePath, Dataset dataset) throws IOException { File labelRootFiles = new File(globalFilePath); File imageRootFiles = new File(globalFilePath + HandleFileUtil.generateFilePath(BusinessConstant.IMAGE_ORIGIN)); File annotationRootFiles = new File(globalFilePath + HandleFileUtil.generateFilePath(BusinessConstant.ANNOTATION)); if (imageRootFiles.list() == null || annotationRootFiles.listFiles() == null) { - throw new ImportDatasetException(" 【" + globalFilePath + "】目录中的图片目录(origin)或者标注文件目录(annotation)的文件夹为空 "); + throw new ImportDatasetException("【" + globalFilePath + "】目录中的文件目录(origin)或者标注文件目录(annotation)的文件夹为空 "); } File labelJsonFile = null; for (File file : Objects.requireNonNull(labelRootFiles.listFiles())) { @@ -158,35 +176,49 @@ public class DatasetImportHandle { } } if (labelJsonFile == null) { - throw new ImportDatasetException(" 【" + globalFilePath + "】目录中未找到标签组文件"); + throw new ImportDatasetException("【" + globalFilePath + "】目录中未找到标签组文件"); } dealLabelGroup(labelJsonFile.getName()); List dataLabelList = readLabelContext(labelJsonFile); Map> dataLabelMap = dataLabelList.stream().collect(Collectors.groupingBy(DataLabel::getName)); for (Map.Entry> entry : dataLabelMap.entrySet()) { - if (entry.getValue().size() > 1) { + if (entry.getValue().size() > MagicNumConstant.ONE) { throw new ImportDatasetException(" 标签组中标签存在重复标签:【" + entry.getKey() + "】"); } } File[] imageFiles = imageRootFiles.listFiles(); - if (imageFiles == null || imageFiles.length == 0) { - throw new ImportDatasetException(" 图片文件下不存在图片文件 "); + if (imageFiles == null || imageFiles.length == MagicNumConstant.ZERO) { + throw new ImportDatasetException(" 文件夹下不存在文件 "); } + log.info("........校验文件格式,请勿关闭窗口.............."); for (File imageFile : imageFiles) { String suffixFileName = imageFile.getName().substring(imageFile.getName().lastIndexOf(BusinessConstant.SPOT)); - if (!SUFFIX_LIST.contains(suffixFileName.toLowerCase())) { - throw new ImportDatasetException(" 图片文件文件夹中存在非法格式 "); + if (dataset.getDataType().compareTo(DatatypeEnum.IMAGE.getValue()) == 0) { + if (!imageConfig.getImageFormat().contains(suffixFileName.toLowerCase())) { + throw new ImportDatasetException(" 图片文件文件夹中存在非法格式 "); + } + } else { + if (!imageConfig.getTxtFormat().contains(suffixFileName.toLowerCase())) 
{ + throw new ImportDatasetException(" 文本文件文件夹中存在非法格式 "); + } } + } File[] annotationFiles = annotationRootFiles.listFiles(); - if (annotationFiles == null || annotationFiles.length == 0) { - throw new ImportDatasetException(" 图片文件下不存在标注文件 "); + if (annotationFiles == null || annotationFiles.length == MagicNumConstant.ZERO) { + throw new ImportDatasetException(" 文件下不存在标注文件 "); } + log.info("........校验文件格式完成,即将执行下一步操作,请勿关闭窗口........."); + log.info("........校验标注文件格式,请勿关闭窗口.............."); for (File annotationFile : annotationFiles) { if (!annotationFile.getName().toLowerCase().endsWith(BusinessConstant.SUFFIX_JSON.toLowerCase())) { throw new ImportDatasetException(" 标注文件文件夹中存在非法格式 "); } + if (!containsJsonKey(annotationFile)) { + throw new ImportDatasetException(" 标注文件【" + annotationFile.getName() + "】 未包含'name'节点 "); + } } + log.info("........校验标注文件格式完成,即将执行下一步操作,请勿关闭窗口.............."); return labelJsonFile; } @@ -199,20 +231,44 @@ public class DatasetImportHandle { public void executeUploadAndSave(List dataLabelList, String filePath, Dataset dataset) throws Exception { String localImageFilePath = filePath + HandleFileUtil.generateFilePath(BusinessConstant.IMAGE_ORIGIN); List imageFileNameList = FileUtil.listFileNames(localImageFilePath); - log.info("需要处理: 【" + imageFileNameList.size() + "】张图片"); + log.warn("........系统需要处理:【" + imageFileNameList.size() + "】个文件,请勿关闭窗口........."); + int batchNumber = MagicNumConstant.ZERO; int oneSize = ThreadUtils.createThread(imageFileNameList.size()); - log.info("需要创建线程数: 【" + oneSize + "】 条"); + ProcessBarUtil.initProcess("数据集导入", (long) imageFileNameList.size()); + if (imageFileNameList.size() > MagicNumConstant.TEN_THOUSAND) { + log.warn("........系统处理中........."); + List> partitionList = Lists.partition(imageFileNameList, MagicNumConstant.FIVE_THOUSAND); + for (List imageFileNameList1 : partitionList) { + batchNumber++; + dealFileList(imageFileNameList1, oneSize, dataLabelList, filePath, dataset, batchNumber); + } + } else { + log.warn("........系统处理中........."); + batchNumber++; + dealFileList(imageFileNameList, oneSize, dataLabelList, filePath, dataset, batchNumber); + } + } + + /** + * @param imageFileNameList 图片集合 + * @param oneSize 每次处理次数 + * @param dataLabelList 数据集标签集合 + * @param filePath 文件路径 + * @param dataset 数据集 + * @throws Exception + */ + public void dealFileList(List imageFileNameList, int oneSize, List dataLabelList, String filePath, Dataset dataset, int batchNumber) throws Exception { + int dealSize = MagicNumConstant.ZERO; List> partitions = new ArrayList<>(); List need = new ArrayList<>(); - AtomicInteger atomicInteger = new AtomicInteger(MagicNumConstant.ZERO); - for (String fileName : imageFileNameList) { - need.add(fileName); - if (need.size() == oneSize || atomicInteger.intValue() == imageFileNameList.size() - MagicNumConstant.ONE) { + for (int i = 0; i < imageFileNameList.size(); i++) { + need.add(imageFileNameList.get(i)); + if (need.size() == oneSize || i == imageFileNameList.size() - MagicNumConstant.ONE) { List fileNameList = new ArrayList<>(need); + dealSize += fileNameList.size(); need.clear(); partitions.add(() -> runTask(dataLabelList, dataset, fileNameList, filePath)); } - atomicInteger.getAndIncrement(); } ThreadUtils.runMultiThread(partitions); } @@ -220,71 +276,157 @@ public class DatasetImportHandle { /** * 实际执行任务 * - * @param dataset 数据集 - * @param fileNameList 文件名字集合 - * @param dataSetRootFilePath 文件路径 + * @param dataset 数据集 + * @param fileNameList 文件名字集合 + * @param dataSetRootFilePath 文件路径 * @return Integer 执行次数 */ 
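// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: executeUploadAndSave/dealFileList above
// first cut the file list into 5000-element batches (only when it exceeds
// 10000 entries), then cut each batch into chunkSize-sized task lists, one
// Callable per chunk. A self-contained sketch of that arithmetic, assuming
// Guava on the classpath as the patch's imports suggest; chunkSize stands in
// for ThreadUtils.createThread(listSize):
class BatchPartitionSketch {
    public static void main(String[] args) {
        java.util.List<String> files = new java.util.ArrayList<>();
        for (int i = 0; i < 12000; i++) {
            files.add("f" + i);
        }
        int chunkSize = Math.max(1, files.size() / 8); // stand-in for createThread(...)
        java.util.List<java.util.List<String>> batches = files.size() > 10000
                ? com.google.common.collect.Lists.partition(files, 5000)
                : java.util.Collections.singletonList(files);
        for (java.util.List<String> batch : batches) {
            long chunks = (batch.size() + chunkSize - 1L) / chunkSize;
            System.out.println("batch=" + batch.size() + ", chunks=" + chunks);
        }
        // prints: batch=5000, chunks=4 (twice), then batch=2000, chunks=2
    }
}
// ---------------------------------------------------------------------------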
private Integer runTask(List dataLabelList, Dataset dataset, List fileNameList, String dataSetRootFilePath) throws Exception { + Integer success = MagicNumConstant.ZERO; List dataFilesList = new ArrayList<>(); - AtomicInteger atomicInteger = new AtomicInteger(MagicNumConstant.ZERO); String imageFileBaseDir = BusinessConstant.MINIO_ROOT_PATH + BusinessConstant.FILE_SEPARATOR + dataset.getId() + BusinessConstant.FILE_SEPARATOR + BusinessConstant.IMAGE_ORIGIN + BusinessConstant.FILE_SEPARATOR; String annotationFileBaseDir = BusinessConstant.MINIO_ROOT_PATH + BusinessConstant.FILE_SEPARATOR + dataset.getId() + BusinessConstant.FILE_SEPARATOR + BusinessConstant.ANNOTATION + BusinessConstant.FILE_SEPARATOR; - for (String fileName : fileNameList) { - String imageUploadFile = imageFileBaseDir + fileName; - String annotationFileName = HandleFileUtil.readFileName(fileName); - File annotationFile = new File(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.ANNOTATION) + BusinessConstant.FILE_SEPARATOR + annotationFileName + BusinessConstant.SUFFIX_JSON.toLowerCase()); + for (int i = 0; i < fileNameList.size(); i++) { + String imageUploadFile = imageFileBaseDir + fileNameList.get(i); + String annotationFileName = HandleFileUtil.readFileName(fileNameList.get(i)); + File annotationFile = new File(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.ANNOTATION) + BusinessConstant.FILE_SEPARATOR + annotationFileName + BusinessConstant.SUFFIX_JSON.toLowerCase()); JSONArray jsonArray = replaceJsonNode(annotationFile, dataLabelList); - minioUtil.upLoadFile(imageUploadFile, FileUtil.getInputStream(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.IMAGE_ORIGIN) + BusinessConstant.FILE_SEPARATOR + fileName)); - minioUtil.upLoadFile(annotationFileBaseDir + annotationFileName, IOUtils.toInputStream(jsonArray.toString(), StandardCharsets.UTF_8.name())); - DataFile dataFile = new DataFile(); - dataFile.setName(annotationFileName); - dataFile.setUrl(minioConfig.getBucketName() + BusinessConstant.FILE_SEPARATOR + imageUploadFile); - dataFile.setStatus(FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE); - dataFile.setDatasetId(dataset.getId()); - dataFile.setFileType(MagicNumConstant.ZERO); - dataFile.setPid(MagicNumConstant.ZERO_LONG); - dataFile.setCreateUserId(dataset.getCreateUserId()); - try { - BufferedImage image = ImageIO.read(new File(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.IMAGE_ORIGIN) + BusinessConstant.FILE_SEPARATOR + fileName)); + minioUtil.upLoadFile(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.IMAGE_ORIGIN) + BusinessConstant.FILE_SEPARATOR + fileNameList.get(i), imageUploadFile); + String tempFilePath = annotationFile.getAbsolutePath() + "_temp.json"; + FileUtil.appendString(jsonArray.toJSONString(), tempFilePath, "UTF-8"); + minioUtil.upLoadFileByInputStream(annotationFileBaseDir + annotationFileName, tempFilePath); + FileUtil.del(tempFilePath); + datasetService.updateDatasetStatusIsImport(dataset); + DataFile dataFile = new DataFile(annotationFileName, dataset.getId(), minioConfig.getBucketName() + BusinessConstant.FILE_SEPARATOR + imageUploadFile, dataset.getCreateUserId(), + FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE, MagicNumConstant.ZERO, MagicNumConstant.ZERO_LONG, dataset.getCreateUserId()); + if (dataset.getDataType().compareTo(DatatypeEnum.IMAGE.getValue()) == 0) { + BufferedImage image; + try { + image = ImageIO.read(new File(dataSetRootFilePath + 
HandleFileUtil.generateFilePath(BusinessConstant.IMAGE_ORIGIN) + BusinessConstant.FILE_SEPARATOR + fileNameList.get(i))); + } catch (IOException e) { + throw new ImportDatasetException(" 读取图片高和宽失败 "); + } dataFile.setWidth(image.getWidth()); dataFile.setHeight(image.getHeight()); - } catch (IOException e) { - throw new ImportDatasetException(" 读取图片高和宽失败 "); } - dataFile.setOriginUserId(dataset.getCreateUserId()); dataFilesList.add(dataFile); - if (dataFilesList.size() % MagicNumConstant.FIVE_HUNDRED == MagicNumConstant.ZERO || atomicInteger.intValue() == fileNameList.size() - MagicNumConstant.ONE) { - long startDataFileIndex = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE, dataFilesList.size()); - for (DataFile dataFileEntity : dataFilesList) { - dataFileEntity.setId(startDataFileIndex++); + if (dataFilesList.size() % MagicNumConstant.FIVE_HUNDRED == MagicNumConstant.ZERO || i == fileNameList.size() - MagicNumConstant.ONE) { + if(!CollectionUtils.isEmpty(dataFilesList)){ + Queue dataFileIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE, dataFilesList.size()); + for (DataFile dataFileEntity : dataFilesList) { + dataFileEntity.setId(dataFileIds.poll()); + } + saveDataFile(dataFilesList); } - saveDataFile(dataFilesList); List dataVersionFileList = new ArrayList<>(); for (DataFile file : dataFilesList) { - DataVersionFile dataVersionFile = new DataVersionFile(); - dataVersionFile.setDatasetId(dataset.getId()); - dataVersionFile.setFileId(file.getId()); - dataVersionFile.setStatus(MagicNumConstant.ZERO); - dataVersionFile.setAnnotationStatus(FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE); + File annotationFileTxt = new File(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.ANNOTATION) + BusinessConstant.FILE_SEPARATOR + file.getName() + BusinessConstant.SUFFIX_JSON.toLowerCase()); + JSONArray jsonArrayTxt = replaceJsonNode(annotationFileTxt, dataLabelList); + DataVersionFile dataVersionFile = new DataVersionFile(dataset.getId(), file.getId(), FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE, MagicNumConstant.ZERO, file.getName()); dataVersionFileList.add(dataVersionFile); + if (DatatypeEnum.TXT.getValue().equals(dataset.getDataType())) { + try { + String bucketName = StringUtils.substringBefore(file.getUrl(), "/"); + String fullFilePath = StringUtils.substringAfter(file.getUrl(), "/"); + String content = minioUtil.readString(bucketName, fullFilePath); + Map jsonMap = new HashMap<>(); + jsonMap.put("content", content); + jsonMap.put("name", file.getName()); + jsonMap.put("status", FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE.toString()); + jsonMap.put("datasetId", dataset.getId().toString()); + jsonMap.put("createUserId", file.getCreateUserId() == null ? null : file.getCreateUserId().toString()); + jsonMap.put("createTime", file.getCreateTime() == null ? null : file.getCreateTime().toString()); + jsonMap.put("updateUserId", file.getUpdateUserId() == null ? null : file.getUpdateUserId().toString()); + jsonMap.put("updateTime", file.getUpdateTime() == null ? null : file.getUpdateTime().toString()); + jsonMap.put("fileType", file.getFileType() == null ? null : file.getFileType().toString()); + jsonMap.put("enhanceType", file.getEnhanceType() == null ? 
null : file.getEnhanceType().toString());
+                        jsonMap.put("originUserId", file.getOriginUserId().toString());
+                        jsonMap.put("prediction", jsonArrayTxt.getJSONObject(0).get("score").toString());
+                        jsonMap.put("labelId", jsonArrayTxt.getJSONObject(0).get("category_id").toString());
+                        jsonMap.put("versionName", StringUtils.isEmpty(dataset.getCurrentVersionName()) ? "V0000" : dataset.getCurrentVersionName());
+                        bulkProcessor.add(new IndexRequest(esIndex, "_doc", file.getId().toString()).source(jsonMap));
+                    } catch (Exception e) {
+                        LogUtil.error(LogEnum.BIZ_DATASET, "上传es失败: {} ", e);
+                    }
+                }
             }
-            long startDataFileVersionIndex = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_VERSION_FILE, dataVersionFileList.size());
-            for (DataVersionFile dataVersionFile : dataVersionFileList) {
-                dataVersionFile.setId(startDataFileVersionIndex++);
+            if (!CollectionUtils.isEmpty(dataVersionFileList)) {
+                Queue<Long> dataFileVersionIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_VERSION_FILE, dataVersionFileList.size());
+                for (DataVersionFile dataVersionFile : dataVersionFileList) {
+                    dataVersionFile.setId(dataFileVersionIds.poll());
+                }
+                saveDataVersionFile(dataVersionFileList);
             }
-            saveDataVersionFile(dataVersionFileList);
+            List<DataFileAnnotation> dataFileAnnotations = new ArrayList<>();
+            for (DataVersionFile dataVersionFile : dataVersionFileList) {
+                File annotationFileDb = new File(dataSetRootFilePath + HandleFileUtil.generateFilePath(BusinessConstant.ANNOTATION) + BusinessConstant.FILE_SEPARATOR + dataVersionFile.getFileName() + BusinessConstant.SUFFIX_JSON.toLowerCase());
+                JSONArray jsonArrayDb = replaceJsonNode(annotationFileDb, dataLabelList);
+                List<AnnotationDTO> annotationDTOSDb = JSONObject.parseArray(jsonArrayDb.toJSONString(), AnnotationDTO.class);
+                if (!CollectionUtils.isEmpty(jsonArrayDb)) {
+                    if (AnnotateTypeEnum.CLASSIFICATION.getValue().equals(dataset.getAnnotateType()) || AnnotateTypeEnum.TEXT_CLASSIFICATION.getValue().equals(dataset.getAnnotateType())) {
+                        AnnotationDTO annotationDTO = annotationDTOSDb.stream().max(Comparator.comparingDouble(AnnotationDTO::getScore)).get();
+                        Long labelId = annotationDTO.getCategoryId();
+                        Double prediction = annotationDTO.getScore();
+                        dataFileAnnotations.add(new DataFileAnnotation(dataset.getId(), labelId, dataVersionFile.getId(), prediction, dataset.getCreateUserId(), dataVersionFile.getFileName()));
+                    }
+                    if (AnnotateTypeEnum.OBJECT_DETECTION.getValue().equals(dataset.getAnnotateType()) || AnnotateTypeEnum.OBJECT_TRACK.getValue().equals(dataset.getAnnotateType())
+                            || AnnotateTypeEnum.SEMANTIC_CUP.getValue().equals(dataset.getAnnotateType())) {
+                        for (int j = 0; j < jsonArrayDb.size(); j++) {
+                            Object predictionObject = jsonArrayDb.getJSONObject(j).get("score");
+                            Double prediction = null;
+                            if (!Objects.isNull(predictionObject)) {
+                                prediction = Double.parseDouble(String.valueOf(predictionObject));
+                            }
+                            Long labelId = jsonArrayDb.getJSONObject(j).getLong("category_id");
+                            DataFileAnnotation dataFileAnnotation = new DataFileAnnotation(dataset.getId(), labelId, dataVersionFile.getId(), prediction, dataset.getCreateUserId(), dataVersionFile.getFileName());
+                            dataFileAnnotations.add(dataFileAnnotation);
+                        }
+                    }
+                }
+                if (!CollectionUtils.isEmpty(dataFileAnnotations)) {
+                    Queue<Long> dataFileAnnotationIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE_ANNOTATION, dataFileAnnotations.size());
+                    for (DataFileAnnotation dataFileAnnotation : dataFileAnnotations) {
+                        dataFileAnnotation.setId(dataFileAnnotationIds.poll());
+                    }
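+                    // The sequence generator returns a queue of pre-allocated ids sized to
+                    // the batch, and every entity polls exactly one; a minimal usage sketch
+                    // (businessCode and entities are illustrative names):
+                    //
+                    //     Queue<Long> ids = generatorKeyUtil.getSequenceByBusinessCode(businessCode, entities.size());
+                    //     entities.forEach(entity -> entity.setId(ids.poll()));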
+                    saveDataFileAnnotation(dataFileAnnotations);
+                }
+                dataFileAnnotations.clear();
+            }
+            ProcessBarUtil.processBar01((long) dataVersionFileList.size());
             dataVersionFileList.clear();
             dataFilesList.clear();
         }
-        atomicInteger.getAndIncrement();
+        success++;
     }
-    return atomicInteger.getAndIncrement();
+    return success;
 }

+    /**
+     * 截取文本摘要信息
+     *
+     * @param file 文本文件
+     * @return String 文本摘要信息
+     */
+    public String interceptText(File file) {
+        StringBuilder result = new StringBuilder();
+        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
+            String s;
+            while ((s = br.readLine()) != null) {
+                result.append(s);
+            }
+        } catch (Exception e) {
+            LogUtil.error(LogEnum.BIZ_DATASET, "读取文本文件失败: {} ", e);
+        }
+        return StringUtils.substring(result.toString(), MagicNumConstant.ZERO, MagicNumConstant.FOUR_HUNDRED);
+    }
+
     /**
      * 检查并且替换JSON中的节点
      *
@@ -350,6 +492,34 @@ public class DatasetImportHandle {
         return listKey.stream().distinct().collect(Collectors.toList());
     }

+    /**
+     * 校验json文件中是否包含name节点
+     *
+     * @param file 标注文件
+     * @return true/false true 包含 false 不包含
+     */
+    public boolean containsJsonKey(File file) {
+        boolean flag = true;
+        String annotationFileContext;
+        try {
+            annotationFileContext = HandleFileUtil.readFile(file);
+        } catch (IOException e) {
+            throw new ImportDatasetException(" 解析【" + file.getName() + "】文件出错,请确认内容是否正确");
+        }
+        if (!StringUtils.isEmpty(annotationFileContext)) {
+            JSONArray jsonArray = JSONArray.parseArray(annotationFileContext);
+            for (Object object : jsonArray) {
+                LinkedHashMap<String, Object> jsonMap = JSON.parseObject(object.toString(), new TypeReference<LinkedHashMap<String, Object>>() {
+                });
+                if (!jsonMap.containsKey("name")) {
+                    flag = false;
+                }
+            }
+        }
+        return flag;
+    }
+
+
     /**
      * 读取标签文件中标签数据
      *
@@ -376,7 +546,7 @@ public class DatasetImportHandle {
         String groupName = HandleFileUtil.getLabelGroupName(labelGroupName);
         int count = dataLabelGroupService.selectByLabelGroupName(groupName);
         if (count > MagicNumConstant.ZERO) {
-            throw new ImportDatasetException(" 标签组名称【" + groupName + "】已存在,请修改label_xxx.json文件名 ");
+            throw new ImportDatasetException(" 标签组名称【" + groupName + "】已存在,请修改label_{name}.json文件名 ");
         }
     }
@@ -390,6 +560,11 @@ public class DatasetImportHandle {
     public DataLabelGroup saveDataLabelGroup(String labelGroupName, Dataset dataset) {
         long timeStamp = System.currentTimeMillis();
         DataLabelGroup dataLabelGroup = new DataLabelGroup();
+        if (dataset.getDataType().equals(DatatypeEnum.TXT.getValue())) {
+            dataLabelGroup.setLabelGroupType(MagicNumConstant.ONE);
+        } else {
+            dataLabelGroup.setLabelGroupType(MagicNumConstant.ZERO);
+        }
         dataLabelGroup.setName(labelGroupName);
         dataLabelGroup.setOriginUserId(dataset.getCreateUserId());
         dataLabelGroup.setType(MagicNumConstant.ZERO_LONG);
@@ -422,7 +597,7 @@ public class DatasetImportHandle {
         List<DataGroupLabel> listDataGroupLabel = new ArrayList<>();
         for (DatasetDataLabel datasetDataLabel : listDatasetDataLabel) {
             DataGroupLabel dataGroupLabel = new DataGroupLabel();
-            dataGroupLabel.setLabelId(datasetDataLabel.getId());
+            dataGroupLabel.setLabelId(datasetDataLabel.getLabelId());
             dataGroupLabel.setLabelGroupId(dataLabelGroupId);
             listDataGroupLabel.add(dataGroupLabel);
         }
@@ -435,6 +610,7 @@ public class DatasetImportHandle {
      *
      * @param listDataFile file集合
      */
+    @Transactional(rollbackFor = Exception.class)
     public void saveDataFile(List<DataFile> listDataFile) {
         dataFileService.saveBatchDataFile(listDataFile);
     }
@@ -444,6 +620,7 @@ public class DatasetImportHandle {
      *
      * @param listDataVersionFile 文件版本数据
      */
+    @Transactional(rollbackFor = Exception.class)
     public void saveDataVersionFile(List
listDataVersionFile) { dataVersionFileService.saveBatchDataFileVersion(listDataVersionFile); } @@ -454,6 +631,7 @@ public class DatasetImportHandle { * * @param listDatasetDataLabel 标签与数据集关系表 */ + @Transactional(rollbackFor = Exception.class) public void saveDatasetDataLabel(List listDatasetDataLabel) { datasetDataLabelService.saveBatchDatasetDataLabel(listDatasetDataLabel); } @@ -461,12 +639,23 @@ public class DatasetImportHandle { /** * 批量保存标签与标签组的关系 * - * @param listDataGroupLabel 标签与标签组集合 + * @param listDataGroupLabel 标签与标签组集合 */ + @Transactional(rollbackFor = Exception.class) public void saveDatasetDataGroupLabel(List listDataGroupLabel) { dataGroupLabelService.saveDataGroupLabel(listDataGroupLabel); } + /** + * 批量保存nlp中间表 + * + * @param dataFileAnnotations nlp集合 + */ + @Transactional(rollbackFor = Exception.class) + public void saveDataFileAnnotation(List dataFileAnnotations) { + dataFileAnnotationService.saveDataFileAnnotation(dataFileAnnotations); + } + /** * 查询数据集 * @@ -480,26 +669,42 @@ public class DatasetImportHandle { /** * 校验数据集ID * - * @param scanner 控制台输入参数 + * @param scanner 控制台输入参数 * @return Dataset 数据集 */ public Dataset verificationDatasetId(Scanner scanner) { - log.warn("# 请输入数据集ID #"); - String datasetIdStr = scanner.nextLine(); - long datasetId; - try { - datasetId = Long.parseLong(datasetIdStr.trim()); - } catch (Exception e) { - throw new ImportDatasetException(" 数据集ID非法,请重新输入 "); - } - Dataset dataset = findDataset(datasetId); - if (dataset == null) { - throw new ImportDatasetException(" 数据集ID不存在,请重新输入 "); - } - int countDataLabel = datasetService.findDataLabelById(dataset.getId()); - int countDataFile = datasetService.findDataFileById(dataset.getId()); - if (countDataLabel > MagicNumConstant.ZERO || countDataFile > MagicNumConstant.ZERO) { - throw new ImportDatasetException(" 当前数据集文件已存在,请勿重新导入 "); + boolean flag = false; + Dataset dataset = new Dataset(); + while (!flag) { + System.out.println(" "); + System.out.println("# 请输入数据集ID #"); + String datasetIdStr = scanner.nextLine(); + long datasetId = 0; + try { + datasetId = Long.parseLong(datasetIdStr.trim()); + } catch (Exception e) { + log.error(""); + PrintUtils.printLine(" Error: 数据集ID非法,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } + dataset = findDataset(datasetId); + if (dataset == null) { + log.error(""); + PrintUtils.printLine(" Error: 数据集ID不存在,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } + int countDataLabel = datasetService.findDataLabelById(dataset.getId()); + int countDataFile = datasetService.findDataFileById(dataset.getId()); + if (countDataLabel > MagicNumConstant.ZERO || countDataFile > MagicNumConstant.ZERO) { + log.error(""); + PrintUtils.printLine(" Error: 当前数据集文件已存在,请勿重新导入 ", PrintUtils.RED); + log.error(""); + continue; + } else { + flag = true; + } } return dataset; } @@ -511,11 +716,21 @@ public class DatasetImportHandle { * @return String 字符串 */ public String verificationFilePath(Scanner scanner) { - log.warn("# 请输入待导入数据集绝对路径地址 #"); - String filePath = scanner.nextLine(); - File file = new File(filePath.trim()); - if (!file.exists()) { - throw new ImportDatasetException(" 【" + filePath + "】 文件路径不存在,请重新输入"); + boolean flag = false; + String filePath = ""; + while (!flag) { + System.out.println(" "); + System.out.println("# 请输入待导入本地数据集绝对路径 #"); + filePath = scanner.nextLine(); + File file = new File(filePath.trim()); + if (!file.exists()) { + log.error(""); + PrintUtils.printLine(" 【" + filePath + "】文件路径不存在,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } 
else { + flag = true; + } } return filePath; } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/handle/PresetDatasetImportHandle.java b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/PresetDatasetImportHandle.java new file mode 100644 index 0000000..5a49407 --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/handle/PresetDatasetImportHandle.java @@ -0,0 +1,938 @@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.datasetutil.handle; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.core.util.RandomUtil; +import cn.hutool.core.util.StrUtil; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.dubhe.datasetutil.common.base.MagicNumConstant; +import org.dubhe.datasetutil.common.config.MinioConfig; +import org.dubhe.datasetutil.common.constant.BusinessConstant; +import org.dubhe.datasetutil.common.constant.FileStateCodeConstant; +import org.dubhe.datasetutil.common.enums.DatatypeEnum; +import org.dubhe.datasetutil.common.enums.LogEnum; +import org.dubhe.datasetutil.common.enums.PresetDatasetEnum; +import org.dubhe.datasetutil.common.exception.ImportDatasetException; +import org.dubhe.datasetutil.common.util.*; +import org.dubhe.datasetutil.domain.dto.FileAnnotationDTO; +import org.dubhe.datasetutil.domain.entity.*; +import org.dubhe.datasetutil.service.*; +import org.elasticsearch.action.bulk.BulkProcessor; +import org.elasticsearch.action.index.IndexRequest; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.CollectionUtils; +import org.springframework.util.ObjectUtils; + +import javax.annotation.Resource; +import javax.imageio.ImageIO; +import java.awt.image.BufferedImage; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.LocalDateTime; +import java.util.*; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import static org.dubhe.datasetutil.common.constant.BusinessConstant.FILE_SEPARATOR; + +/** + * @description 导入预置数据集工具类 + * @date 2020-10-12 + */ +@Slf4j +@Component +public class PresetDatasetImportHandle { + + /** + * esSearch索引 + */ + @Value("${es.index}") + private String esIndex; + + @Autowired + private DatasetService datasetService; + + @Autowired + private DataFileService dataFileService; + + 
@Autowired + private DataLabelService dataLabelService; + + @Autowired + private DatasetDataLabelService datasetDataLabelService; + + @Autowired + private DataVersionFileService dataVersionFileService; + + @Autowired + private DataFileAnnotationService dataFileAnnotationService; + + @Autowired + private MinioUtil minioUtil; + + @Autowired + private MinioConfig minioConfig; + + @Autowired + private GeneratorKeyUtil generatorKeyUtil; + + @Resource + private BulkProcessor bulkProcessor; + + private final AtomicInteger fileCount = new AtomicInteger(); + + + private final List annotationFiles = new LinkedList<>(); + + private final List originFiles = new LinkedList<>(); + + private final Map fileAnnotationMap = new ConcurrentHashMap<>(); + + + @Value("${minio.dosAddress}") + private String dosAddress; + + private final static Set datasetIds = new HashSet<>(); + + + private volatile List labels = new ArrayList<>(); + + static { + PresetDatasetEnum[] values = PresetDatasetEnum.values(); + for (PresetDatasetEnum datasetEnum : values) { + datasetIds.add(datasetEnum.getType()); + } + } + + /** + * 导入预置数据集 + * + * @param scanner 控制台输入数据 + */ + public synchronized void importPresetDataset(Scanner scanner) { + //校验数据集信息 + long datasetId = verificationDatasetId(scanner); + try { + LocalDateTime startTime = LocalDateTime.now(); + //校验文件目录并保存sql文件信息 + String rootPath = verificationFilePathAndSaveSqlData(scanner, datasetId); + //构建上传文件路径数据 + Dataset dataset = findDataset(datasetId); + if (Objects.isNull(dataset)) { + throw new ImportDatasetException("数据集ID: " + datasetId + "不存在!"); + } + //上传文件到 minio + executeUploadToMinio(dataset, rootPath); + executeUploadToDB(dataset); + LocalDateTime endTime = LocalDateTime.now(); + Duration between = Duration.between(startTime, endTime); + log.warn(""); + PrintUtils.printLine(" Success: 执行成功 ", PrintUtils.GREEN); + PrintUtils.printLine(" 执行开始时间:{" + startTime + "} 执行结束时间:{" + endTime + "} 执行总时长(分钟){" + between.toMinutes() + "}", PrintUtils.YELLOW); + log.warn(""); + System.out.println("# 是否结束? 
Y / N #"); + Scanner scannerExit = new Scanner(System.in); + if (BusinessConstant.Y.toLowerCase().equals(scannerExit.nextLine().toLowerCase())) { + System.exit(MagicNumConstant.ZERO); + } + } catch (Exception e) { + log.error(""); + PrintUtils.printLine(" Error:" + e.getMessage(), PrintUtils.RED); + log.error(""); + Dataset dataset = findDataset(datasetId); + if (!Objects.isNull(dataset)) { + PrintUtils.printLine(" 执行异常,正在清理异常数据,请勿关闭窗口 ", PrintUtils.RED); + //删除minio数据 + delDatasetMinioInfo(dataset.getUri()); + //删除数据集信息 + delDatasetInfoById(datasetId, dataset.getDataType()); + } + } finally { + originFiles.clear(); + annotationFiles.clear(); + labels.clear(); + } + + } + + + /** + * 实际上传文件到Minio + * + * @param dataset 数据集实体 + * @param rootPath 文件根路径 + * @throws Exception 上传异常 + */ + private void executeUploadToMinio(Dataset dataset, String rootPath) throws Exception { + List allFileList = new LinkedList<>(annotationFiles); + allFileList.addAll(originFiles); + log.warn("........系统需要处理:【" + allFileList.size() + "】份文件,请勿关闭窗口........."); + int batchNumber = MagicNumConstant.ZERO; + int oneSize = ThreadUtils.createThread(allFileList.size()); + ProcessBarUtil.initProcess("预置数据集导入", (long) allFileList.size()); + if (allFileList.size() > MagicNumConstant.TEN_THOUSAND) { + log.warn("........系统处理中........."); + List> partitionList = Lists.partition(allFileList, MagicNumConstant.FIVE_THOUSAND); + for (List imageFileNameList1 : partitionList) { + batchNumber++; + dealFileListToMinio(imageFileNameList1, oneSize, dataset, batchNumber, rootPath); + } + } else { + log.warn("........系统处理中........."); + batchNumber++; + dealFileListToMinio(allFileList, oneSize, dataset, batchNumber, rootPath); + } + + } + + + /** + * 实际上传文件到数据库 + * + * @param dataset 数据集实体 + * @throws Exception 上传异常 + */ + private void executeUploadToDB(Dataset dataset) throws Exception { + log.warn("........系统需要处理:【" + originFiles.size() + "】份文件到数据库,请勿关闭窗口........."); + int batchNumber = MagicNumConstant.ZERO; + int oneSize = ThreadUtils.createThread(originFiles.size()); + //视频数据导入单线程顺序处理 + if (DatatypeEnum.VIDEO.getValue().compareTo(dataset.getDataType()) == 0) { + sortByName(originFiles); + runTaskSql(originFiles, dataset); + log.warn("#-------------系统已总共成功处理文件 【" + oneSize + "】个-------------#"); + return; + } + if (originFiles.size() > MagicNumConstant.TEN_THOUSAND) { + List> partitionList = Lists.partition(originFiles, MagicNumConstant.FIVE_THOUSAND); + for (List imageFileNameList1 : partitionList) { + batchNumber++; + LogUtil.info(LogEnum.BIZ_DATASET, "第: 【" + batchNumber + "】批次,需要处理:【" + imageFileNameList1.size() + "】 文件: "); + dealFileListToSql(imageFileNameList1, oneSize, dataset, batchNumber); + } + } else { + batchNumber++; + dealFileListToSql(originFiles, oneSize, dataset, batchNumber); + } + + } + + + /** + * 多线程上传数据到minio + * + * @param allFileList 文件数据 + * @param oneSize 每次处理次数 + * @param dataset 数据集实体 + * @param batchNumber 上传批次 + * @param rootPath 根路径 + * @throws Exception 上传异常 + */ + public void dealFileListToMinio(List allFileList, int oneSize, Dataset dataset, int batchNumber, String rootPath) throws Exception { + List> partitions = new LinkedList<>(); + List need = new LinkedList<>(); + for (int i = 0; i < allFileList.size(); i++) { + need.add(allFileList.get(i)); + if (need.size() == oneSize || i == allFileList.size() - MagicNumConstant.ONE) { + List fileNameList = new LinkedList<>(need); + + need.clear(); + partitions.add(() -> runTask(fileNameList, dataset)); + } + } + ThreadUtils.runMultiThread(partitions); + } + + 
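+    /**
+     * Illustrative sketch of the chunking rule shared by dealFileListToMinio above and
+     * dealFileListToSql below: items accumulate until a chunk holds oneSize elements or
+     * the input is exhausted. The helper name partitionForTasks is an assumed,
+     * illustrative name, not referenced elsewhere; e.g. partitionForTasks(allFileList,
+     * oneSize) yields the same chunks the loops in those methods build inline.
+     *
+     * @param items   待切分集合
+     * @param oneSize 每个子任务的元素数量
+     * @return 切分后的子任务集合
+     */
+    private static <T> List<List<T>> partitionForTasks(List<T> items, int oneSize) {
+        List<List<T>> parts = new LinkedList<>();
+        List<T> need = new LinkedList<>();
+        for (int i = 0; i < items.size(); i++) {
+            need.add(items.get(i));
+            // a chunk closes when it reaches oneSize, or at the last element
+            if (need.size() == oneSize || i == items.size() - MagicNumConstant.ONE) {
+                parts.add(new LinkedList<>(need));
+                need.clear();
+            }
+        }
+        return parts;
+    }
+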
+ /** + * 多线程上传数据到sql + * + * @param allFileList 文件数据 + * @param oneSize 每次处理次数 + * @param dataset 数据集实体 + * @param batchNumber 上传批次 + * @throws Exception 上传异常 + */ + public void dealFileListToSql(List allFileList, int oneSize, Dataset dataset, int batchNumber) throws Exception { + int dealSize = MagicNumConstant.ZERO; + List> partitions = new LinkedList<>(); + List need = new LinkedList<>(); + for (int i = 0; i < allFileList.size(); i++) { + need.add(allFileList.get(i)); + if (need.size() == oneSize || i == allFileList.size() - MagicNumConstant.ONE) { + List fileNameList = new LinkedList<>(need); + dealSize += fileNameList.size(); + LogUtil.info(LogEnum.BIZ_DATASET, "系统将处理第: 【" + batchNumber + "】批次,需要处理:【" + dealSize + "】个文件至数据库"); + need.clear(); + partitions.add(() -> runTaskSql(fileNameList, dataset)); + } + } + ThreadUtils.runMultiThread(partitions); + } + + + /** + * 实际实际上传执行方法 + * + * @param files 上传文件 + * @param dataset 数据集实体 + * @return 执行次数 + */ + private Integer runTaskSql(List files, Dataset dataset) { + Integer success = MagicNumConstant.ZERO; + List dataFilesList = new LinkedList<>(); + for (int i = 0; i < files.size(); i++) { + File file = files.get(i); + //绝对路径 + String absolutePath = file.getAbsolutePath(); + //根目录 /${datasetID}/ + String rootName = BusinessConstant.FILE_SEPARATOR + dataset.getId() + BusinessConstant.FILE_SEPARATOR; + // dubhe-dev/dataset/${datasetID}/origin/${a.jpg} + String fileName = minioConfig.getBucketName() + File.separator + BusinessConstant.MINIO_ROOT_PATH + rootName + + StringUtils.substringAfter(absolutePath, File.separator + dataset.getId() + File.separator); + //转换 Linux 斜杠 + String targetFilePath = StringUtils.replaceChars(fileName, "\\", "/"); + //构建 dataset对象 + DataFile dataFile = new DataFile(); + dataFile.setName(HandleFileUtil.readFileName(file.getName())); + dataFile.setUrl(targetFilePath); + dataFile.setStatus(FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE); + dataFile.setDatasetId(dataset.getId()); + dataFile.setFileType(MagicNumConstant.ZERO); + dataFile.setPid(MagicNumConstant.ZERO_LONG); + dataFile.setCreateUserId(dataset.getCreateUserId()); + dataFile.setOriginUserId(MagicNumConstant.ZERO_LONG); + if (dataset.getDataType().compareTo(DatatypeEnum.IMAGE.getValue()) == 0) { + try { + BufferedImage image = ImageIO.read(file); + dataFile.setWidth(image.getWidth()); + dataFile.setHeight(image.getHeight()); + } catch (IOException e) { + throw new ImportDatasetException(" 读取图片高和宽失败 "); + } + } + dataFile.setOriginUserId(MagicNumConstant.ZERO_LONG); + dataFilesList.add(dataFile); + // 500 写一次库 或者最后写一次库 + if (dataFilesList.size() % MagicNumConstant.FIVE_HUNDRED == MagicNumConstant.ZERO || i == files.size() - MagicNumConstant.ONE) { + Queue dataFileIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE, dataFilesList.size()); + for (DataFile dataFileEntity : dataFilesList) { + dataFileEntity.setId(dataFileIds.poll()); + } + //写 dataset_file 表 + dataFileService.saveBatchDataFile(dataFilesList); + //构建 DatasetVersionFile对象 + List dataVersionFileList = new ArrayList<>(); + for (DataFile datasetFile : dataFilesList) { + DataVersionFile dataVersionFile = new DataVersionFile(); + dataVersionFile.setDatasetId(dataset.getId()); + dataVersionFile.setFileId(datasetFile.getId()); + dataVersionFile.setStatus(MagicNumConstant.TWO); + dataVersionFile.setVersionName(dataset.getDataType().compareTo(DatatypeEnum.TXT.getValue()) == 0 ? 
null : BusinessConstant.V0001); + dataVersionFile.setAnnotationStatus(FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE); + dataVersionFile.setFileName(datasetFile.getName()); + dataVersionFileList.add(dataVersionFile); + } + Queue dataFileVersionIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_VERSION_FILE, dataVersionFileList.size()); + for (DataVersionFile dataVersionFile : dataVersionFileList) { + dataVersionFile.setId(dataFileVersionIds.poll()); + } + //写 dataset_version_file 表 + dataVersionFileService.saveBatchDataFileVersion(dataVersionFileList); + + List dataFileAnnotations = dataVersionFileList.stream().map(dataVersionFile -> { + + FileAnnotationDTO fileAnnotationDTO = null; + try { + fileAnnotationDTO = fileAnnotationMap.get(dataVersionFile.getFileName()); + //构建 datasetFileAnnotation 对象 + DataFileAnnotation dataFileAnnotation = DataFileAnnotation.builder() + .datasetId(dataset.getId()) + .LabelId(ObjectUtils.isEmpty(fileAnnotationDTO) ? null : fileAnnotationDTO.getCategoryId()) + .prediction(1D) + .versionFileId(dataVersionFile.getId()) + .build(); + if (DatatypeEnum.TXT.getValue().equals(dataset.getDataType())) { + try { + String bucketName = StringUtils.substringBefore(dataFile.getUrl(), "/"); + String fullFilePath = StringUtils.substringAfter(dataFile.getUrl(), "/"); + String content = minioUtil.readString(bucketName, fullFilePath); + Map jsonMap = new HashMap<>(); + jsonMap.put("content", content); + jsonMap.put("name", dataFile.getName()); + jsonMap.put("status", FileStateCodeConstant.ANNOTATION_COMPLETE_FILE_STATE.toString()); + jsonMap.put("datasetId", dataset.getId().toString()); + jsonMap.put("createUserId", dataFile.getCreateUserId() == null ? null : dataFile.getCreateUserId().toString()); + jsonMap.put("createTime", dataFile.getCreateTime() == null ? null : dataFile.getCreateTime().toString()); + jsonMap.put("updateUserId", dataFile.getUpdateUserId() == null ? null : dataFile.getUpdateUserId().toString()); + jsonMap.put("updateTime", dataFile.getUpdateTime() == null ? null : dataFile.getUpdateTime().toString()); + jsonMap.put("fileType", dataFile.getFileType() == null ? null : dataFile.getFileType().toString()); + jsonMap.put("enhanceType", dataFile.getEnhanceType() == null ? null : dataFile.getEnhanceType().toString()); + jsonMap.put("originUserId", dataFile.getOriginUserId().toString()); + jsonMap.put("prediction", "1"); + jsonMap.put("labelId", dataFileAnnotation.getLabelId().toString()); + jsonMap.put("versionName", StringUtils.isEmpty(dataset.getCurrentVersionName())?"V0000" : dataset.getCurrentVersionName()); + IndexRequest request = new IndexRequest(esIndex); + request.source(jsonMap); + request.id(dataVersionFile.getFileId().toString()); + bulkProcessor.add(request); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_DATASET, "上传es失败: {} ", e); + } + } + return ObjectUtils.isEmpty(dataFileAnnotation.getLabelId()) ? 
null : dataFileAnnotation; + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + ).filter(dataVersionFile -> !ObjectUtils.isEmpty(dataVersionFile)).collect(Collectors.toList()); + Queue dataFileAnnotationIds = generatorKeyUtil.getSequenceByBusinessCode(BusinessConstant.DATA_FILE_ANNOTATION, dataFileAnnotations.size()); + for (DataFileAnnotation dataFileAnnotation : dataFileAnnotations) { + dataFileAnnotation.setId(dataFileAnnotationIds.poll()); + } + //写 dataset_file_annotation 表 + dataFileAnnotationService.saveDataFileAnnotation(dataFileAnnotations); + + dataFileAnnotations.clear(); + + dataVersionFileList.clear(); + dataFilesList.clear(); + } + success++; + } + bulkProcessor.flush(); + return success; + + } + + /** + * 实际执行任务 + * + * @param files 上传文件 + * @param dataset 数据集 + * @return Integer 执行次数 + */ + private Integer runTask(List files, Dataset dataset) throws Exception { + Integer success = MagicNumConstant.ZERO; + + for (int i = 0; i < files.size(); i++) { + File file = files.get(i); + File parentFile = file.getParentFile(); + String absolutePath = file.getAbsolutePath(); + String rootName = BusinessConstant.FILE_SEPARATOR + dataset.getId() + BusinessConstant.FILE_SEPARATOR; + String fileName = StringUtils.substringAfter(absolutePath, File.separator + dataset.getId() + File.separator); + String targetFilePath = StringUtils.replaceChars(BusinessConstant.MINIO_ROOT_PATH + rootName + fileName, "\\", "/"); + + if (BusinessConstant.ANNOTATION.equals(parentFile.getName()) || ( + BusinessConstant.ANNOTATION.equals(parentFile.getParentFile().getName()) && + BusinessConstant.V0001.equals(parentFile.getName()) + )) { + targetFilePath = buildFileName(targetFilePath); + JSONArray jsonArray = replaceJsonNode(file, labels, dataset); + String tempFilePath = absolutePath + "_temp.json"; + FileUtil.appendString(jsonArray.toJSONString(), tempFilePath, "UTF-8"); + minioUtil.upLoadFileByInputStream(targetFilePath, tempFilePath); + FileUtil.del(tempFilePath); + } else { + minioUtil.upLoadFile(absolutePath, targetFilePath); + } + ProcessBarUtil.processBar01(1L); + success++; + } + + return success; + + } + + + /** + * 构建文件名称 + * + * @param fileName 文件名称 + * @return 构建后文件名称 + */ + public String buildFileName(String fileName) { + if (fileName.toLowerCase().endsWith(BusinessConstant.SUFFIX_JSON.toLowerCase())) { + fileName = StringUtils.substringBefore(fileName, BusinessConstant.SUFFIX_JSON.toLowerCase()); + } + return fileName; + } + + /** + * 校验数据集ID + * + * @param scanner 控制台输入参数 + */ + public long verificationDatasetId(Scanner scanner) { + boolean flag = false; + long datasetId = 0; + while (!flag) { + System.out.println(""); + System.out.println("# 请选择预置数据集 (参考文档: " + dosAddress + ") #"); + System.out.println(""); + for (PresetDatasetEnum presetDatasetEnum : PresetDatasetEnum.values()) { + StringBuffer sb = new StringBuffer(); + sb.append("# ").append(presetDatasetEnum.getType()).append(":").append(presetDatasetEnum.getDesc()).append(" "); + System.out.println(sb.toString()); + } + String datasetIdStr = scanner.nextLine(); + + try { + datasetId = Long.parseLong(datasetIdStr.trim()); + } catch (Exception e) { + log.error(""); + PrintUtils.printLine(" Error: 数据集ID非法,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } + + long finalDatasetId = datasetId; + Optional datasetEnum = Arrays.stream(PresetDatasetEnum.values()).filter(a -> a.getType().equals(String.valueOf(finalDatasetId))).findAny(); + if (!datasetEnum.isPresent()) { + log.error(""); + PrintUtils.printLine(" Error: 
数据集ID不属于预置数据集ID", PrintUtils.RED); + log.error(""); + continue; + } + + + Dataset dataset = findDataset(datasetId); + if (!Objects.isNull(dataset)) { + log.error(""); + PrintUtils.printLine(" Error: 数据集已存在,请重新选择", PrintUtils.RED); + log.error(""); + continue; + } + + + flag = true; + } + + return datasetId; + } + + /** + * 读取标签文件中标签数据 + * + * @param file 标签文件 + * @return List 标签数据集合 + */ + public List readLabelContext(File file) throws IOException { + String fileContext = HandleFileUtil.readFile(file); + List dataLabelList = JSONArray.parseArray(fileContext, DataLabel.class); + for (DataLabel dataLabel : dataLabelList) { + if (StringUtils.isEmpty(dataLabel.getName()) || StringUtils.isEmpty(dataLabel.getColor())) { + throw new ImportDatasetException(" 标签文件不规范,未能读到 'name' 或者 'color' "); + } + } + return dataLabelList; + } + + + /** + * 查询数据集 + * + * @param datasetId 数据集Id + * @return Dataset 根据数据集ID查询返回的数据集 + */ + private Dataset findDataset(Long datasetId) { + return datasetService.findDatasetByIdNormal(datasetId); + } + + + /** + * 校验文件路径 + * + * @param scanner 输入控制台 + * @param datasetId 数据集ID + * @return String 字符串 + */ + public String verificationFilePathAndSaveSqlData(Scanner scanner, Long datasetId) throws Exception { + boolean flag = false; + String filePath = ""; + while (!flag) { + System.out.println(" "); + System.out.println("# 请输入待上传本地预置数据集的完整路径 #"); + filePath = scanner.nextLine(); + File file = new File(filePath.trim()); + + if (!file.exists()) { + log.error(""); + PrintUtils.printLine(" 【" + filePath + "】 文件路径不存在,请重新输入", PrintUtils.RED); + log.error(""); + continue; + } else { + //校验文件目录是否合法并保存sql文件数据 + log.info("........数据校验开始,请勿关闭窗口................."); + checkFileDirectoryAndSaveSqlData(filePath, datasetId); + log.info("........数据校验完成,即将执行下一步操作,请勿关闭窗口................."); + flag = true; + } + } + return filePath; + } + + + /** + * 读取并保存sql文件中数据 + * + * @param file sql文件 + */ + @Transactional(rollbackFor = Exception.class) + public void readAndSaveSqlData(File file) throws Exception { + List list = HandleFileUtil.readFileInfo(file); + if (!CollectionUtils.isEmpty(list)) { + datasetService.saveBatch(list); + } + } + + + /** + * 检查并且替换JSON中的节点 + * + * @param annotationFile 标注文件 + * @param dataLabelList 数据集集合 + * @param dataset 数据集实体 + * @return 标签json数据 + * @throws IOException + */ + public JSONArray replaceJsonNode(File annotationFile, List dataLabelList, Dataset dataset) throws IOException { + JSONArray jsonArray = new JSONArray(); + if (annotationFile.exists()) { + String annotationFileContext = HandleFileUtil.readFile(annotationFile); + jsonArray = JSONArray.parseArray(annotationFileContext); + if (!jsonArray.isEmpty()) { + replaceAllNode(jsonArray, dataLabelList, dataset, annotationFile.getName()); + } + } + return jsonArray; + } + + /** + * 替换节点值 + * + * @param jsonArray 标注文件集合 + * @param dataLabelList 标签集合 + * @param dataset 数据集实体 + * @param fileName 文件名称 + */ + public void replaceAllNode(JSONArray jsonArray, List dataLabelList, Dataset dataset, String fileName) { + for (int i = MagicNumConstant.ZERO; i < jsonArray.size(); i++) { + JSONObject jsonObject = jsonArray.getJSONObject(i); + jsonObject.put("category_id", findDataLabelId(dataLabelList, jsonObject.get("name").toString())); + FileAnnotationDTO annotationDTO = jsonObject.toJavaObject(FileAnnotationDTO.class); + fileAnnotationMap.put(buildFileName(fileName), annotationDTO); + jsonObject.put("category_id",jsonObject.get("name")); + jsonObject.remove("name"); + } + } + + /** + * 查询需要替换的节点 + * + * @param dataLabelList 
标签集合 + * @param objectValue 替换的节点值 + * @return long 替换标签的Id + */ + public long findDataLabelId(List dataLabelList, String objectValue) { + Optional matchedDataLabel = dataLabelList.stream().filter(dataLabel -> objectValue.equals(dataLabel.getName())).findAny(); + if (!matchedDataLabel.isPresent()) { + throw new ImportDatasetException(" 标注文件中name的值不存在于标签中!"); + } + return matchedDataLabel.get().getId(); + } + + + /** + * 校验文件目录 + * + * @param strPath 文件地址 + * @param datasetId 数据集ID + */ + public void checkFileDirectoryAndSaveSqlData(String strPath, Long datasetId) throws Exception { + File f = new File(strPath); + if (f.isDirectory()) { + File[] files = f.listFiles(); + if (files == null || Objects.requireNonNull(files).length == 0) { + throw new ImportDatasetException(" 文件目录 【" + strPath + "】下不存在文件 "); + } + for (File file : files) { + //是文件夹则一层剥一层的去校验 + if (file.isDirectory()) { + //校验文件目录 + checkoutDirectoryName(file); + checkFileDirectoryAndSaveSqlData(file.getPath(), datasetId); + // /Downloads/COCO2017-val/1/ 在此目录文件夹下 + // annotation dataset.sql label_COCO2017-val.json origin versionFile + } else if (datasetIds.contains(file.getParentFile().getName())) { + //读取并保存 sql文件 + if (file.getName().toLowerCase().endsWith(BusinessConstant.SUFFIX_SQL.toLowerCase())) { + readAndSaveSqlData(file); + } + // 判断是否为 .json 结尾的标签文件 + if (file.getName().toLowerCase().endsWith(BusinessConstant.SUFFIX_JSON.toLowerCase())) { + labels = readLabelContext(file); + if (!CollectionUtils.isEmpty(labels)) { + dataLabelService.saveBatchDataLabel(labels); + List dataLabels = labels.stream().map(a -> + DatasetDataLabel.builder().datasetId(datasetId).labelId(a.getId()).build()).collect(Collectors.toList()); + datasetDataLabelService.saveBatchDatasetDataLabel(dataLabels); + } + } + // /Downloads/COCO2017-val/1/ 不在此目录文件夹下(在/1/目录下的子文件夹中) + } else if (!datasetIds.contains(file.getParentFile().getName())) { + ///Downloads/COCO2017-val/1/origin/ + File parentFile = file.getParentFile(); + // 在 origin 目录中 + if ( + BusinessConstant.IMAGE_ORIGIN.equals(parentFile.getName()) && + String.valueOf(datasetId).equals(parentFile.getParentFile().getName()) + ) { + originFiles.add(file); + } else { + annotationFiles.add(file); + } + //文件计数 + fileCount.getAndIncrement(); + } + + + } + } + } + + + /** + * 校验文件目录名称 + * + * @param file 文件 + */ + public void checkoutDirectoryName(File file) { + //获取文件名 + String fileName = file.getName(); + //获取文件路径 + String path = file.getPath(); + //获取当前文件所在文件夹的名称 + String parentFileName = file.getParentFile().getName(); + //筛选出当前文件夹中符合预置数据集名称的文件 + Optional optional = Arrays.stream(PresetDatasetEnum.values()).filter(a -> a.getType().equals(parentFileName)).findAny(); + //文件路径如果输入 /Downloads/COCO2017-val/1/xxx/xxx 则错误 + //以下均为文件路径的校验 + if (optional.isPresent() && + !(BusinessConstant.IMAGE_ORIGIN.equals(fileName) || BusinessConstant.VERSION_FILE.equals(fileName) + || BusinessConstant.ANNOTATION.equals(fileName) || BusinessConstant.VIDEO.equals(fileName)) + ) { + log.error(""); + PrintUtils.printLine(" 【" + path + "】 文件路径不合法,请重新输入", PrintUtils.RED); + log.error(""); + } else if (BusinessConstant.ANNOTATION.equals(parentFileName) && !(BusinessConstant.V0001.equals(fileName))) { + log.error(""); + PrintUtils.printLine(" 【" + path + "】 文件路径不合法,请重新输入", PrintUtils.RED); + log.error(""); + + } else if (BusinessConstant.VERSION_FILE.equals(parentFileName) && !(BusinessConstant.V0001.equals(fileName))) { + log.error(""); + PrintUtils.printLine(" 【" + path + "】 文件路径不合法,请重新输入", PrintUtils.RED); + log.error(""); + + } 
else if (BusinessConstant.OFRECORD.equals(parentFileName) && !(BusinessConstant.TRAIN.equals(fileName))) { + log.error(""); + PrintUtils.printLine(" 【" + path + "】 文件路径不合法,请重新输入", PrintUtils.RED); + log.error(""); + + } else if (BusinessConstant.V0001.equals(parentFileName) && + !(BusinessConstant.IMAGE_ORIGIN.equals(fileName) || BusinessConstant.ANNOTATION.equals(fileName) || BusinessConstant.OFRECORD.equals(fileName)) + ) { + log.error(""); + PrintUtils.printLine(" 【" + path + "】 文件路径不合法,请重新输入", PrintUtils.RED); + log.error(""); + + } + } + + + /** + * 根据文件路径删除minio文件数据 + * + * @param uri 文件路径 + */ + private void delDatasetMinioInfo(String uri) { + if (!Objects.isNull(uri)) { + String path = minioConfig.getNfsRootPath() + minioConfig.getBucketName() + StrUtil.SLASH + uri; + deleteFileByCMD(path); + } + + } + + /** + * 删除数据集信息 + * + * @param datasetId 数据集ID + * @param dataType 数据类型 + */ + @Transactional(rollbackFor = Exception.class) + public void delDatasetInfoById(long datasetId, Integer dataType) { + datasetService.deleteDatasetById(datasetId); + dataFileService.deleteFileByDatasetId(datasetId); + dataVersionFileService.deleteVersionByDatasetId(datasetId); + dataLabelService.deleteLabelByDatasetId(datasetId); + datasetDataLabelService.deleteDatasetLabelByDatasetId(datasetId); + if (DatatypeEnum.TXT.getValue().compareTo(dataType) == 0) { + dataFileAnnotationService.delDataFileAnnotationById(datasetId); + } + } + + /** + * 按名称排序 + * + * @param list 文件集合 + */ + private void sortByName(List list) { + for (int i = 0; i < list.size() - 1; i++) { + for (int j = 1; j < list.size() - i; j++) { + File a; + if (compareByName(list.get(j - 1), list.get(j)) > 0) { + a = list.get(j - 1); + list.set((j - 1), list.get(j)); + list.set(j, a); + } + } + } + } + + /** + * 文件名称排序 + * + * @param fileOne 文件名称 + * @param fileTwo 文件名称 + * @return 排序大小 + */ + private int compareByName(File fileOne, File fileTwo) { + return buildImgName(fileOne).compareTo(buildImgName(fileTwo)); + } + + + /** + * 构建图片名称 + * + * @param file 文件 + * @return 图片名称 + */ + private Integer buildImgName(File file) { + int value = MagicNumConstant.ZERO; + try { + value = Integer.parseInt(StringUtils.substringBefore(StringUtils.substringAfterLast(file.getName(), "_"), ".")); + } catch (Exception e) { + LogUtil.error(LogEnum.BIZ_DATASET, "文件: 【" + file.getName() + "】名称格式错误"); + } + return value; + } + + /** + * 文件删除 + * + * @param path 删除路径 + */ + public void deleteFileByCMD(String path) { + String sourcePath = formatPath(path); + //判断该路径是否存在文件或文件夹 + String emptyDir = ""; + String nfsBucket = minioConfig.getNfsRootPath() + minioConfig.getBucketName() + StrUtil.SLASH; + sourcePath = sourcePath.endsWith(StrUtil.SLASH) ? 
sourcePath : sourcePath + StrUtil.SLASH;
+        //校验回收文件是否存在以及回收文件必须至少在当前环境目录下还有一层目录,如:/nfs/dubhe-test/xxxx/
+        try {
+            if (sourcePath.startsWith((nfsBucket))
+                    && sourcePath.length() > nfsBucket.length()) {
+                emptyDir = "/tmp/empty_" + RandomUtil.randomNumbers(10) + StrUtil.SLASH;
+                LogUtil.info(LogEnum.BIZ_DATASET, "recycle task sourcePath:{},emptyDir:{}", sourcePath, emptyDir);
+                String exec = "/bin/sh";
+                String c = "-c";
+                if (System.getProperty("os.name").toLowerCase().contains("windows")) {
+                    exec = "cmd.exe";
+                    c = "/C";
+                }
+                Process process = Runtime.getRuntime().exec(new String[]{exec, c,
+                        String.format(BusinessConstant.DEL_COMMAND, minioConfig.getServerUserName(), minioConfig.getEndpoint(), emptyDir, emptyDir, sourcePath, emptyDir, sourcePath)});
+                recycleSourceIsOk(process);
+            }
+        } catch (Exception e) {
+            LogUtil.error(LogEnum.BIZ_DATASET, "minio 文件流删除文件失败: {} ", e);
+        }
+    }
+
+    /**
+     * 判断执行服务器命令是否成功退出
+     *
+     * @param process Process对象
+     * @return boolean linux命令是否执行成功正常退出
+     */
+    public boolean recycleSourceIsOk(Process process) {
+        InputStreamReader stream = new InputStreamReader(process.getErrorStream());
+        BufferedReader reader = new BufferedReader(stream);
+        StringBuilder errMessage = new StringBuilder();
+        boolean recycleIsOk = true;
+        try {
+            String errLine;
+            while ((errLine = reader.readLine()) != null) {
+                errMessage.append(errLine);
+            }
+            int status = process.waitFor();
+            if (status != 0) {
+                LogUtil.error(LogEnum.BIZ_DATASET, "文件流删除文件失败: {} ", errMessage.toString());
+                recycleIsOk = false;
+            }
+        } catch (Exception e) {
+            LogUtil.error(LogEnum.BIZ_DATASET, "文件流删除文件失败: {} ", e);
+            recycleIsOk = false;
+        } finally {
+            IOUtil.close(reader, stream);
+        }
+        return recycleIsOk;
+    }
+
+
+    /**
+     * 替换路径中多余的 "/"
+     *
+     * @param path 路径
+     * @return String 格式化后的路径
+     */
+    public String formatPath(String path) {
+        if (!StringUtils.isEmpty(path)) {
+            return path.replaceAll("///*", FILE_SEPARATOR);
+        }
+        return path;
+    }
+
+
+}
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileAnnotationService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileAnnotationService.java
new file mode 100644
index 0000000..1ef1f6f
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileAnnotationService.java
@@ -0,0 +1,43 @@
+/**
+ * Copyright 2020 Zhejiang Lab. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ============================================================= + */ +package org.dubhe.datasetutil.service; + +import org.dubhe.datasetutil.domain.entity.DataFileAnnotation; +import org.springframework.transaction.annotation.Transactional; + +import java.util.List; + +/** + * @description nlp文件 服务实现类 + * @date 2021-01-07 + */ +public interface DataFileAnnotationService { + + /** + * 批量保存nlp中间表 + * + * @param dataFileAnnotations nlp集合 + */ + void saveDataFileAnnotation(List dataFileAnnotations); + + /** + * 删除数据集文件标注数据通过数据集ID + * + * @param datasetId 数据集ID + */ + void delDataFileAnnotationById(long datasetId); +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileService.java index c100a35..927ac6f 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileService.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataFileService.java @@ -32,4 +32,17 @@ public interface DataFileService { */ void saveBatchDataFile(List dataFiles); + /** + * 创建新表 + * + * @param tableName 表名称 + */ + void createNewTable(String tableName); + + /** + * 删除数据集文件通过数据集ID + * + * @param datasetId 数据集ID + */ + void deleteFileByDatasetId(long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataLabelService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataLabelService.java index a4a98fa..092c953 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataLabelService.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataLabelService.java @@ -19,6 +19,7 @@ package org.dubhe.datasetutil.service; import org.dubhe.datasetutil.domain.entity.DataLabel; import java.util.List; +import java.util.Map; /** * @description 数据集标签服务接口 @@ -31,4 +32,21 @@ public interface DataLabelService { * @param listDataLabel 数据集标签集合 */ void saveBatchDataLabel(List listDataLabel); + + + /** + * 根据预置标签组获取预置标签 + * + * @param groupIds 预置标签组IDS + * @return 预置标签map key: 预置标签名称 value:预置标签ID + */ + Map getPresetLabelList(List groupIds); + + + /** + * 删除标签 + * + * @param datasetId 数据集ID + */ + void deleteLabelByDatasetId(long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataSequenceService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataSequenceService.java index 5199174..3720861 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataSequenceService.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataSequenceService.java @@ -53,4 +53,13 @@ public interface DataSequenceService { * @param tableId 表ID */ void createTable(String tableId); -} + + /** + * 扩容可用数量 + * + * @param businessCode 业务编码 + * @return DataSequence 数据ID序列 + */ + DataSequence expansionUsedNumber(String businessCode); + +} \ No newline at end of file diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataVersionFileService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataVersionFileService.java index 5b3d5a9..66bfc8c 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataVersionFileService.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DataVersionFileService.java @@ -16,7 +16,7 @@ */ package org.dubhe.datasetutil.service; -import org.dubhe.datasetutil.domain.dto.DataVersionFile; +import org.dubhe.datasetutil.domain.entity.DataVersionFile; import java.util.List; @@ -31,4 +31,19 @@ public 
interface DataVersionFileService { * @param dataVersionFiles 数据集文件数据集合 */ void saveBatchDataFileVersion(List dataVersionFiles); + + + /** + * 创建新表 + * + * @param tableName 表名称 + */ + void createNewTable(String tableName); + + /** + * 删除数据集版本通过数据集ID + * + * @param datasetId 数据集ID + */ + void deleteVersionByDatasetId(long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetDataLabelService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetDataLabelService.java index 6752391..0de0820 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetDataLabelService.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetDataLabelService.java @@ -31,4 +31,11 @@ public interface DatasetDataLabelService { * @param listDatasetDataLabel 数据集标签集合 */ void saveBatchDatasetDataLabel(List listDatasetDataLabel); + + /** + * 删除数据集标签关系通过数据集ID + * + * @param datasetId 数据集ID + */ + void deleteDatasetLabelByDatasetId(long datasetId); } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetService.java index 06d2b8b..b36cb3e 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetService.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetService.java @@ -18,6 +18,8 @@ package org.dubhe.datasetutil.service; import org.dubhe.datasetutil.domain.entity.Dataset; +import java.util.List; + /** * @description 数据集服务 * @date 2020-9-17 @@ -39,6 +41,14 @@ public interface DatasetService { */ Dataset findDatasetById(Long datasetId); + /** + * 根据ID查询数据集 + * + * @param datasetId 数据集Id + * @return Dataset 数据集 + */ + Dataset queryDatasetById(Long datasetId); + /** * 更新数据集状态 * @@ -62,4 +72,43 @@ public interface DatasetService { * @return int 数量 */ int findDataFileById(Long datasetId); + + /** + * 根据Id查询数据集 + * + * @param datasetId 数据集ID + * @return Dataset 数据集 + */ + Dataset findDatasetByIdNormal(Long datasetId); + + + /** + * 新增数据集 + * + * @param insertSql sql语句 + */ + void saveBatch(List insertSql); + + /** + * 删除数据集通过数据集ID + * + * @param datasetId 数据集ID + */ + void deleteDatasetById(long datasetId); + + /** + * 更新数据集状态 + * + * @param dataset 数据集 + */ + void updateDatasetStatusIsImport(Dataset dataset); + + /** + * 更新数据集 + * + * @param dataset 数据集信息 + * @return int 数量 + */ + int updateDataset(Dataset dataset); + } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetVersionService.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetVersionService.java new file mode 100644 index 0000000..0c0c2ca --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/DatasetVersionService.java @@ -0,0 +1,15 @@ +package org.dubhe.datasetutil.service; + +import org.dubhe.datasetutil.domain.entity.DatasetVersion; + +/** + * @description TODO + * @date 2021-03-23 + */ +public interface DatasetVersionService { + + DatasetVersion getByDatasetIdAndVersionNum(Long datasetId, String versionNum); + + void insertVersion(Long datasetId, String versionNum, String versionNote); + +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileAnnotationServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileAnnotationServiceImpl.java new file mode 100644 index 0000000..798092d --- /dev/null +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileAnnotationServiceImpl.java @@ -0,0 +1,53 
@@ +/** + * Copyright 2020 Zhejiang Lab. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================= + */ +package org.dubhe.datasetutil.service.impl; + +import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl; +import org.dubhe.datasetutil.dao.DataFileAnnotationMapper; +import org.dubhe.datasetutil.domain.entity.DataFileAnnotation; +import org.dubhe.datasetutil.service.DataFileAnnotationService; +import org.springframework.stereotype.Service; + +import java.util.List; + +/** + * @description nlp文件 服务实现类 + * @date 2021-01-07 + */ +@Service +public class DataFileAnnotationServiceImpl extends ServiceImpl implements DataFileAnnotationService { + + /** + * 批量保存nlp中间表 + * + * @param dataFileAnnotations nlp集合 + */ + @Override + public void saveDataFileAnnotation(List dataFileAnnotations) { + baseMapper.saveDataFileAnnotation(dataFileAnnotations); + } + + /** + * 删除数据集文件标注数据通过数据集ID + * + * @param datasetId 数据集ID + */ + @Override + public void delDataFileAnnotationById(long datasetId) { + baseMapper.delDataFileAnnotationById(datasetId); + } +} diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileServiceImpl.java index 1186efe..6308d1b 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileServiceImpl.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataFileServiceImpl.java @@ -57,4 +57,34 @@ public class DataFileServiceImpl extends ServiceImpl i } } + + + /** + * 创建新表 + * + * @param tableName 表名称 + */ + @Override + public void createNewTable(String tableName){ + int count = baseMapper.selectCountByTableName(tableName); + if(count == 0 ){ + if((BusinessConstant.DATASET_FILE+BusinessConstant.TABLE_SUFFIX).equals(tableName)){ + baseMapper.createNewTableOne(); + }else { + baseMapper.createNewTableTwo(); + } + } + } + + + /** + * 删除数据集文件通过数据集ID + * + * @param datasetId 数据集ID + */ + @Override + public void deleteFileByDatasetId(long datasetId) { + baseMapper.deleteFileByDatasetId(datasetId); + } + } diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java index 0d2fa0b..edf52e8 100644 --- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java +++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java @@ -23,15 +23,18 @@ import org.dubhe.datasetutil.domain.entity.DataLabel; import org.dubhe.datasetutil.service.DataLabelService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * @description 数据集标签服务接口实现 * @date 2020-10-14 */ @Service -public class 
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java
index 0d2fa0b..edf52e8 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataLabelServiceImpl.java
@@ -23,15 +23,18 @@ import org.dubhe.datasetutil.domain.entity.DataLabel;
 import org.dubhe.datasetutil.service.DataLabelService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
+import org.springframework.util.CollectionUtils;
 
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * @description Dataset label service implementation
  * @date 2020-10-14
  */
 @Service
-public class DataLabelServiceImpl implements DataLabelService {
+public class DataLabelServiceImpl implements DataLabelService {
 
     @Autowired
     private DataLabelMapper dataLabelMapper;
 
@@ -53,4 +56,36 @@ public class DataLabelServiceImpl implements DataLabelService {
             dataLabelMapper.saveBatchDataLabel(listDataLabel);
         }
     }
+
+
+
+    /**
+     * Get preset labels by preset label group IDs
+     *
+     * @param groupIds preset label group IDs
+     * @return preset labels, key: label name, value: label ID
+     */
+    @Override
+    public Map<String, Long> getPresetLabelList(List<Long> groupIds) {
+        List<DataLabel> labels = dataLabelMapper.getPresetLabelList(groupIds);
+        Map<String, Long> map = new HashMap<>(labels.size());
+        if(!CollectionUtils.isEmpty(labels)){
+            labels.forEach(a->{
+                map.put(a.getName(),a.getId());
+            });
+        }
+        return map;
+    }
+
+    /**
+     * Delete labels by dataset ID
+     *
+     * @param datasetId dataset ID
+     */
+    @Override
+    public void deleteLabelByDatasetId(long datasetId) {
+        dataLabelMapper.deleteLabelByDatasetId(datasetId);
+    }
+
+
 }
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataSequenceServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataSequenceServiceImpl.java
index 22f1dbb..ce85f93 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataSequenceServiceImpl.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataSequenceServiceImpl.java
@@ -16,11 +16,13 @@
  */
 package org.dubhe.datasetutil.service.impl;
 
+import org.dubhe.datasetutil.common.exception.DataSequenceException;
 import org.dubhe.datasetutil.dao.DataSequenceMapper;
 import org.dubhe.datasetutil.domain.entity.DataSequence;
 import org.dubhe.datasetutil.service.DataSequenceService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
 
 
 /**
@@ -35,7 +37,7 @@ public class DataSequenceServiceImpl implements DataSequenceService {
 
     @Override
     public DataSequence getSequence(String businessCode) {
-        return dataSequenceMapper.selectByBusiness(businessCode);
+        return dataSequenceMapper.selectDataSequenceById(dataSequenceMapper.selectByBusiness(businessCode).getId());
     }
 
     /**
@@ -45,6 +47,7 @@ public class DataSequenceServiceImpl implements DataSequenceService {
      * @return int count
      */
     @Override
+    @Transactional(rollbackFor = Exception.class)
    public int updateSequenceStart(String businessCode) {
         return dataSequenceMapper.updateStartByBusinessCode(businessCode);
     }
@@ -75,4 +78,23 @@ public class DataSequenceServiceImpl implements DataSequenceService {
         String oldTableName = tableName.substring(0,tableName.lastIndexOf("_"));
         dataSequenceMapper.createNewTable(tableName,oldTableName);
     }
-}
+
+
+    /**
+     * Expand the pool of available IDs
+     *
+     * @param businessCode business code
+     * @return DataSequence the ID sequence record
+     */
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public DataSequence expansionUsedNumber(String businessCode) {
+        DataSequence dataSequenceNew = getSequence(businessCode);
+        if (dataSequenceNew == null || dataSequenceNew.getStart() == null || dataSequenceNew.getStep() == null) {
+            throw new DataSequenceException("Bad configuration: please check the data_sequence table");
+        }
+        updateSequenceStart(businessCode);
+        return dataSequenceNew;
+    }
+
+}
\ No newline at end of file
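[Editor's note] expansionUsedNumber hands back the current sequence window and advances the stored start inside one transaction, which is what lets the import tool allocate primary keys in blocks instead of one row at a time. A sketch of how a caller might turn the returned record into usable IDs, assuming, as the null checks above suggest, that the block covered is [start, start + step):

import java.util.ArrayList;
import java.util.List;

public class IdBlockSketch {

    /** Expand the sequence and materialize the IDs it covers (assumed semantics). */
    static List<Long> nextIdBlock(DataSequenceService service, String businessCode) {
        DataSequence seq = service.expansionUsedNumber(businessCode);
        List<Long> ids = new ArrayList<>();
        for (long id = seq.getStart(); id < seq.getStart() + seq.getStep(); id++) {
            ids.add(id);
        }
        return ids;
    }
}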
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataVersionFileServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataVersionFileServiceImpl.java
index a032328..1e9be65 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataVersionFileServiceImpl.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DataVersionFileServiceImpl.java
@@ -17,10 +17,12 @@
 package org.dubhe.datasetutil.service.impl;
 
 import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import org.dubhe.datasetutil.common.constant.BusinessConstant;
 import org.dubhe.datasetutil.dao.DataVersionFileMapper;
-import org.dubhe.datasetutil.domain.dto.DataVersionFile;
+import org.dubhe.datasetutil.domain.entity.DataVersionFile;
 import org.dubhe.datasetutil.service.DataVersionFileService;
 import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
 
 import java.util.List;
 
@@ -29,7 +31,7 @@ import java.util.List;
  * @date 2020-09-17
  */
 @Service
-public class DataVersionFileServiceImpl extends ServiceImpl<DataVersionFileMapper, DataVersionFile> implements DataVersionFileService {
+public class DataVersionFileServiceImpl extends ServiceImpl<DataVersionFileMapper, DataVersionFile> implements DataVersionFileService {
 
 
     /**
@@ -38,7 +40,38 @@ public class DataVersionFileServiceImpl extends ServiceImpl<DataVersionFileMapper, DataVersionFile> implements DataVersionFileService {
      */
     @Override
     public void saveBatchDataFileVersion(List<DataVersionFile> listDataVersionFile) {
-        baseMapper.saveBatchDataFileVersion(listDataVersionFile);
+        baseMapper.saveBatchDataFileVersion(listDataVersionFile);
+    }
+
+
+    /**
+     * Create a new table
+     *
+     * @param tableName table name
+     */
+    @Override
+    public void createNewTable(String tableName){
+        int count = baseMapper.selectCountByTableName(tableName);
+        if(count == 0){
+            if((BusinessConstant.DATA_DATASET_VERSION_FILE + BusinessConstant.TABLE_SUFFIX).equals(tableName)){
+                baseMapper.createNewTableOne();
+            }else {
+                baseMapper.createNewTableTwo();
+            }
+
+        }
+    }
+
+    /**
+     * Delete dataset versions by dataset ID
+     *
+     * @param datasetId dataset ID
+     */
+    @Override
+    public void deleteVersionByDatasetId(long datasetId) {
+        baseMapper.deleteVersionByDatasetId(datasetId);
+        baseMapper.deleteVersionFileByDatasetId(datasetId);
     }
 }
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetDataLabelServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetDataLabelServiceImpl.java
index 6f0cf52..bb6fe85 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetDataLabelServiceImpl.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetDataLabelServiceImpl.java
@@ -54,4 +54,17 @@ public class DatasetDataLabelServiceImpl implements DatasetDataLabelService {
             datasetDataLabelMapper.saveBatchDatasetDataLabel(listDatasetDataLabel);
         }
     }
+
+
+    /**
+     * Delete dataset-label relations by dataset ID
+     *
+     * @param datasetId dataset ID
+     */
+    @Override
+    public void deleteDatasetLabelByDatasetId(long datasetId) {
+        datasetDataLabelMapper.deleteDatasetLabelByDatasetId(datasetId);
+    }
+
+
 }
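[Editor's note] deleteVersionByDatasetId above issues two separate deletes (the version rows and the version-file relation rows) with no transaction on the method itself. If the two must succeed or fail together, a caller can supply the boundary; a minimal sketch, assuming the Spring transaction manager already configured for this datasource:

import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;

@Component
public class VersionCleanupSketch {

    private final DataVersionFileService dataVersionFileService;

    public VersionCleanupSketch(DataVersionFileService dataVersionFileService) {
        this.dataVersionFileService = dataVersionFileService;
    }

    // Both underlying deletes roll back together if either fails.
    @Transactional(rollbackFor = Exception.class)
    public void cleanVersions(long datasetId) {
        dataVersionFileService.deleteVersionByDatasetId(datasetId);
    }
}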
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetServiceImpl.java
index 10ab8cc..1653e78 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetServiceImpl.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetServiceImpl.java
@@ -23,6 +23,10 @@ import org.dubhe.datasetutil.domain.entity.Dataset;
 import org.dubhe.datasetutil.service.DatasetService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
+import org.springframework.util.CollectionUtils;
+
+import java.util.List;
+import java.util.Objects;
 
 /**
  * @description Dataset service implementation
@@ -56,6 +60,11 @@ public class DatasetServiceImpl extends ServiceImpl<DatasetMapper, Dataset> implements DatasetService {
         return datasetMapper.findDatasetById(datasetId);
     }
 
+    @Override
+    public Dataset queryDatasetById(Long datasetId) {
+        return baseMapper.selectById(datasetId);
+    }
+
     /**
      * Update dataset status
      *
@@ -89,4 +98,65 @@ public class DatasetServiceImpl extends ServiceImpl<DatasetMapper, Dataset> implements DatasetService {
     public int findDataFileById(Long datasetId) {
         return datasetMapper.findDataFileById(datasetId);
     }
+
+    /**
+     * Query a dataset by ID (normal state)
+     *
+     * @param datasetId dataset ID
+     * @return Dataset the dataset
+     */
+    @Override
+    public Dataset findDatasetByIdNormal(Long datasetId) {
+        return datasetMapper.findDatasetByIdNormal(datasetId);
+    }
+
+    /**
+     * Insert datasets
+     *
+     * @param insertSql SQL statements
+     */
+    @Override
+    public void saveBatch(List<String> insertSql) {
+        if(!CollectionUtils.isEmpty(insertSql)){
+            insertSql.forEach(sql->{
+                if(!Objects.isNull(sql)){
+                    baseMapper.saveBatch(sql);
+                }
+            });
+        }
+
+    }
+
+    /**
+     * Delete a dataset by dataset ID
+     *
+     * @param datasetId dataset ID
+     */
+    @Override
+    public void deleteDatasetById(long datasetId) {
+        baseMapper.deleteDatasetById(datasetId);
+    }
+
+    /**
+     * Mark a dataset as importing
+     *
+     * @param dataset the dataset
+     */
+    @Override
+    public void updateDatasetStatusIsImport(Dataset dataset) {
+        dataset.setStatus(DataStateCodeConstant.IN_THE_IMPORT_STATE);
+        datasetMapper.updateById(dataset);
+    }
+
+    /**
+     * Update a dataset
+     *
+     * @param dataset dataset info
+     * @return int affected row count
+     */
+    @Override
+    public int updateDataset(Dataset dataset) {
+        return datasetMapper.updateById(dataset);
+    }
+
 }
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetVersionServiceImpl.java b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetVersionServiceImpl.java
new file mode 100644
index 0000000..0b91826
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/service/impl/DatasetVersionServiceImpl.java
@@ -0,0 +1,30 @@
+package org.dubhe.datasetutil.service.impl;
+
+import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import org.dubhe.datasetutil.dao.DatasetVersionMapper;
+import org.dubhe.datasetutil.domain.entity.DatasetVersion;
+import org.dubhe.datasetutil.service.DatasetVersionService;
+import org.springframework.stereotype.Service;
+
+/**
+ * @description Dataset version data handling
+ * @date 2021-03-23
+ */
+@Service
+public class DatasetVersionServiceImpl extends ServiceImpl<DatasetVersionMapper, DatasetVersion> implements DatasetVersionService {
+
+    @Override
+    public DatasetVersion getByDatasetIdAndVersionNum(Long datasetId, String versionNum) {
+        QueryWrapper<DatasetVersion> queryWrapper = new QueryWrapper<>();
+        queryWrapper.eq("dataset_id", datasetId);
+        queryWrapper.eq("version_name", versionNum);
+        return baseMapper.selectOne(queryWrapper);
+    }
+
+    @Override
+    public void insertVersion(Long datasetId, String versionNum, String versionNote) {
+        baseMapper.insert(new DatasetVersion(datasetId, versionNum, versionNote));
+    }
+
+}
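[Editor's note] getByDatasetIdAndVersionNum plus insertVersion gives callers a simple check-then-insert pattern for versions. A minimal usage sketch; the surrounding method and the "V0001" version name are illustrative assumptions, not code from this patch:

// Create a version for a dataset only if it does not exist yet (hypothetical call site).
void ensureVersion(DatasetVersionService versionService, Long datasetId) {
    if (versionService.getByDatasetIdAndVersionNum(datasetId, "V0001") == null) {
        versionService.insertVersion(datasetId, "V0001", "imported by dataset-util");
    }
}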
diff --git a/dataset-util/src/main/resources/application-dev.yml b/dataset-util/src/main/resources/application-dev.yml
deleted file mode 100644
index 423bf89..0000000
--- a/dataset-util/src/main/resources/application-dev.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-#Application name
-spring:
-  datasource:
-    #Datasource URL
-    url: jdbc:mysql://127.0.0.1:3306/dubhe-dev?serverTimezone=Asia/Shanghai&characterEncoding=utf8&useSSL=false&allowMultiQueries=true&useInformationSchema=true
-    #Datasource username
-    username: *
-    #Datasource password
-    password: *
-
-  #Sharding-JDBC datasource
-  shardingsphere:
-    datasource:
-      master:
-        #Datasource URL
-        url: jdbc:log4jdbc:mysql://127.0.0.1:3306/dubhe-dev?serverTimezone=Asia/Shanghai&characterEncoding=utf8&useSSL=false&allowMultiQueries=true&useInformationSchema=true
-        #Datasource username
-        username: *
-        #Datasource password
-        password: *
-
-minio:
-  #MinIO endpoint
-  endpoint: 127.0.0.1
-  #MinIO port
-  port: 9000
-  #MinIO accessKey
-  accessKey: abcd
-  #MinIO secretKey
-  secretKey: abcd
-  #MinIO bucketName
-  bucketName: dubhe-dev
diff --git a/dataset-util/src/main/resources/application-prod.yml b/dataset-util/src/main/resources/application-prod.yml
new file mode 100644
index 0000000..39e3c61
--- /dev/null
+++ b/dataset-util/src/main/resources/application-prod.yml
@@ -0,0 +1,41 @@
+#Application name
+spring:
+  #Sharding-JDBC datasource
+  shardingsphere:
+    datasource:
+      master:
+        #Datasource URL
+        url: jdbc:log4jdbc:mysql://127.0.0.1:3306/dubhe-cloud-prod?serverTimezone=Asia/Shanghai&characterEncoding=utf8&useSSL=false&allowMultiQueries=true&useInformationSchema=true
+        #Datasource username
+        username: root
+        #Datasource password
+        password: 123456
+
+minio:
+  #MinIO endpoint
+  endpoint: 127.0.0.1
+  #MinIO port
+  port: 9000
+  #MinIO accessKey
+  accessKey: admin
+  #MinIO secretKey
+  secretKey: 123@abc.com
+  #MinIO bucketName
+  bucketName: dubhe-prod
+  #MinIO nfsRootPath
+  nfsRootPath: /nfs/
+  # File storage server username
+  serverUserName: root
+  # Dataset documentation URL
+  dosAddress: http://docs.dubhe.ai/docs/module/dataset/preset-dataset/
+#Image file suffixes
+suffix:
+  imageFormat: .jpg,.png,.bmp,.jpeg
+  txtFormat: .txt
+# ES server address and ports
+es:
+  host: 127.0.0.1
+  serverPort: 9200
+  transportPort: 9300
+  clusterName: kubernetes-logging
+  index: dataset_text
\ No newline at end of file
diff --git a/dataset-util/src/main/resources/application-test.yml b/dataset-util/src/main/resources/application-test.yml
deleted file mode 100644
index a03e5a1..0000000
--- a/dataset-util/src/main/resources/application-test.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-#Application name
-spring:
-  datasource:
-    #Datasource URL
-    url: jdbc:mysql://127.0.0.1:3306/dubhe-test?serverTimezone=Asia/Shanghai&characterEncoding=utf8&useSSL=false&allowMultiQueries=true&useInformationSchema=true
-    #Datasource username
-    username: *
-    #Datasource password
-    password: *
-
-  #Sharding-JDBC datasource
-  shardingsphere:
-    datasource:
-      master:
-        #Datasource URL
-        url: jdbc:log4jdbc:mysql://127.0.0.1:3306/dubhe-test?serverTimezone=Asia/Shanghai&characterEncoding=utf8&useSSL=false&allowMultiQueries=true&useInformationSchema=true
-        #Datasource username
-        username: *
-        #Datasource password
-        password: *
-
-minio:
-  #MinIO endpoint
-  endpoint: 127.0.0.1
-  #MinIO port
-  port: 9000
-  #MinIO accessKey
-  accessKey: abcd
-  #MinIO secretKey
-  secretKey: abcd
-  #MinIO bucketName
-  bucketName: dubhe-test
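[Editor's note] The minio block above is what this module binds at startup (the diffstat lists a common/config/MinioConfig.java). A rough sketch of such a binding; the field names mirror the keys shown, but the class below is illustrative and assumes Lombok is on the classpath:

import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

// Binds the `minio:` block of application-prod.yml (sketch; the real class is MinioConfig).
@Data
@Configuration
@ConfigurationProperties(prefix = "minio")
public class MinioPropertiesSketch {
    private String endpoint;
    private Integer port;
    private String accessKey;
    private String secretKey;
    private String bucketName;
    private String nfsRootPath;
    private String serverUserName;
    private String dosAddress;
}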
diff --git a/dataset-util/src/main/resources/application.yml b/dataset-util/src/main/resources/application.yml
index 03000e0..224b93c 100644
--- a/dataset-util/src/main/resources/application.yml
+++ b/dataset-util/src/main/resources/application.yml
@@ -1,33 +1,11 @@
 spring:
+  main:
+    allow-bean-definition-overriding: true
   profiles:
-    active: dev
+    active: prod
   application:
     name: dataset-util
-  datasource:
-    type: com.alibaba.druid.pool.DruidDataSource
-    driver-class-name: com.mysql.jdbc.Driver
-
-    # Initial pool size
-    initial-size: 3
-    # Minimum idle connections
-    min-idle: 3
-    # Maximum active connections
-    max-active: 15
-    # Connection acquisition timeout (ms)
-    max-wait: 5000
-    # Connection validity check interval (ms)
-    time-between-eviction-runs-millis: 90000
-    # Maximum idle time (ms)
-    min-evictable-idle-time-millis: 1800000
-    test-while-idle: true
-    test-on-borrow: false
-    test-on-return: false
-
-    validation-query: select 1
-  # Allow later beans to override earlier ones with the same name
-  main:
-    allow-bean-definition-overriding: true
   # Sharding-JDBC datasource names (multiple allowed)
   shardingsphere:
     datasource:
@@ -37,19 +15,40 @@ spring:
         driver-class-name: net.sf.log4jdbc.sql.jdbcapi.DriverSpy
         test-while-idle: true
         validation-query: select 1
+        # Initial pool size
+        initial-size: 3
+        # Minimum idle connections
+        min-idle: 3
+        # Maximum active connections
+        max-active: 15
+        # Connection acquisition timeout (ms)
+        max-wait: 5000
+        # Connection validity check interval (ms)
+        time-between-eviction-runs-millis: 90000
+        # Maximum idle time (ms)
+        min-evictable-idle-time-millis: 1800000
+        test-on-borrow: false
+        test-on-return: false
     # Primary-key ID generation strategy for the data_dataset tables
     sharding:
       tables:
         data_file:
-          actual-data-nodes: master.data_file_$->{1..100000}
+          actual-data-nodes: master.data_file_$->{1..300}
           table-strategy:
             standard:
               sharding-column: dataset_id
               precise-algorithm-class-name: org.dubhe.datasetutil.common.util.MyPreciseShardingAlgorithm
         data_dataset_version_file:
-          actual-data-nodes: master.data_dataset_version_file_$->{1..100000}
+          actual-data-nodes: master.data_dataset_version_file_$->{1..300}
+          table-strategy:
+            standard:
+              sharding-column: dataset_id
+              precise-algorithm-class-name: org.dubhe.datasetutil.common.util.MyPreciseShardingAlgorithm
+
+        data_file_annotation:
+          actual-data-nodes: master.data_file_annotation_$->{1..300}
           table-strategy:
             standard:
               sharding-column: dataset_id
               precise-algorithm-class-name: org.dubhe.datasetutil.common.util.MyPreciseShardingAlgorithm
@@ -60,11 +59,19 @@ spring:
 
 minio:
   secure: false
+  blockingCoefficient: 0.5
 
 #logback
 logging.config:
-  classpath:logback-spring-dev.xml
+  classpath:logback-spring-${spring.profiles.active}.xml
 
 mybatis-plus:
   global-config:
-    banner: false
\ No newline at end of file
+    banner: false
+
+# ES server address and ports
+es:
+  host: 127.0.0.1
+  serverPort: 9200
+  transportPort: 9300
+  index: dataset_text_test
\ No newline at end of file
diff --git a/dataset-util/src/main/resources/logback-spring-dev.xml b/dataset-util/src/main/resources/logback-spring-prod.xml
similarity index 100%
rename from dataset-util/src/main/resources/logback-spring-dev.xml
rename to dataset-util/src/main/resources/logback-spring-prod.xml
diff --git a/dataset-util/src/main/resources/logback-spring-test.xml b/dataset-util/src/main/resources/logback-spring-test.xml
deleted file mode 100644
index 038359c..0000000
--- a/dataset-util/src/main/resources/logback-spring-test.xml
+++ /dev/null
@@ -1,248 +0,0 @@
[248 deleted lines: the test logback configuration; its XML tags were lost in extraction. The recoverable remnants show rolling-file appenders for the info, debug, error, warn, trace, schedule, and request logs under logs/${log.path}/, each capped at 50MB per file, 7 days of history, and 250MB total per level.]
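[Editor's note] All three sharded tables above route on dataset_id through MyPreciseShardingAlgorithm, and the node range was cut from 100000 to 300 shards. A minimal sketch of what a precise sharding algorithm for this layout could look like under the ShardingSphere 4.x API; the modulo rule is an assumption for illustration, the real routing logic lives in common/util/MyPreciseShardingAlgorithm.java:

import java.util.Collection;
import org.apache.shardingsphere.api.sharding.standard.PreciseShardingAlgorithm;
import org.apache.shardingsphere.api.sharding.standard.PreciseShardingValue;

// Routes a row to one of the 300 shard tables by its dataset_id (illustrative only).
public class DatasetIdShardingSketch implements PreciseShardingAlgorithm<Long> {

    @Override
    public String doSharding(Collection<String> availableTargetNames, PreciseShardingValue<Long> shardingValue) {
        long suffix = shardingValue.getValue() % 300 + 1;
        for (String target : availableTargetNames) {
            if (target.endsWith("_" + suffix)) {
                return target;
            }
        }
        throw new UnsupportedOperationException("no shard table for dataset_id " + shardingValue.getValue());
    }
}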
diff --git a/dataset-util/src/main/resources/mapper/DataFileAnnotationMapper.xml b/dataset-util/src/main/resources/mapper/DataFileAnnotationMapper.xml
new file mode 100644
index 0000000..16196b9
--- /dev/null
+++ b/dataset-util/src/main/resources/mapper/DataFileAnnotationMapper.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="org.dubhe.datasetutil.dao.DataFileAnnotationMapper">
+
+    <insert id="saveDataFileAnnotation">
+        insert into data_file_annotation (id,dataset_id,label_id,version_file_id,prediction,create_user_id,file_name)
+        values
+        <foreach collection="dataFileAnnotations" item="file" separator=",">
+            (#{file.id},#{file.datasetId},#{file.labelId},#{file.versionFileId},
+            #{file.prediction},#{file.createUserId}, #{file.fileName})
+        </foreach>
+    </insert>
+</mapper>
\ No newline at end of file
diff --git a/dataset-util/src/main/resources/mapper/DataFileMapper.xml b/dataset-util/src/main/resources/mapper/DataFileMapper.xml
index 1f7b10f..e361b56 100644
--- a/dataset-util/src/main/resources/mapper/DataFileMapper.xml
+++ b/dataset-util/src/main/resources/mapper/DataFileMapper.xml
@@ -2,8 +2,8 @@
 <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
 <mapper namespace="org.dubhe.datasetutil.dao.DataFileMapper">
-    <insert id="saveBatchDataFile">
-        insert into data_file (id,`name`,dataset_id,status,url,enhance_type,width,height,origin_user_id,create_user_id,pid)
+    <insert id="saveBatchDataFile">
+        insert into data_file (id,name,dataset_id,status,url,enhance_type,width,height,origin_user_id,create_user_id,pid)
         values
         <foreach collection="files" item="file" separator=",">
             (#{file.id},#{file.name},#{file.datasetId},#{file.status},
diff --git a/dataset-util/src/main/resources/mapper/DataLabelMapper.xml b/dataset-util/src/main/resources/mapper/DataLabelMapper.xml
index 20d80cb..0f5753a 100644
--- a/dataset-util/src/main/resources/mapper/DataLabelMapper.xml
+++ b/dataset-util/src/main/resources/mapper/DataLabelMapper.xml
@@ -9,4 +9,18 @@
             ( #{dataLabel.name},#{dataLabel.color},#{dataLabel.createUserId})
         </foreach>
     </insert>
+
[14 added lines whose XML bodies were lost in extraction: per the calls in DataLabelServiceImpl they back getPresetLabelList (select preset labels by label-group IDs) and deleteLabelByDatasetId (delete labels by dataset ID)]
+</mapper>
\ No newline at end of file
diff --git a/dataset-util/src/main/resources/mapper/DataVersionFileMapper.xml b/dataset-util/src/main/resources/mapper/DataVersionFileMapper.xml
index 5eb4245..57a23ce 100644
--- a/dataset-util/src/main/resources/mapper/DataVersionFileMapper.xml
+++ b/dataset-util/src/main/resources/mapper/DataVersionFileMapper.xml
@@ -2,11 +2,11 @@
 <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
 <mapper namespace="org.dubhe.datasetutil.dao.DataVersionFileMapper">
-    <insert id="saveBatchDataFileVersion">
-        insert into data_dataset_version_file (id,dataset_id,file_id,annotation_status,status)
+    <insert id="saveBatchDataFileVersion">
+        insert into data_dataset_version_file (id,dataset_id,file_id,annotation_status,status,version_name,file_name)
         values
         <foreach collection="listDataVersionFile" item="temp" separator=",">
-            (#{temp.id},#{temp.datasetId},#{temp.fileId},#{temp.annotationStatus},#{temp.status})
+            (#{temp.id},#{temp.datasetId},#{temp.fileId},#{temp.annotationStatus},#{temp.status},#{temp.versionName},#{temp.fileName})
         </foreach>
     </insert>
 </mapper>
\ No newline at end of file
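[Editor's note] The foreach-based inserts above expand a whole Java list into a single multi-row VALUES statement, so each batch costs one round trip. A sketch of driving one from Java; the helper method is illustrative, in this patch the real callers are the import handlers:

import java.util.List;

// Illustrative call into the service over DataFileAnnotationMapper (entity fields per this patch).
void saveAnnotations(DataFileAnnotationService service, List<DataFileAnnotation> rows) {
    // One statement: the <foreach> in DataFileAnnotationMapper.xml expands rows into a single VALUES list.
    service.saveDataFileAnnotation(rows);
}

Since data_file_annotation is sharded on dataset_id, a batch whose rows all belong to one dataset routes to a single shard table, which is the case during an import.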