+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+package org.dubhe.datasetutil.domain.dto;
+
+import lombok.Data;
+
+import java.io.Serializable;
+
+/**
+ * @description Annotation DTO: a label ID paired with its estimated score
+ * @date 2021-04-14
+ */
+@Data
+public class AnnotationDTO implements Serializable {
+
+ /**
+ * Label ID
+ */
+ private Long categoryId;
+
+ /**
+ * Estimated score
+ */
+ private Double score;
+}
+
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java
new file mode 100644
index 0000000..639a3ba
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/EsTransportDTO.java
@@ -0,0 +1,95 @@
+/**
+ * Copyright 2020 Tianshu AI Platform. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+
+package org.dubhe.datasetutil.domain.dto;
+
+import lombok.Data;
+
+import java.sql.Timestamp;
+
+/**
+ * @description DTO for ES data synchronization
+ * @date 2020-03-24
+ */
+@Data
+public class EsTransportDTO {
+
+ /**
+ * Annotation status
+ */
+ private Integer annotationStatus;
+
+ /**
+ * File name
+ */
+ private String fileName;
+
+ /**
+ * File URL
+ */
+ private String url;
+
+ /**
+ * ID of the user who created the record
+ */
+ private Long createUserId;
+
+ /**
+ * Creation time
+ */
+ private Timestamp createTime;
+
+ /**
+ * ID of the user who last updated the record
+ */
+ private Long updateUserId;
+
+ /**
+ * Last update time
+ */
+ private Timestamp updateTime;
+
+ /**
+ * File type
+ */
+ private Integer fileType;
+
+ /**
+ * Enhancement type
+ */
+ private Integer enhanceType;
+
+ /**
+ * User ID (origin user)
+ */
+ private Long originUserId;
+
+ /**
+ * Prediction value
+ */
+ private Double prediction;
+
+ /**
+ * File ID
+ */
+ private Long id;
+
+ /**
+ * Label ID
+ */
+ private Long labelId;
+}
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java
new file mode 100644
index 0000000..6c7b793
--- /dev/null
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/FileAnnotationDTO.java
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2020 Zhejiang Lab. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================
+ */
+package org.dubhe.datasetutil.domain.dto;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import java.io.Serializable;
+
+/**
+ * @description File annotation DTO: a label ID paired with its score
+ * @date 2020-01-07
+ */
+@Data
+@NoArgsConstructor
+@AllArgsConstructor
+@Builder
+public class FileAnnotationDTO implements Serializable {
+
+ /**
+ * Label ID
+ */
+ private Long categoryId;
+
+ /**
+ * Score (declared as a String in this DTO)
+ */
+ private String score;
+
+}
diff --git a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java
index 87ae460..2960d7e 100644
--- a/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java
+++ b/dataset-util/src/main/java/org/dubhe/datasetutil/domain/dto/IdAlloc.java
@@ -17,7 +17,10 @@
package org.dubhe.datasetutil.domain.dto;
import lombok.Data;
-import org.dubhe.datasetutil.common.base.MagicNumConstant;
+import org.dubhe.datasetutil.domain.entity.DataSequence;
+
+import java.util.LinkedList;
+import java.util.Queue;
/**
* @description ID策略实体
@@ -26,25 +29,34 @@ import org.dubhe.datasetutil.common.base.MagicNumConstant;
@Data
public class IdAlloc {
- /**
- * 起始位置
- */
- private long startNumber;
+ private Queue
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,35 +16,45 @@
*/
package org.dubhe.datasetutil.handle;
+import cn.hutool.core.io.FileUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
-import com.xiaoleilu.hutool.io.FileUtil;
+import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.dubhe.datasetutil.common.base.MagicNumConstant;
+import org.dubhe.datasetutil.common.config.ImageConfig;
import org.dubhe.datasetutil.common.config.MinioConfig;
+import org.dubhe.datasetutil.common.constant.AnnotateTypeEnum;
import org.dubhe.datasetutil.common.constant.BusinessConstant;
import org.dubhe.datasetutil.common.constant.FileStateCodeConstant;
+import org.dubhe.datasetutil.common.enums.DatatypeEnum;
+import org.dubhe.datasetutil.common.enums.LogEnum;
import org.dubhe.datasetutil.common.exception.ImportDatasetException;
import org.dubhe.datasetutil.common.util.*;
+import org.dubhe.datasetutil.domain.dto.AnnotationDTO;
import org.dubhe.datasetutil.domain.entity.*;
-import org.dubhe.datasetutil.domain.dto.DataVersionFile;
+import org.dubhe.datasetutil.domain.entity.DataVersionFile;
import org.dubhe.datasetutil.service.*;
+import org.elasticsearch.action.bulk.BulkProcessor;
+import org.elasticsearch.action.index.IndexRequest;
import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
-
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.CollectionUtils;
+import javax.annotation.Resource;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
import java.io.File;
+import java.io.FileReader;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.*;
import java.util.concurrent.Callable;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
/**
@@ -55,6 +65,12 @@ import java.util.stream.Collectors;
@Component
public class DatasetImportHandle {
+ /**
+ * esSearch索引
+ */
+ @Value("${es.index}")
+ private String esIndex;
+
@Autowired
private DatasetService datasetService;
@@ -76,6 +92,9 @@ public class DatasetImportHandle {
@Autowired
private DataVersionFileService dataVersionFileService;
+ @Autowired
+ private DataFileAnnotationService dataFileAnnotationService;
+
@Autowired
private MinioUtil minioUtil;
@@ -85,10 +104,11 @@ public class DatasetImportHandle {
@Autowired
private GeneratorKeyUtil generatorKeyUtil;
- /**
- * 可支持的图片格式集合
- */
- private static final List> partitionList = Lists.partition(fileNames, MagicNumConstant.FIVE_THOUSAND);
+ for (List
> partitionList = Lists.partition(imageFileNameList, MagicNumConstant.FIVE_THOUSAND);
+ for (List
> partitionList = Lists.partition(allFileList, MagicNumConstant.FIVE_THOUSAND);
+ for (List
> partitionList = Lists.partition(originFiles, MagicNumConstant.FIVE_THOUSAND);
+ for (List