فهرست منبع

add struct excel embedding

tycoding 1 سال پیش
والد
کامیت
e73afd07f5
17 فایلهای تغییر یافته به همراه 498 افزوده شده و 4 حذف شده
  1. 7 0
      langchat-aigc/pom.xml
  2. 19 0
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/endpoint/EmbeddingEndpoint.java
  3. 164 0
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/listener/ImportExcelHelper.java
  4. 92 0
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/listener/ImportExcelListener.java
  5. 29 0
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/listener/StructExcelListener.java
  6. 44 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/entity/AigcStructCol.java
  7. 44 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/entity/AigcStructRow.java
  8. 15 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/mapper/AigcStructColMapper.java
  9. 15 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/mapper/AigcStructRowMapper.java
  10. 13 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/AigcStructColService.java
  11. 13 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/AigcStructRowService.java
  12. 2 2
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/LangDocService.java
  13. 19 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/AigcStructColServiceImpl.java
  14. 20 0
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/AigcStructRowServiceImpl.java
  15. 1 1
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/LangDocServiceImpl.java
  16. 1 1
      langchat-ui/src/api/aigc/embedding.ts
  17. 0 0
      langchat-upms/src/main/resources/mapper/AigcStructColService.xml

+ 7 - 0
langchat-aigc/pom.xml

@@ -39,6 +39,13 @@
             <groupId>com.mysql</groupId>
             <artifactId>mysql-connector-j</artifactId>
         </dependency>
+
+        <!-- Excel -->
+        <dependency>
+            <groupId>com.pig4cloud.excel</groupId>
+            <artifactId>excel-spring-boot-starter</artifactId>
+            <version>3.2.1</version>
+        </dependency>
     </dependencies>
 
 </project>

+ 19 - 0
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/endpoint/EmbeddingEndpoint.java

@@ -5,6 +5,7 @@ import cn.tycoding.langchat.aigc.entity.AigcDocs;
 import cn.tycoding.langchat.aigc.entity.AigcDocsSlice;
 import cn.tycoding.langchat.aigc.entity.AigcOss;
 import cn.tycoding.langchat.aigc.enums.DocsTypeEnum;
+import cn.tycoding.langchat.aigc.listener.StructExcelListener;
 import cn.tycoding.langchat.aigc.service.AigcKnowledgeService;
 import cn.tycoding.langchat.aigc.service.AigcOssService;
 import cn.tycoding.langchat.common.dto.DocR;
@@ -12,6 +13,8 @@ import cn.tycoding.langchat.common.dto.EmbeddingR;
 import cn.tycoding.langchat.common.exception.ServiceException;
 import cn.tycoding.langchat.common.utils.R;
 import cn.tycoding.langchat.core.service.LangDocService;
+import com.alibaba.excel.EasyExcel;
+import java.io.IOException;
 import java.util.List;
 import lombok.AllArgsConstructor;
 import org.springframework.scheduling.annotation.Async;
@@ -85,4 +88,20 @@ public class EmbeddingEndpoint {
         aigcKnowledgeService.updateDocs(new AigcDocs().setId(data.getId()).setSliceStatus(true).setSliceNum(list.size()));
         return R.ok();
     }
+
+    /**
+     * Accepts an Excel upload for structured-data import at
+     * {@code POST /struct/excel/{knowledgeId}} (relative to the controller's base mapping).
+     *
+     * <p>At present the upload is only streamed through EasyExcel with a fresh
+     * {@link StructExcelListener}; nothing is persisted and {@code knowledgeId} is not used.
+     *
+     * @param file        uploaded Excel file; read via the sheet selected by EasyExcel's no-arg
+     *                    {@code sheet()}
+     * @param knowledgeId knowledge-base identifier taken from the path (currently unused)
+     * @throws IOException if the upload's input stream cannot be opened
+     */
+    @PostMapping("/struct/excel/{knowledgeId}")
+    public void structExcel(MultipartFile file, @PathVariable String knowledgeId)
+            throws IOException {
+        // NOTE(review): disabled draft that would upload the file to OSS and register an
+        // AigcDocs record for the knowledge base; left in place for reference.
+//        AigcOss oss = aigcOssService.upload(file);
+//        AigcDocs data = new AigcDocs()
+//                .setName(oss.getFileName())
+//                .setSliceStatus(false)
+//                .setSize(oss.getSize())
+//                .setType(DocsTypeEnum.UPLOAD.name())
+//                .setKnowledgeId(knowledgeId);
+//        aigcKnowledgeService.addDocs(data);
+
+
+        // NOTE(review): file is dereferenced without a null/empty check — confirm the caller
+        // always sends the multipart part.
+        EasyExcel.read(file.getInputStream(), new StructExcelListener()).sheet().doRead();
+    }
 }

+ 164 - 0
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/listener/ImportExcelHelper.java

@@ -0,0 +1,164 @@
+package cn.tycoding.langchat.aigc.listener;
+
+import com.alibaba.excel.EasyExcel;
+import com.alibaba.excel.annotation.ExcelProperty;
+import com.alibaba.excel.enums.CellExtraTypeEnum;
+import com.alibaba.excel.metadata.CellExtra;
+import java.lang.reflect.Field;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicReference;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.web.multipart.MultipartFile;
+
+@Slf4j
+public class ImportExcelHelper<T> {
+
+    /**
+     * 返回解析后的List
+     *
+     * @return java.util.List<T> 解析后的List
+     * @param: fileName 文件名
+     * @param: clazz Excel对应属性名
+     * @param: sheetNo 要解析的sheet
+     * @param: headRowNumber 正文起始行
+     */
+    public List<T> getList(MultipartFile file, Class<T> clazz, Integer sheetNo,
+            Integer headRowNumber) {
+        ImportExcelListener<T> listener = new ImportExcelListener<>(headRowNumber);
+        CountDownLatch latch = new CountDownLatch(1);
+        new Thread(() -> {
+            try {
+                EasyExcel.read(file.getInputStream(), clazz, listener)
+                        .extraRead(CellExtraTypeEnum.MERGE).sheet(sheetNo)
+                        .headRowNumber(headRowNumber).doRead();
+            } catch (Exception e) {
+                log.error(e.getMessage());
+            }
+            latch.countDown();
+        }).start();
+        try {
+            latch.await();
+        } catch (InterruptedException e) {
+        }
+        List<CellExtra> extraMergeInfoList = listener.getExtraMergeInfoList();
+        //没有合并单元格情况,直接返回即可
+        if (isEmpty(extraMergeInfoList)) {
+            return listener.getData();
+        }
+        CountDownLatch computerLatch = new CountDownLatch(1);
+        AtomicReference<List<T>> data = new AtomicReference<>();
+        new Thread(() -> {
+            //存在有合并单元格时,自动获取值,并校对
+            data.set(explainMergeData(listener.getData(), extraMergeInfoList, headRowNumber));
+            computerLatch.countDown();
+        }).start();
+        try {
+            computerLatch.await();
+        } catch (InterruptedException e) {
+        }
+        return data.get();
+    }
+
+    /**
+     * 处理合并单元格
+     *
+     * @param data               解析数据
+     * @param extraMergeInfoList 合并单元格信息
+     * @param headRowNumber      起始行
+     * @return 填充好的解析数据
+     */
+    private List<T> explainMergeData(List<T> data, List<CellExtra> extraMergeInfoList,
+            Integer headRowNumber) {
+        //循环所有合并单元格信息
+        extraMergeInfoList.forEach(cellExtra -> {
+            int firstRowIndex = cellExtra.getFirstRowIndex() - headRowNumber;
+            int lastRowIndex = cellExtra.getLastRowIndex() - headRowNumber;
+            int firstColumnIndex = cellExtra.getFirstColumnIndex();
+            int lastColumnIndex = cellExtra.getLastColumnIndex();
+            //获取初始值
+            Object initValue = getInitValueFromList(firstRowIndex, firstColumnIndex, data);
+            //设置值
+            for (int i = firstRowIndex; i <= lastRowIndex; i++) {
+                for (int j = firstColumnIndex; j <= lastColumnIndex; j++) {
+                    setInitValueToList(initValue, i, j, data);
+                }
+            }
+        });
+        return data;
+    }
+
+    /**
+     * 设置合并单元格的值
+     *
+     * @param filedValue  值
+     * @param rowIndex    行
+     * @param columnIndex 列
+     * @param data        解析数据
+     */
+    private void setInitValueToList(Object filedValue, Integer rowIndex, Integer columnIndex,
+            List<T> data) {
+        if (rowIndex >= data.size()) {
+            return;
+        }
+        T object = data.get(rowIndex);
+
+        for (Field field : object.getClass().getDeclaredFields()) {
+            //提升反射性能,关闭安全检查
+            field.setAccessible(true);
+            ExcelProperty annotation = field.getAnnotation(ExcelProperty.class);
+            if (annotation != null) {
+                if (annotation.index() == columnIndex) {
+                    try {
+                        field.set(object, filedValue);
+                        break;
+                    } catch (IllegalAccessException e) {
+                        log.error("设置合并单元格的值异常:{}", e.getMessage());
+                    }
+                }
+            }
+        }
+    }
+
+
+    /**
+     * 获取合并单元格的初始值 rowIndex对应list的索引 columnIndex对应实体内的字段
+     *
+     * @param firstRowIndex    起始行
+     * @param firstColumnIndex 起始列
+     * @param data             列数据
+     * @return 初始值
+     */
+    private Object getInitValueFromList(Integer firstRowIndex, Integer firstColumnIndex,
+            List<T> data) {
+        Object filedValue = null;
+        T object = data.get(firstRowIndex);
+        for (Field field : object.getClass().getDeclaredFields()) {
+            //提升反射性能,关闭安全检查
+            field.setAccessible(true);
+            ExcelProperty annotation = field.getAnnotation(ExcelProperty.class);
+            if (annotation != null) {
+                if (annotation.index() == firstColumnIndex) {
+                    try {
+                        filedValue = field.get(object);
+                        break;
+                    } catch (IllegalAccessException e) {
+                        log.error("设置合并单元格的初始值异常:{}", e.getMessage());
+                    }
+                }
+            }
+        }
+        return filedValue;
+    }
+
+    /**
+     * 判断集合是否为空
+     *
+     * @param collection
+     * @return
+     */
+    private boolean isEmpty(Collection<?> collection) {
+        return collection == null || collection.isEmpty();
+    }
+}

+ 92 - 0
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/listener/ImportExcelListener.java

@@ -0,0 +1,92 @@
+package cn.tycoding.langchat.aigc.listener;
+
+import com.alibaba.excel.context.AnalysisContext;
+import com.alibaba.excel.event.AnalysisEventListener;
+import com.alibaba.excel.metadata.CellExtra;
+import com.alibaba.fastjson.JSON;
+import java.util.ArrayList;
+import java.util.List;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * Excel模板的读取监听类
+ * @param <T>
+ */
+@Slf4j
+public class ImportExcelListener<T> extends AnalysisEventListener<T> {
+
+    /**
+     * 解析的数据
+     */
+    private List<T> list = new ArrayList<>();
+
+    /**
+     * 正文起始行
+     */
+    private Integer headRowNumber;
+    /**
+     * 合并单元格
+     */
+    private List<CellExtra> extraMergeInfoList = new ArrayList<>();
+
+    public ImportExcelListener(Integer headRowNumber) {
+        this.headRowNumber = headRowNumber;
+    }
+
+    /**
+     * 这个每一条数据解析都会来调用
+     *
+     * @param data    one row value. Is is same as {@link AnalysisContext#readRowHolder()}
+     * @param context context
+     */
+    @Override
+    public void invoke(T data, AnalysisContext context) {
+        log.info("解析到一条数据: {}", JSON.toJSONString(data));
+        list.add(data);
+    }
+
+    /**
+     * 所有数据解析完成了 都会来调用
+     *
+     * @param context context
+     */
+    @Override
+    public void doAfterAllAnalysed(AnalysisContext context) {
+        log.info("所有数据解析完成!");
+    }
+
+    /**
+     * 返回解析出来的List
+     */
+    public List<T> getData() {
+        return list;
+    }
+
+    /**
+     * 读取额外信息:合并单元格
+     */
+    @Override
+    public void extra(CellExtra extra, AnalysisContext context) {
+        log.info("读取到了一条额外信息:{}", JSON.toJSONString(extra));
+        switch (extra.getType()) {
+            case MERGE: {
+                log.info(
+                        "额外信息是合并单元格,而且覆盖了一个区间,在firstRowIndex:{},firstColumnIndex;{},lastRowIndex:{},lastColumnIndex:{}",
+                        extra.getFirstRowIndex(), extra.getFirstColumnIndex(), extra.getLastRowIndex(),
+                        extra.getLastColumnIndex());
+                if (extra.getRowIndex() >= headRowNumber) {
+                    extraMergeInfoList.add(extra);
+                }
+                break;
+            }
+            default:
+        }
+    }
+
+    /**
+     * 返回解析出来的合并单元格List
+     */
+    public List<CellExtra> getExtraMergeInfoList() {
+        return extraMergeInfoList;
+    }
+}

+ 29 - 0
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/listener/StructExcelListener.java

@@ -0,0 +1,29 @@
+package cn.tycoding.langchat.aigc.listener;
+
+import com.alibaba.excel.context.AnalysisContext;
+import com.alibaba.excel.event.AnalysisEventListener;
+import com.alibaba.excel.metadata.data.ReadCellData;
+import java.util.Map;
+
+/**
+ * Placeholder EasyExcel listener for structured Excel import. Rows are delivered as maps from
+ * column index to cell text; for now each header and data callback only prints a marker line.
+ *
+ * @author tycoding
+ * @since 2024/4/27
+ */
+public class StructExcelListener extends AnalysisEventListener<Map<Integer, String>> {
+
+    /**
+     * Header callback; prints the marker line.
+     */
+    @Override
+    public void invokeHead(Map<Integer, ReadCellData<?>> headMap, AnalysisContext context) {
+        printMarker();
+    }
+
+    /**
+     * Data-row callback; prints the marker line.
+     */
+    @Override
+    public void invoke(Map<Integer, String> data, AnalysisContext analysisContext) {
+        printMarker();
+    }
+
+    /**
+     * End-of-sheet callback; intentionally does nothing.
+     */
+    @Override
+    public void doAfterAllAnalysed(AnalysisContext context) {
+    }
+
+    /**
+     * Writes the marker line to standard output.
+     */
+    private void printMarker() {
+        System.out.println("----");
+    }
+}

+ 44 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/entity/AigcStructCol.java

@@ -0,0 +1,44 @@
+package cn.tycoding.langchat.core.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableId;
+import java.io.Serializable;
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+/**
+ * Persistent entity describing one column of an imported structured document.
+ * NOTE(review): presumably one record per Excel header column — confirm once the import
+ * logic is implemented.
+ *
+ * @author tycoding
+ * @since 2024/4/15
+ */
+@Data
+@Accessors(chain = true)
+public class AigcStructCol implements Serializable {
+    private static final long serialVersionUID = 548724967827903685L;
+
+    /**
+     * Primary key; assigned by the caller ({@code IdType.INPUT}) rather than generated.
+     */
+    @TableId(type = IdType.INPUT)
+    private String id;
+
+    /**
+     * Knowledge base ID.
+     */
+    private String knowledgeId;
+
+    /**
+     * Document ID.
+     */
+    private String docsId;
+
+    /**
+     * Column name.
+     */
+    private String label;
+
+    /**
+     * Column index.
+     */
+    private Integer index;
+}
+

+ 44 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/entity/AigcStructRow.java

@@ -0,0 +1,44 @@
+package cn.tycoding.langchat.core.entity;
+
+import com.baomidou.mybatisplus.annotation.IdType;
+import com.baomidou.mybatisplus.annotation.TableId;
+import java.io.Serializable;
+import lombok.Data;
+import lombok.experimental.Accessors;
+
+/**
+ * Persistent entity holding one value of an imported structured document, linked to its
+ * column through {@code colId}.
+ *
+ * @author tycoding
+ * @since 2024/4/15
+ */
+@Data
+@Accessors(chain = true)
+public class AigcStructRow implements Serializable {
+    // NOTE(review): same literal as AigcStructCol's serialVersionUID; looks copy-pasted.
+    private static final long serialVersionUID = 548724967827903685L;
+
+    /**
+     * Primary key; assigned by the caller ({@code IdType.INPUT}) rather than generated.
+     */
+    @TableId(type = IdType.INPUT)
+    private String id;
+
+    /**
+     * Knowledge base ID.
+     */
+    private String knowledgeId;
+
+    /**
+     * Document ID.
+     */
+    private String docsId;
+
+    /**
+     * Column ID.
+     */
+    private String colId;
+
+    /**
+     * Row value. NOTE(review): presumably the text of the cell in column {@code colId} — confirm.
+     */
+    private String value;
+}
+

+ 15 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/mapper/AigcStructColMapper.java

@@ -0,0 +1,15 @@
+package cn.tycoding.langchat.core.mapper;
+
+import cn.tycoding.langchat.core.entity.AigcStructCol;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis-Plus mapper for {@link AigcStructCol}. It declares no methods of its own; everything
+ * available on it is inherited from {@link BaseMapper}.
+ *
+ * @author tycoding
+ * @since 2024/4/15
+ */
+@Mapper
+public interface AigcStructColMapper extends BaseMapper<AigcStructCol> {
+
+}
+

+ 15 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/mapper/AigcStructRowMapper.java

@@ -0,0 +1,15 @@
+package cn.tycoding.langchat.core.mapper;
+
+import cn.tycoding.langchat.core.entity.AigcStructRow;
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis-Plus mapper for {@link AigcStructRow}. It declares no methods of its own; everything
+ * available on it is inherited from {@link BaseMapper}.
+ *
+ * @author tycoding
+ * @since 2024/4/15
+ */
+@Mapper
+public interface AigcStructRowMapper extends BaseMapper<AigcStructRow> {
+
+}
+

+ 13 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/service/AigcStructColService.java

@@ -0,0 +1,13 @@
+package cn.tycoding.langchat.core.service;
+
+import cn.tycoding.langchat.core.entity.AigcStructCol;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * Service contract for {@link AigcStructCol} records. It adds nothing beyond the operations
+ * inherited from MyBatis-Plus {@link IService}.
+ *
+ * @author tycoding
+ * @since 2024/1/19
+ */
+public interface AigcStructColService extends IService<AigcStructCol> {
+
+}
+

+ 13 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/service/AigcStructRowService.java

@@ -0,0 +1,13 @@
+package cn.tycoding.langchat.core.service;
+
+import cn.tycoding.langchat.core.entity.AigcStructRow;
+import com.baomidou.mybatisplus.extension.service.IService;
+
+/**
+ * Service contract for {@link AigcStructRow} records. It adds nothing beyond the operations
+ * inherited from MyBatis-Plus {@link IService}.
+ *
+ * @author tycoding
+ * @since 2024/1/19
+ */
+public interface AigcStructRowService extends IService<AigcStructRow> {
+
+}
+

+ 2 - 2
langchat-core/src/main/java/cn/tycoding/langchat/core/service/LangDocService.java

@@ -22,9 +22,9 @@ public interface LangDocService {
     List<EmbeddingR> embeddingDocs(DocR req);
 
     /**
-     * 解析结构化文件向量
+     * 解析结构化文件
      */
-    void embeddingExcel(DocR req);
+    void embeddingStruct(DocR req);
 
     TokenStream search(DocR req);
 

+ 19 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/AigcStructColServiceImpl.java

@@ -0,0 +1,19 @@
+package cn.tycoding.langchat.core.service.impl;
+
+import cn.tycoding.langchat.core.entity.AigcStructCol;
+import cn.tycoding.langchat.core.mapper.AigcStructColMapper;
+import cn.tycoding.langchat.core.service.AigcStructColService;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import lombok.RequiredArgsConstructor;
+import org.springframework.stereotype.Service;
+
+/**
+ * Spring-managed implementation of {@link AigcStructColService}. All behaviour comes from
+ * MyBatis-Plus {@link ServiceImpl} backed by {@link AigcStructColMapper}; the class body is empty.
+ *
+ * @author tycoding
+ * @since 2024/1/19
+ */
+@Service
+@RequiredArgsConstructor
+public class AigcStructColServiceImpl extends ServiceImpl<AigcStructColMapper, AigcStructCol> implements AigcStructColService {
+
+}
+

+ 20 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/AigcStructRowServiceImpl.java

@@ -0,0 +1,20 @@
+package cn.tycoding.langchat.core.service.impl;
+
+import cn.tycoding.langchat.core.entity.AigcStructRow;
+import cn.tycoding.langchat.core.mapper.AigcStructRowMapper;
+import cn.tycoding.langchat.core.service.AigcStructRowService;
+import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
+import lombok.RequiredArgsConstructor;
+import org.springframework.stereotype.Service;
+
+/**
+ * Spring-managed implementation of {@link AigcStructRowService}. All behaviour comes from
+ * MyBatis-Plus {@link ServiceImpl} backed by {@link AigcStructRowMapper}; the class body is empty.
+ *
+ * @author tycoding
+ * @since 2024/1/19
+ */
+@Service
+@RequiredArgsConstructor
+public class AigcStructRowServiceImpl extends ServiceImpl<AigcStructRowMapper, AigcStructRow> implements
+        AigcStructRowService {
+
+}
+

+ 1 - 1
langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/LangDocServiceImpl.java

@@ -85,7 +85,7 @@ public class LangDocServiceImpl implements LangDocService {
     }
 
     @Override
-    public void embeddingExcel(DocR req) {
+    public void embeddingStruct(DocR req) {
 
     }
 

+ 1 - 1
langchat-ui/src/api/aigc/embedding.ts

@@ -31,7 +31,7 @@ export function embeddingExcel(
   onUploadProgress?: (progressEvent: AxiosProgressEvent) => void
 ) {
   return http.request({
-    url: `/aigc/embedding/excel/${knowledgeId}`,
+    url: `/aigc/embedding/struct/excel/${knowledgeId}`,
     method: 'post',
     data,
     headers: {

langchat-upms/src/main/resources/mapper/SysMenuMapper.xml → langchat-upms/src/main/resources/mapper/AigcStructColService.xml