Browse Source

add docs embedding

tycoding 1 year ago
parent
commit
33a77c23bb
19 changed files with 332 additions and 89 deletions
  1. 2 1
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/controller/AigcDocsSliceController.java
  2. 46 4
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/endpoint/EmbeddingEndpoint.java
  3. 3 1
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/entity/AigcDocs.java
  4. 1 1
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/entity/AigcOss.java
  5. 11 0
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/enums/DocsTypeEnum.java
  6. 4 0
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/service/AigcKnowledgeService.java
  7. 11 1
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/service/impl/AigcKnowledgeServiceImpl.java
  8. 2 1
      langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/service/impl/AigcOssServiceImpl.java
  9. 2 3
      langchat-common/src/main/java/cn/tycoding/langchat/common/dto/DocR.java
  10. 2 1
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/LangDocService.java
  11. 19 12
      langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/LangDocServiceImpl.java
  12. 6 11
      langchat-server/src/main/java/cn/tycoding/langchat/aigc/endpoint/DocsEndpoint.java
  13. 92 4
      langchat-ui/package-lock.json
  14. 17 0
      langchat-ui/src/api/aigc/embedding.ts
  15. 0 17
      langchat-ui/src/api/aigc/slice.ts
  16. 34 6
      langchat-ui/src/views/aigc/knowledge/components/DocsSlice/columns.ts
  17. 26 6
      langchat-ui/src/views/aigc/knowledge/components/DocsSlice/index.vue
  18. 9 16
      langchat-ui/src/views/aigc/knowledge/components/ImportFile/components/DocImport.vue
  19. 45 4
      langchat-ui/src/views/aigc/knowledge/components/ImportFile/components/ExcelImport.vue

+ 2 - 1
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/controller/AigcDocsSliceController.java

@@ -8,6 +8,7 @@ import cn.tycoding.langchat.common.utils.QueryPage;
 import cn.tycoding.langchat.common.utils.R;
 import com.baomidou.mybatisplus.core.toolkit.Wrappers;
 import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
+import java.util.Date;
 import lombok.RequiredArgsConstructor;
 import org.springframework.web.bind.annotation.*;
 
@@ -46,7 +47,7 @@ public class AigcDocsSliceController {
 
     @PostMapping
     public R add(@RequestBody AigcDocsSlice data) {
-        data.setCreateTime(String.valueOf(System.currentTimeMillis()));
+        data.setCreateTime(new Date()); // stamp the slice with the server's current time as a Date (previously an epoch-millis string)
         docsSliceMapper.insert(data);
         return R.ok();
     }

+ 46 - 4
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/endpoint/EmbeddingEndpoint.java

@@ -3,16 +3,24 @@ package cn.tycoding.langchat.aigc.endpoint;
 import cn.hutool.core.util.StrUtil;
 import cn.tycoding.langchat.aigc.entity.AigcDocs;
 import cn.tycoding.langchat.aigc.entity.AigcDocsSlice;
+import cn.tycoding.langchat.aigc.entity.AigcOss;
+import cn.tycoding.langchat.aigc.enums.DocsTypeEnum;
 import cn.tycoding.langchat.aigc.service.AigcKnowledgeService;
+import cn.tycoding.langchat.aigc.service.AigcOssService;
 import cn.tycoding.langchat.common.dto.DocR;
 import cn.tycoding.langchat.common.dto.EmbeddingR;
 import cn.tycoding.langchat.common.exception.ServiceException;
+import cn.tycoding.langchat.common.utils.R;
 import cn.tycoding.langchat.core.service.LangDocService;
+import java.util.List;
 import lombok.AllArgsConstructor;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.web.bind.annotation.PathVariable;
 import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.RequestBody;
 import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RestController;
+import org.springframework.web.multipart.MultipartFile;
 
 /**
  * @author tycoding
@@ -25,16 +33,20 @@ public class EmbeddingEndpoint {
 
     private final LangDocService langDocService;
     private final AigcKnowledgeService aigcKnowledgeService;
+    private final AigcOssService aigcOssService;
 
+    @Async
     @PostMapping("/text")
-    public void text(@RequestBody AigcDocs data) {
+    public R text(@RequestBody AigcDocs data) {
         if (StrUtil.isBlankIfStr(data.getContent())) {
             throw new ServiceException("文档内容不能为空");
         }
+        data.setType(DocsTypeEnum.INPUT.name()).setSliceStatus(false);
         aigcKnowledgeService.addDocs(data);
-        EmbeddingR embeddingR = langDocService.embeddingText(new DocR().setMessage(data.getContent())
-                .setId(data.getId())
-                .setKnowledgeId(data.getKnowledgeId()));
+        EmbeddingR embeddingR = langDocService.embeddingText(
+                new DocR().setMessage(data.getContent())
+                        .setId(data.getId())
+                        .setKnowledgeId(data.getKnowledgeId()));
         aigcKnowledgeService.addDocsSlice(new AigcDocsSlice()
                 .setKnowledgeId(data.getKnowledgeId())
                 .setDocsId(data.getId())
@@ -42,5 +54,35 @@ public class EmbeddingEndpoint {
                 .setName(data.getName())
                 .setContent(embeddingR.getText())
         );
+
+        aigcKnowledgeService.updateDocs(new AigcDocs().setId(data.getId()).setSliceStatus(true).setSliceNum(1));
+        return R.ok();
+    }
+
+    /**
+     * Uploads a file into a knowledge base and embeds its content.
+     *
+     * <p>The file is stored through {@code aigcOssService.upload}, a document record of type
+     * {@link DocsTypeEnum#UPLOAD} is created, the stored file is embedded through
+     * {@code langDocService.embeddingDocs}, and one slice record is written for every
+     * {@link EmbeddingR} that call returns. Finally the document record is updated with
+     * {@code sliceStatus = true} and the number of slices produced. All of this happens
+     * within the request before the response is returned.
+     *
+     * @param file        the uploaded file; must be present and non-empty
+     * @param knowledgeId id of the knowledge base, taken from the request path
+     * @return {@code R.ok()} once every slice has been recorded
+     * @throws ServiceException if no file, or an empty file, was submitted
+     */
+    @PostMapping("/docs/{knowledgeId}")
+    public R docs(MultipartFile file, @PathVariable String knowledgeId) {
+        // Reject a missing or empty upload up front instead of failing inside the storage layer.
+        if (file == null || file.isEmpty()) {
+            throw new ServiceException("上传文件不能为空");
+        }
+
+        AigcOss oss = aigcOssService.upload(file);
+        AigcDocs data = new AigcDocs()
+                .setName(oss.getFileName())
+                .setSliceStatus(false)
+                .setSize(oss.getSize())
+                .setType(DocsTypeEnum.UPLOAD.name())
+                .setKnowledgeId(knowledgeId);
+        aigcKnowledgeService.addDocs(data);
+
+        List<EmbeddingR> list = langDocService.embeddingDocs(
+                new DocR().setKnowledgeId(knowledgeId).setPath(oss.getPath()));
+        // Record one slice per embedded segment, linked back to the document just created.
+        for (EmbeddingR slice : list) {
+            aigcKnowledgeService.addDocsSlice(new AigcDocsSlice()
+                    .setKnowledgeId(data.getKnowledgeId())
+                    .setDocsId(data.getId())
+                    .setVectorId(slice.getVectorId())
+                    .setName(data.getName())
+                    .setContent(slice.getText())
+            );
+        }
+
+        aigcKnowledgeService.updateDocs(new AigcDocs().setId(data.getId()).setSliceStatus(true).setSliceNum(list.size()));
+        return R.ok();
     }
 }

+ 3 - 1
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/entity/AigcDocs.java

@@ -6,12 +6,14 @@ import lombok.Data;
 
 import java.io.Serializable;
 import java.util.Date;
+import lombok.experimental.Accessors;
 
 /**
  * @author tycoding
  * @since 2024/4/15
  */
 @Data
+@Accessors(chain = true)
 public class AigcDocs implements Serializable {
     private static final long serialVersionUID = 548724967827903685L;
 
@@ -44,7 +46,7 @@ public class AigcDocs implements Serializable {
     /**
      * 文件大小
      */
-    private Integer size;
+    private Long size;
 
     /**
      * 切片数量

+ 1 - 1
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/entity/AigcOss.java

@@ -25,7 +25,7 @@ public class AigcOss extends OssR {
     /**
      * 用户ID
      */
-    private Long userId;
+    private String userId;
 
     /**
      * 文件描述

+ 11 - 0
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/enums/DocsTypeEnum.java

@@ -0,0 +1,11 @@
+package cn.tycoding.langchat.aigc.enums;
+
+/**
+ * @author tycoding
+ * @since 2024/4/26
+ */
+public enum DocsTypeEnum {
+
+    INPUT,
+    UPLOAD,;
+}

+ 4 - 0
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/service/AigcKnowledgeService.java

@@ -16,9 +16,13 @@ public interface AigcKnowledgeService extends IService<AigcKnowledge> {
      */
     void addDocs(AigcDocs data);
 
+    void updateDocs(AigcDocs data); // updates a document record; the implementation in this commit delegates to the mapper's updateById
+
     /**
      * Adds an embedded slice record to the specified document.
      */
     void addDocsSlice(AigcDocsSlice data);
+
+    void updateDocsSlice(AigcDocsSlice data); // updates a document-slice record; the implementation in this commit delegates to the mapper's updateById
 }
 

+ 11 - 1
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/service/impl/AigcKnowledgeServiceImpl.java

@@ -27,12 +27,17 @@ public class AigcKnowledgeServiceImpl extends ServiceImpl<AigcKnowledgeMapper, A
     @Override
     public void addDocs(AigcDocs data) {
         data.setCreateTime(new Date());
-        data.setSize(0);
+        data.setSize(0L);
         data.setSliceNum(0);
         data.setSliceStatus(false);
         aigcDocsMapper.insert(data);
     }
 
+    @Override
+    public void updateDocs(AigcDocs data) { // updates the document record through the docs mapper's updateById
+        aigcDocsMapper.updateById(data);
+    }
+
     @Override
     public void addDocsSlice(AigcDocsSlice data) {
         data.setCreateTime(new Date())
@@ -41,5 +46,10 @@ public class AigcKnowledgeServiceImpl extends ServiceImpl<AigcKnowledgeMapper, A
         ;
         aigcDocsSliceMapper.insert(data);
     }
+
+    @Override
+    public void updateDocsSlice(AigcDocsSlice data) { // updates the slice record through the slice mapper's updateById
+        aigcDocsSliceMapper.updateById(data);
+    }
 }
 

+ 2 - 1
langchat-aigc/src/main/java/cn/tycoding/langchat/aigc/service/impl/AigcOssServiceImpl.java

@@ -3,6 +3,7 @@ package cn.tycoding.langchat.aigc.service.impl;
 import cn.tycoding.langchat.aigc.entity.AigcOss;
 import cn.tycoding.langchat.aigc.mapper.AigcOssMapper;
 import cn.tycoding.langchat.aigc.service.AigcOssService;
+import cn.tycoding.langchat.aigc.utils.AigcAuthUtil;
 import cn.tycoding.langchat.common.dto.OssR;
 import cn.tycoding.langchat.common.properties.OssProps;
 import cn.tycoding.langchat.common.utils.OssUtil;
@@ -27,7 +28,7 @@ public class AigcOssServiceImpl extends ServiceImpl<AigcOssMapper, AigcOss> impl
         OssR ossR = OssUtil.transfer(ossProps, file);
         AigcOss oss = new AigcOss();
         BeanUtils.copyProperties(ossR, oss);
-        //TODO 增加userId
+        oss.setUserId(AigcAuthUtil.getUserId());
         this.save(oss);
         return oss;
     }

+ 2 - 3
langchat-common/src/main/java/cn/tycoding/langchat/common/dto/DocR.java

@@ -3,7 +3,6 @@ package cn.tycoding.langchat.common.dto;
 import cn.tycoding.langchat.common.utils.StreamEmitter;
 import dev.langchain4j.model.input.Prompt;
 import lombok.Data;
-import lombok.EqualsAndHashCode;
 import lombok.experimental.Accessors;
 
 /**
@@ -12,13 +11,13 @@ import lombok.experimental.Accessors;
  */
 @Data
 @Accessors(chain = true)
-@EqualsAndHashCode(callSuper = true)
-public class DocR extends OssR {
+public class DocR {
     private static final long serialVersionUID = 4885562458437352308L;
 
     private String id;
 
     private String knowledgeId;
+    private String path;
 
     private Prompt prompt;
 

+ 2 - 1
langchat-core/src/main/java/cn/tycoding/langchat/core/service/LangDocService.java

@@ -3,6 +3,7 @@ package cn.tycoding.langchat.core.service;
 import cn.tycoding.langchat.common.dto.DocR;
 import cn.tycoding.langchat.common.dto.EmbeddingR;
 import dev.langchain4j.service.TokenStream;
+import java.util.List;
 
 /**
  * @author tycoding
@@ -18,7 +19,7 @@ public interface LangDocService {
     /**
      * 解析文本文件向量
      */
-    void embeddingDocs(DocR req);
+    List<EmbeddingR> embeddingDocs(DocR req);
 
     /**
      * 解析结构化文件向量

+ 19 - 12
langchat-core/src/main/java/cn/tycoding/langchat/core/service/impl/LangDocServiceImpl.java

@@ -1,12 +1,16 @@
 package cn.tycoding.langchat.core.service.impl;
 
+import static dev.langchain4j.data.document.Metadata.metadata;
+import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO;
+import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
+
 import cn.tycoding.langchat.common.dto.DocR;
 import cn.tycoding.langchat.common.dto.EmbeddingR;
 import cn.tycoding.langchat.core.enums.ModelConst;
 import cn.tycoding.langchat.core.provider.EmbedProvider;
 import cn.tycoding.langchat.core.provider.ModelProvider;
-import cn.tycoding.langchat.core.service.LangDocService;
 import cn.tycoding.langchat.core.service.Assistant;
+import cn.tycoding.langchat.core.service.LangDocService;
 import dev.langchain4j.data.document.Document;
 import dev.langchain4j.data.document.DocumentSplitter;
 import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
@@ -25,17 +29,13 @@ import dev.langchain4j.service.AiServices;
 import dev.langchain4j.service.TokenStream;
 import dev.langchain4j.store.embedding.filter.Filter;
 import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Function;
 import lombok.AllArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
 
-import java.util.List;
-import java.util.function.Function;
-
-import static dev.langchain4j.data.document.Metadata.metadata;
-import static dev.langchain4j.model.openai.OpenAiModelName.GPT_3_5_TURBO;
-import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
-
 /**
  * @author tycoding
  * @since 2024/4/4
@@ -61,11 +61,12 @@ public class LangDocServiceImpl implements LangDocService {
     }
 
     @Override
-    public void embeddingDocs(DocR req) {
-        EmbeddingModel model = provider.embed();
+    public List<EmbeddingR> embeddingDocs(DocR req) {
+//        EmbeddingModel model = provider.embed();
+        EmbeddingModel model = new AllMiniLmL6V2EmbeddingModel();
 
         Document document = FileSystemDocumentLoader.loadDocument(req.getPath(), new ApacheTikaDocumentParser());
-        document.metadata().add("id", req.getId());
+        document.metadata().add("knowledgeId", req.getKnowledgeId());
 
         DocumentSplitter splitter = DocumentSplitters.recursive(
                 100,
@@ -74,7 +75,13 @@ public class LangDocServiceImpl implements LangDocService {
         );
         List<TextSegment> segments = splitter.split(document);
         List<Embedding> embeddings = model.embedAll(segments).content();
-        milvusEmbeddingStore.addAll(embeddings, segments);
+        List<String> ids = milvusEmbeddingStore.addAll(embeddings, segments);
+
+        List<EmbeddingR> list = new ArrayList<>();
+        for (int i = 0; i < ids.size(); i++) {
+            list.add(new EmbeddingR().setVectorId(ids.get(i)).setText(segments.get(i).text()));
+        }
+        return list;
     }
 
     @Override

+ 6 - 11
langchat-server/src/main/java/cn/tycoding/langchat/aigc/endpoint/DocsEndpoint.java

@@ -1,6 +1,5 @@
 package cn.tycoding.langchat.aigc.endpoint;
 
-import cn.tycoding.langchat.aigc.entity.AigcOss;
 import cn.tycoding.langchat.aigc.service.AigcOssService;
 import cn.tycoding.langchat.aigc.service.ChatService;
 import cn.tycoding.langchat.common.component.AsyncFuture;
@@ -10,8 +9,11 @@ import cn.tycoding.langchat.common.utils.PromptUtil;
 import cn.tycoding.langchat.common.utils.R;
 import cn.tycoding.langchat.common.utils.StreamEmitter;
 import lombok.AllArgsConstructor;
-import org.springframework.web.bind.annotation.*;
-import org.springframework.web.multipart.MultipartFile;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
 import org.springframework.web.servlet.mvc.method.annotation.SseEmitter;
 
 /**
@@ -36,14 +38,7 @@ public class DocsEndpoint {
         return emitter.get();
     }
 
-    @PostMapping("/upload")
-    public R upload(MultipartFile file) {
-        AigcOss oss = aigcOssService.upload(file);
-        asyncFuture.async(() -> {
-            chatService.docsEmbed(oss);
-        }, "111", oss.getId());
-        return R.ok(oss);
-    }
+
 
     @GetMapping("/task")
     public R task() {

+ 92 - 4
langchat-ui/package-lock.json

@@ -9,6 +9,7 @@
       "version": "1.9.0",
       "dependencies": {
         "@iconify/vue": "^4.1.1",
+        "@traptitech/markdown-it-katex": "^3.6.0",
         "@types/uuid": "^9.0.2",
         "@vicons/antd": "^0.12.0",
         "@vicons/ionicons5": "^0.12.0",
@@ -18,7 +19,10 @@
         "date-fns": "^2.30.0",
         "echarts": "^5.4.3",
         "element-resize-detector": "^1.2.4",
+        "highlight.js": "^11.9.0",
         "lodash-es": "^4.17.21",
+        "markdown-it": "^14.1.0",
+        "markdown-it-link-attributes": "^4.0.1",
         "mitt": "^3.0.1",
         "mockjs": "^1.1.0",
         "naive-ui": "^2.36.0",
@@ -28,7 +32,8 @@
         "vfonts": "^0.0.3",
         "vue": "^3.3.4",
         "vue-router": "^4.2.4",
-        "vue-types": "^4.2.1"
+        "vue-types": "^4.2.1",
+        "vue3-tree-org": "^4.2.2"
       },
       "devDependencies": {
         "@commitlint/cli": "^17.7.0",
@@ -2226,6 +2231,14 @@
         "@sinonjs/commons": "^3.0.0"
       }
     },
+    "node_modules/@traptitech/markdown-it-katex": {
+      "version": "3.6.0",
+      "resolved": "https://registry.npmmirror.com/@traptitech/markdown-it-katex/-/markdown-it-katex-3.6.0.tgz",
+      "integrity": "sha512-CnJzTWxsgLGXFdSrWRaGz7GZ1kUUi8g3E9HzJmeveX1YwVJavrKYqysktfHZQsujdnRqV5O7g8FPKEA/aeTkOQ==",
+      "dependencies": {
+        "katex": "^0.16.0"
+      }
+    },
     "node_modules/@tsconfig/node10": {
       "version": "1.0.11",
       "resolved": "https://registry.npmmirror.com/@tsconfig/node10/-/node10-1.0.11.tgz",
@@ -3153,8 +3166,7 @@
     "node_modules/argparse": {
       "version": "2.0.1",
       "resolved": "https://registry.npmmirror.com/argparse/-/argparse-2.0.1.tgz",
-      "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
-      "dev": true
+      "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
     },
     "node_modules/array-ify": {
       "version": "1.0.0",
@@ -4078,7 +4090,6 @@
       "version": "3.36.1",
       "resolved": "https://registry.npmmirror.com/core-js/-/core-js-3.36.1.tgz",
       "integrity": "sha512-BTvUrwxVBezj5SZ3f10ImnX2oRByMxql3EimVqMysepbC9EeMUOpLwdy6Eoili2x6E4kf+ZUB5k/+Jv55alPfA==",
-      "dev": true,
       "hasInstallScript": true
     },
     "node_modules/cosmiconfig": {
@@ -7633,6 +7644,25 @@
         "node": "*"
       }
     },
+    "node_modules/katex": {
+      "version": "0.16.10",
+      "resolved": "https://registry.npmmirror.com/katex/-/katex-0.16.10.tgz",
+      "integrity": "sha512-ZiqaC04tp2O5utMsl2TEZTXxa6WSC4yo0fv5ML++D3QZv/vx2Mct0mTlRx3O+uUkjfuAgOkzsCmq5MiUEsDDdA==",
+      "dependencies": {
+        "commander": "^8.3.0"
+      },
+      "bin": {
+        "katex": "cli.js"
+      }
+    },
+    "node_modules/katex/node_modules/commander": {
+      "version": "8.3.0",
+      "resolved": "https://registry.npmmirror.com/commander/-/commander-8.3.0.tgz",
+      "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==",
+      "engines": {
+        "node": ">= 12"
+      }
+    },
     "node_modules/keyv": {
       "version": "4.5.4",
       "resolved": "https://registry.npmmirror.com/keyv/-/keyv-4.5.4.tgz",
@@ -7776,6 +7806,14 @@
       "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==",
       "dev": true
     },
+    "node_modules/linkify-it": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmmirror.com/linkify-it/-/linkify-it-5.0.0.tgz",
+      "integrity": "sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==",
+      "dependencies": {
+        "uc.micro": "^2.0.0"
+      }
+    },
     "node_modules/lint-staged": {
       "version": "13.3.0",
       "resolved": "https://registry.npmmirror.com/lint-staged/-/lint-staged-13.3.0.tgz",
@@ -8331,12 +8369,38 @@
         "node": ">=8"
       }
     },
+    "node_modules/markdown-it": {
+      "version": "14.1.0",
+      "resolved": "https://registry.npmmirror.com/markdown-it/-/markdown-it-14.1.0.tgz",
+      "integrity": "sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==",
+      "dependencies": {
+        "argparse": "^2.0.1",
+        "entities": "^4.4.0",
+        "linkify-it": "^5.0.0",
+        "mdurl": "^2.0.0",
+        "punycode.js": "^2.3.1",
+        "uc.micro": "^2.1.0"
+      },
+      "bin": {
+        "markdown-it": "bin/markdown-it.mjs"
+      }
+    },
+    "node_modules/markdown-it-link-attributes": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmmirror.com/markdown-it-link-attributes/-/markdown-it-link-attributes-4.0.1.tgz",
+      "integrity": "sha512-pg5OK0jPLg62H4k7M9mRJLT61gUp9nvG0XveKYHMOOluASo9OEF13WlXrpAp2aj35LbedAy3QOCgQCw0tkLKAQ=="
+    },
     "node_modules/mathml-tag-names": {
       "version": "2.1.3",
       "resolved": "https://registry.npmmirror.com/mathml-tag-names/-/mathml-tag-names-2.1.3.tgz",
       "integrity": "sha512-APMBEanjybaPzUrfqU0IMU5I0AswKMH7k8OTLs0vvV4KZpExkTkY87nR/zpbuTPj+gARop7aGUbl11pnDfW6xg==",
       "dev": true
     },
+    "node_modules/mdurl": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmmirror.com/mdurl/-/mdurl-2.0.0.tgz",
+      "integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w=="
+    },
     "node_modules/meow": {
       "version": "8.1.2",
       "resolved": "https://registry.npmmirror.com/meow/-/meow-8.1.2.tgz",
@@ -9568,6 +9632,14 @@
         "node": ">=6"
       }
     },
+    "node_modules/punycode.js": {
+      "version": "2.3.1",
+      "resolved": "https://registry.npmmirror.com/punycode.js/-/punycode.js-2.3.1.tgz",
+      "integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/pure-rand": {
       "version": "6.1.0",
       "resolved": "https://registry.npmmirror.com/pure-rand/-/pure-rand-6.1.0.tgz",
@@ -11282,6 +11354,11 @@
         "node": ">=4.2.0"
       }
     },
+    "node_modules/uc.micro": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmmirror.com/uc.micro/-/uc.micro-2.1.0.tgz",
+      "integrity": "sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A=="
+    },
     "node_modules/undici-types": {
       "version": "5.26.5",
       "resolved": "https://registry.npmmirror.com/undici-types/-/undici-types-5.26.5.tgz",
@@ -12268,6 +12345,17 @@
         "vue": "^2.0.0 || ^3.0.0"
       }
     },
+    "node_modules/vue3-tree-org": {
+      "version": "4.2.2",
+      "resolved": "https://registry.npmmirror.com/vue3-tree-org/-/vue3-tree-org-4.2.2.tgz",
+      "integrity": "sha512-AG2SykyD6dw0jIyqBm8iuF9j9GWli6KrwudxR1RjULCCBTDFsoNm7MmP/weKT7wowN/sPk+e2RsnvEJMw2OJMw==",
+      "dependencies": {
+        "core-js": "^3.6.5"
+      },
+      "peerDependencies": {
+        "vue": "^3.0.0"
+      }
+    },
     "node_modules/vuedraggable": {
       "version": "4.1.0",
       "resolved": "https://registry.npmmirror.com/vuedraggable/-/vuedraggable-4.1.0.tgz",

+ 17 - 0
langchat-ui/src/api/aigc/embedding.ts

@@ -1,4 +1,5 @@
 import { http } from '@/utils/http/axios';
+import { AxiosProgressEvent } from 'axios';
 
 export function embeddingText(params: any) {
   return http.request({
@@ -7,3 +8,19 @@ export function embeddingText(params: any) {
     params,
   });
 }
+
+export function embeddingDocs( // POSTs a file to the docs-embedding endpoint of one knowledge base
+  knowledgeId: string, // interpolated into the request URL path
+  data: any, // request body, sent as multipart/form-data
+  onUploadProgress?: (progressEvent: AxiosProgressEvent) => void // optional upload-progress callback passed through to the request
+) {
+  return http.request({
+    url: `/aigc/embedding/docs/${knowledgeId}`,
+    method: 'post',
+    data,
+    headers: {
+      'Content-Type': 'multipart/form-data',
+    },
+    onUploadProgress,
+  });
+}

+ 0 - 17
langchat-ui/src/api/aigc/slice.ts

@@ -1,21 +1,4 @@
 import { http } from '@/utils/http/axios';
-import { AxiosProgressEvent } from 'axios';
-
-export function upload(
-  id: string,
-  data: any,
-  onUploadProgress?: (progressEvent: AxiosProgressEvent) => void
-) {
-  return http.request({
-    url: `/aigc/oss/put/${id}`,
-    method: 'post',
-    data,
-    headers: {
-      'Content-Type': 'multipart/form-data',
-    },
-    onUploadProgress,
-  });
-}
 
 export function list(params: any) {
   return http.request({

+ 34 - 6
langchat-ui/src/views/aigc/knowledge/components/DocsSlice/columns.ts

@@ -1,15 +1,42 @@
 import { BasicColumn } from '@/components/Table';
 import { FormSchema } from '@/components/Form';
+import { h } from 'vue';
+import { NTag } from 'naive-ui';
 
 export const columns: BasicColumn[] = [
   {
-    title: '文档描述',
-    key: 'des',
+    title: '文档名称',
+    key: 'name',
+    width: 150,
   },
   {
-    title: '文档内容',
+    title: '字符数',
+    key: 'wordNum',
+    width: 110,
+    align: 'center',
+  },
+  {
+    title: '切片内容',
     key: 'content',
   },
+  {
+    title: '切片状态',
+    key: 'status',
+    width: 100,
+    align: 'center',
+    render(row) {
+      return h(
+        NTag,
+        {
+          size: 'small',
+          type: row.status == true ? 'success' : 'info',
+        },
+        {
+          default: () => (row.status == true ? '已训练' : '未训练'),
+        }
+      );
+    },
+  },
   {
     title: '创建时间',
     key: 'createTime',
@@ -19,11 +46,12 @@ export const columns: BasicColumn[] = [
 
 export const searchSchemas: FormSchema[] = [
   {
-    field: 'des',
+    field: 'docsId', // search by the id of the owning document
     component: 'NInput',
-    label: '文档描述',
+    label: '文档',
+    slot: 'docsSlot', // name of the form slot that supplies the input for this field
     componentProps: {
-      placeholder: '请输入文档描述',
+      placeholder: '请选择文档',
     },
   },
 ];

+ 26 - 6
langchat-ui/src/views/aigc/knowledge/components/DocsSlice/index.vue

@@ -1,5 +1,5 @@
 <script lang="ts" setup>
-  import { h, reactive, ref } from 'vue';
+  import { h, onMounted, reactive, ref } from 'vue';
   import { BasicTable, TableAction } from '@/components/Table';
   import { BasicForm, useForm } from '@/components/Form';
   import { del, page as getPage } from '@/api/aigc/slice';
@@ -7,15 +7,16 @@
   import { DeleteOutlined } from '@vicons/antd';
   import { useDialog, useMessage } from 'naive-ui';
   import { useRouter } from 'vue-router';
+  import { list } from '@/api/aigc/docs';
 
   const router = useRouter();
   const message = useMessage();
   const dialog = useDialog();
-
   const actionRef = ref();
+  const docsList = ref();
 
   const actionColumn = reactive({
-    width: 150,
+    width: 100,
     title: '操作',
     key: 'action',
     fixed: 'right',
@@ -40,10 +41,13 @@
     showAdvancedButton: false,
     schemas: searchSchemas,
   });
+  onMounted(async () => {
+    docsList.value = await list({});
+  });
 
   const loadDataTable = async (res: any) => {
-    const kbId = router.currentRoute.value.params.id;
-    return await getPage({ ...getFieldsValue(), ...res, kbId });
+    const knowledgeId = router.currentRoute.value.params.id; // the current route's `id` param is used as the knowledge base id
+    return await getPage({ ...getFieldsValue(), ...res, knowledgeId }); // merges search-form values, the table's request params and the knowledge base id into one query
   };
 
   function reloadTable() {
@@ -68,11 +72,27 @@
   function handleReset(values: Recordable) {
     reloadTable();
   }
+  function handleSelectDocs(val: string) { // handler for the document select's update:value event
+    console.log(val); // debug output only; the selected value is already bound to the form model via v-model
+  }
 </script>
 
 <template>
   <n-card>
-    <BasicForm @register="register" @reset="handleReset" @submit="reloadTable" />
+    <BasicForm @register="register" @reset="handleReset" @submit="reloadTable">
+      <template #docsSlot="{ model, field }">
+        <n-select
+          v-model:value="model[field]"
+          :options="docsList"
+          filterable
+          clearable
+          label-field="name"
+          value-field="id"
+          placeholder="请选择文档查询"
+          @update:value="handleSelectDocs"
+        />
+      </template>
+    </BasicForm>
 
     <BasicTable
       ref="actionRef"

+ 9 - 16
langchat-ui/src/views/aigc/knowledge/components/ImportFile/components/DocImport.vue

@@ -2,24 +2,13 @@
   import { DownloadOutline } from '@vicons/ionicons5';
   import { useRouter } from 'vue-router';
   import { UploadCustomRequestOptions, useMessage } from 'naive-ui';
-  import { upload } from '@/api/aigc/slice';
+  import { embeddingDocs } from '@/api/aigc/embedding';
   import { ref } from 'vue';
 
   const router = useRouter();
   const message = useMessage();
   const fileList = ref<any[]>([]);
 
-  async function handleSubmit() {
-    if (fileList.value.length == 0) {
-      message.success('已经提交到知识库');
-      return;
-    }
-    console.log(fileList);
-    for (const i of fileList.value) {
-      // await add(i);
-    }
-  }
-
   const handleImport = ({
     file,
     data,
@@ -31,7 +20,7 @@
     onProgress,
   }: UploadCustomRequestOptions) => {
     const kbId = router.currentRoute.value.params.id;
-    upload(
+    embeddingDocs(
       String(kbId),
       {
         file: file.file,
@@ -57,8 +46,11 @@
 
 <template>
   <n-space vertical>
-    <n-button type="success" @click="handleSubmit">提交到知识库学习</n-button>
-    <n-upload :custom-request="handleImport" directory-dnd>
+    <n-upload
+      :custom-request="handleImport"
+      directory-dnd
+      accept=".txt,.md,.doc,.docx,.pdf,text/plain,text/markdown,application/msword,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/pdf"
+    >
       <n-upload-dragger>
         <div style="margin-bottom: 12px">
           <n-icon :depth="3" size="48">
@@ -67,7 +59,8 @@
         </div>
         <n-text style="font-size: 16px"> 点击或者拖动文件到该区域来上传</n-text>
         <n-p depth="3" style="margin: 8px 0 0 0">
-          请不要上传敏感数据,比如你的银行卡号和密码,信用卡号有效期和安全码
+          请上传文档文本类型的文件,文本类型文件将被单独处理和向量化,支持的文件格式有:.txt、 .md、
+          .docx、 .doc、.pdf
         </n-p>
       </n-upload-dragger>
     </n-upload>

+ 45 - 4
langchat-ui/src/views/aigc/knowledge/components/ImportFile/components/ExcelImport.vue

@@ -1,14 +1,55 @@
 <script setup lang="ts">
   import { DownloadOutline } from '@vicons/ionicons5';
+  import { UploadCustomRequestOptions, useMessage } from 'naive-ui';
+  import { embeddingDocs } from '@/api/aigc/embedding';
+  import { useRouter } from 'vue-router';
+  import { ref } from 'vue';
+
+  const router = useRouter();
+  const message = useMessage();
+  const fileList = ref<any[]>([]);
+
+  /**
+   * Custom request handler for n-upload: sends the selected file to the docs-embedding
+   * endpoint of the knowledge base identified by the current route's `id` param, and
+   * forwards progress, success and failure back to the upload component.
+   */
+  const handleImport = ({ file, onFinish, onError, onProgress }: UploadCustomRequestOptions) => {
+    const kbId = router.currentRoute.value.params.id;
+    embeddingDocs(
+      String(kbId),
+      {
+        file: file.file,
+      },
+      (progressEvent) => {
+        // `total` is optional on the progress event; skip the update rather than report NaN.
+        if (!progressEvent.total) {
+          return;
+        }
+        onProgress({
+          percent: Math.round((progressEvent.loaded * 100) / progressEvent.total),
+        });
+      }
+    )
+      .then((res) => {
+        fileList.value.push(res);
+        message.success('上传成功');
+        onFinish();
+      })
+      .catch(() => {
+        message.error('上传失败');
+        onError();
+      });
+  };
 </script>
 
 <template>
   <div>
     <n-upload
-      multiple
+      :custom-request="handleImport"
       directory-dnd
-      action="https://www.mocky.io/v2/5e4bafc63100007100d8b70f"
-      :max="5"
+      accept="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, application/vnd.ms-excel"
     >
       <n-upload-dragger>
         <div style="margin-bottom: 12px">
@@ -18,7 +59,7 @@
         </div>
         <n-text style="font-size: 16px"> 点击或者拖动文件到该区域来上传 </n-text>
         <n-p depth="3" style="margin: 8px 0 0 0">
-          请不要上传敏感数据,比如你的银行卡号和密码,信用卡号有效期和安全码
+          仅支持上传Excel文件,系统需要对Excel这种结构化文档数据单独处理和训练
         </n-p>
       </n-upload-dragger>
     </n-upload>