Przeglądaj źródła

适配embedding

tycoding 1 rok temu
rodzic
commit
34292e6def

+ 22 - 17
docs/langchat.sql

@@ -266,13 +266,16 @@ DROP TABLE IF EXISTS `aigc_oss`;
 CREATE TABLE `aigc_oss` (
                             `id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '主键',
                             `user_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '用户ID',
-                            `file_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '原始文件名称',
-                            `target_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件存储名称',
-                            `bucket` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '桶路径',
+                            `oss_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
+                            `original_filename` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '原始文件名称',
+                            `filename` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件存储名称',
                             `url` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件地址',
+                            `base_path` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '桶路径',
                             `path` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件的绝对路径',
-                            `type` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件类型',
-                            `size` bigint DEFAULT NULL COMMENT '文件大小',
+                            `size` bigint DEFAULT NULL COMMENT '文件大小', -- stays BIGINT: a signed INT would cap the recorded size at 2147483647
+                            `ext` varchar(50) DEFAULT NULL COMMENT '文件后缀',
+                            `content_type` varchar(100) DEFAULT NULL COMMENT '文件头',
+                            `platform` varchar(50) DEFAULT NULL COMMENT '平台',
                             `create_time` datetime DEFAULT NULL COMMENT '创建时间',
                             PRIMARY KEY (`id`) USING BTREE
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='资源文件表';
@@ -281,9 +284,10 @@ CREATE TABLE `aigc_oss` (
 -- Records of aigc_oss
 -- ----------------------------
 BEGIN;
-INSERT INTO `aigc_oss` (`id`, `user_id`, `file_name`, `target_name`, `bucket`, `url`, `path`, `type`, `size`, `create_time`) VALUES ('1c6883ff0f5f005f5c72700fd4423ad3', '1', 'story-about-happy-carrot', 'story-about-happy-carrot.pdf', '/20240606', 'http://cdn.tycoding.cn/story-about-happy-carrot.pdf', '/opt/homebrew/var/www/20240606/story-about-happy-carrot.pdf', 'pdf', 35359, '2024-06-12 12:27:30');
+INSERT INTO `aigc_oss` (`id`, `user_id`, `oss_id`, `original_filename`, `filename`, `url`, `base_path`, `path`, `size`, `ext`, `content_type`, `platform`, `create_time`) VALUES ('1c6883ff0f5f005f5c72700fd4423ad3', '1', '', 'story-about-happy-carrot', 'story-about-happy-carrot', 'http://cdn.tycoding.cn/story-about-happy-carrot.pdf', '/20240606', '/opt/homebrew/var/www/20240606/4a4d7dffe42ccc67ee0a4560901e83db.pdf', 35359, 'pdf', NULL, NULL, '2024-06-12 12:27:30');
 COMMIT;
 
+
 -- ----------------------------
 -- Table structure for aigc_prompt
 -- ----------------------------
@@ -479,17 +483,18 @@ COMMIT;
 -- ----------------------------
 DROP TABLE IF EXISTS `sys_oss`;
 CREATE TABLE `sys_oss` (
-                           `id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
-                           `user_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '操作用户ID',
-                           `file_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '原始文件名称',
-                           `target_name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件存储名称',
-                           `bucket` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '桶路径',
-                           `url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件地址',
-                           `path` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件的绝对路径',
-                           `type` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件类型',
-                           `size` bigint DEFAULT NULL COMMENT '文件大小',
-                           `des` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件描述',
-                           `channel` varchar(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件来源渠道 input/output',
+                           `id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci NOT NULL COMMENT '主键',
+                           `user_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '用户ID',
+                           `oss_id` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL,
+                           `original_filename` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '原始文件名称', -- name, url and path columns stay 255 wide so values that fit the old schema still fit
+                           `filename` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件存储名称',
+                           `url` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件地址',
+                           `base_path` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '桶路径',
+                           `path` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci DEFAULT NULL COMMENT '文件的绝对路径',
+                           `size` bigint DEFAULT NULL COMMENT '文件大小', -- stays BIGINT: a signed INT would cap the recorded size at 2147483647
+                           `ext` varchar(50) DEFAULT NULL COMMENT '文件后缀',
+                           `content_type` varchar(100) DEFAULT NULL COMMENT '文件头',
+                           `platform` varchar(50) DEFAULT NULL COMMENT '平台',
                            `create_time` datetime DEFAULT NULL COMMENT '创建时间',
                            PRIMARY KEY (`id`) USING BTREE
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='资源文件表';

+ 6 - 0
langchat-core/src/main/java/cn/tycoding/langchat/core/consts/EmbedConst.java

@@ -8,4 +8,10 @@ public interface EmbedConst {
 
     String KNOWLEDGE = "knowledgeId";
     String FILENAME = "docsName";
+
+    String CLAZZ_NAME_OPENAI = "OpenAiEmbeddingModel";
+    String CLAZZ_NAME_AZURE_OPENAI = "AzureOpenAiEmbeddingModel";
+    String CLAZZ_NAME_QIANFAN = "QianfanEmbeddingModel";
+    String CLAZZ_NAME_QIANWEN = "QwenEmbeddingModel";
+    String CLAZZ_NAME_OLLAMA = "OllamaEmbeddingModel";
 }

+ 12 - 8
langchat-core/src/main/java/cn/tycoding/langchat/core/provider/EmbedProvider.java

@@ -1,6 +1,7 @@
 package cn.tycoding.langchat.core.provider;
 
 import cn.tycoding.langchat.biz.component.ProviderEnum;
+import cn.tycoding.langchat.core.consts.EmbedConst;
 import dev.langchain4j.data.document.DocumentSplitter;
 import dev.langchain4j.data.document.splitter.DocumentSplitters;
 import dev.langchain4j.model.azure.AzureOpenAiTokenizer;
@@ -34,17 +35,20 @@ public class EmbedProvider {
     }
 
     public EmbeddingModel embed() {
-        if (context.containsBean("OpenAiEmbeddingModel")) {
-            return (EmbeddingModel) context.getBean("OpenAiEmbeddingModel");
+        if (context.containsBean(EmbedConst.CLAZZ_NAME_OPENAI)) {
+            return (EmbeddingModel) context.getBean(EmbedConst.CLAZZ_NAME_OPENAI);
         }
-        if (context.containsBean("AzureOpenAiEmbeddingModel")) {
-            return (EmbeddingModel) context.getBean("AzureOpenAiEmbeddingModel");
+        if (context.containsBean(EmbedConst.CLAZZ_NAME_AZURE_OPENAI)) {
+            return (EmbeddingModel) context.getBean(EmbedConst.CLAZZ_NAME_AZURE_OPENAI);
         }
-        if (context.containsBean("QianfanEmbeddingModel")) {
-            return (EmbeddingModel) context.getBean("QianfanEmbeddingModel");
+        if (context.containsBean(EmbedConst.CLAZZ_NAME_QIANFAN)) {
+            return (EmbeddingModel) context.getBean(EmbedConst.CLAZZ_NAME_QIANFAN);
         }
-        if (context.containsBean("QwenEmbeddingModel")) {
-            return (EmbeddingModel) context.getBean("QwenEmbeddingModel");
+        if (context.containsBean(EmbedConst.CLAZZ_NAME_QIANWEN)) {
+            return (EmbeddingModel) context.getBean(EmbedConst.CLAZZ_NAME_QIANWEN);
+        }
+        if (context.containsBean(EmbedConst.CLAZZ_NAME_OLLAMA)) {
+            return (EmbeddingModel) context.getBean(EmbedConst.CLAZZ_NAME_OLLAMA);
         }
         throw new RuntimeException("No matching embedding model information found, please check the model configuration.");
     }

+ 21 - 4
langchat-core/src/main/java/cn/tycoding/langchat/core/provider/ProviderInitialize.java

@@ -5,11 +5,13 @@ import cn.tycoding.langchat.biz.component.ProviderEnum;
 import cn.tycoding.langchat.biz.entity.AigcModel;
 import cn.tycoding.langchat.biz.service.AigcModelService;
 import cn.tycoding.langchat.common.component.SpringContextHolder;
+import cn.tycoding.langchat.core.consts.EmbedConst;
 import dev.langchain4j.model.azure.AzureOpenAiEmbeddingModel;
 import dev.langchain4j.model.azure.AzureOpenAiImageModel;
 import dev.langchain4j.model.azure.AzureOpenAiStreamingChatModel;
 import dev.langchain4j.model.dashscope.QwenEmbeddingModel;
 import dev.langchain4j.model.dashscope.QwenStreamingChatModel;
+import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
 import dev.langchain4j.model.ollama.OllamaStreamingChatModel;
 import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
 import dev.langchain4j.model.openai.OpenAiImageModel;
@@ -43,6 +45,12 @@ public class ProviderInitialize implements ApplicationContextAware {
     }
 
     public void init() {
+        // Unregister every fixed-name embedding bean (one per EmbedConst.CLAZZ_NAME_* registered below) so a leftover from an earlier registration does not linger
+        contextHolder.unregisterBean(EmbedConst.CLAZZ_NAME_OPENAI);
+        contextHolder.unregisterBean(EmbedConst.CLAZZ_NAME_AZURE_OPENAI);
+        contextHolder.unregisterBean(EmbedConst.CLAZZ_NAME_QIANFAN);
+        contextHolder.unregisterBean(EmbedConst.CLAZZ_NAME_QIANWEN);
+        contextHolder.unregisterBean(EmbedConst.CLAZZ_NAME_OLLAMA);
         List<AigcModel> list = aigcModelService.list();
         list.forEach(model -> {
             // Uninstall previously registered beans before registering them
@@ -195,7 +203,7 @@ public class ProviderInitialize implements ApplicationContextAware {
                             .modelName(model.getModel())
                             .dimensions(model.getDimensions())
                             .build();
-                    contextHolder.registerBean("OpenAiEmbeddingModel", build);
+                    contextHolder.registerBean(EmbedConst.CLAZZ_NAME_OPENAI, build);
                 }
 
                 if (ProviderEnum.AZURE_OPENAI.getModel().equals(model.getModelType())) {
@@ -204,7 +212,7 @@ public class ProviderInitialize implements ApplicationContextAware {
                             .apiKey(model.getApiKey())
                             .deploymentName(model.getBaseUrl())
                             .build();
-                    contextHolder.registerBean("AzureOpenAiEmbeddingModel", build);
+                    contextHolder.registerBean(EmbedConst.CLAZZ_NAME_AZURE_OPENAI, build);
                 }
 
                 if (ProviderEnum.BAIDU.getModel().equals(model.getModelType())) {
@@ -214,7 +222,7 @@ public class ProviderInitialize implements ApplicationContextAware {
                             .modelName(model.getModel())
                             .secretKey(model.getSecretKey())
                             .build();
-                    contextHolder.registerBean("QianfanEmbeddingModel", build);
+                    contextHolder.registerBean(EmbedConst.CLAZZ_NAME_QIANFAN, build);
                 }
 
                 if (ProviderEnum.ALIBABA.getModel().equals(model.getModelType())) {
@@ -223,7 +231,16 @@ public class ProviderInitialize implements ApplicationContextAware {
                             .apiKey(model.getApiKey())
                             .modelName(model.getModel())
                             .build();
-                    contextHolder.registerBean("QwenEmbeddingModel", build);
+                    contextHolder.registerBean(EmbedConst.CLAZZ_NAME_QIANWEN, build);
+                }
+
+                if (ProviderEnum.OLLAMA.getModel().equals(model.getModelType())) {
+                    OllamaEmbeddingModel build = OllamaEmbeddingModel
+                            .builder()
+                            .baseUrl(model.getBaseUrl())
+                            .modelName(model.getModel())
+                            .build();
+                    contextHolder.registerBean(EmbedConst.CLAZZ_NAME_OLLAMA, build);
                 }
             }
         });