// WebPage.ts

  import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
  import { BaseChunker } from './BaseChunker';
  /**
   * Shape of the options object this module passes to
   * `RecursiveCharacterTextSplitter`.
   *
   * Field names mirror the splitter's own option names; see the LangChain
   * text-splitter documentation for their exact semantics.
   */
  interface TextSplitterChunkParams {
    /** Value supplied as the splitter's `chunkSize` option. */
    chunkSize: number;
    /** Value supplied as the splitter's `chunkOverlap` option. */
    chunkOverlap: number;
    /** Value supplied as the splitter's `keepSeparator` option. */
    keepSeparator: boolean;
  }
  /**
   * Splitter settings used by `WebPageChunker`: a chunk size of 500, no
   * overlap between consecutive chunks, and `keepSeparator` turned off.
   */
  const TEXT_SPLITTER_CHUNK_PARAMS: TextSplitterChunkParams = {
    chunkSize: 500,
    chunkOverlap: 0,
    keepSeparator: false,
  };
  /**
   * Chunker that hands `BaseChunker` a `RecursiveCharacterTextSplitter`
   * configured with `TEXT_SPLITTER_CHUNK_PARAMS`.
   *
   * The class adds no members of its own; all chunking behaviour comes from
   * `BaseChunker` (defined in `./BaseChunker`).
   */
  class WebPageChunker extends BaseChunker {
    /** Builds the pre-configured splitter and passes it to the base class. */
    constructor() {
      const textSplitter = new RecursiveCharacterTextSplitter(
        TEXT_SPLITTER_CHUNK_PARAMS
      );
      super(textSplitter);
    }
  }

  export { WebPageChunker };