123456789101112131415161718192021222324252627282930313233343536373839404142 |
- # -*- coding: utf-8 -*-
- # @Author: privacy
- # @Date: 2024-07-03 11:14:27
- # @Last Modified by: privacy
- # @Last Modified time: 2024-07-03 15:11:07
- import logging
- from textrank4zh import TextRank4Keyword, TextRank4Sentence
- from paddlenlp import Taskflow
class LMU(object):
    """Extract a summary and keywords from a piece of text.

    Keywords are produced by textrank4zh's ``TextRank4Keyword``; the summary
    is produced by a PaddleNLP ``Taskflow`` text-summarization pipeline.
    Call :meth:`run` on a text first, then read the results through
    :meth:`get_summary` and :meth:`get_key_words`.
    """

    # Class-level defaults make the getters safe to call before ``run``:
    # they yield an empty summary / empty keyword list instead of raising
    # ``AttributeError``. ``run`` shadows both with per-instance results.
    summary = ""
    key_words = ()  # immutable on purpose, so instances never share state

    def __init__(self):
        """Create the keyword extractor, sentence extractor and summarizer."""
        self.tr4w = TextRank4Keyword()
        # Not used by ``run``; kept so code that accesses ``tr4s`` still works.
        self.tr4s = TextRank4Sentence()
        self.summarizer = Taskflow("text_summarization", model="unimo-text-1.0-summary")

    def run(self, text: str, topK: int = 5) -> None:
        """Analyze *text* and store its summary and keywords on the instance.

        Args:
            text: The text to summarize and extract keywords from.
            topK: Number of keywords requested from the keyword extractor.

        Returns:
            None. Results are read back with :meth:`get_summary` and
            :meth:`get_key_words`.
        """
        # Only the first element of the summarizer output is kept.
        # NOTE(review): presumably the pipeline returns one summary per input
        # text, so element 0 is the summary of ``text`` - confirm.
        self.summary = self.summarizer(text)[0]
        self.tr4w.analyze(text, lower=True)
        self.key_words = self.tr4w.get_keywords(topK)

    def get_summary(self) -> str:
        """Return the summary stored by the last :meth:`run` ("" if none)."""
        return self.summary

    def get_key_words(self) -> list:
        """Return the keyword strings stored by the last :meth:`run`.

        Each stored keyword item is reduced to its ``word`` attribute. An
        empty list is returned when :meth:`run` has not been called yet.
        """
        return [item.word for item in self.key_words]
if __name__ == '__main__':
    # Demo: run the extractor on a sample paragraph, then print the summary
    # followed by the keyword list.
    demo = LMU()
    sample_text = 'PaddleNLP是一个基于PaddlePaddle深度学习框架的自然语言处理工具包,提供了丰富的文本处理功能。关键词提取是其中一个重要的功能。TextRank算法是PaddleNLP中常用的关键词提取算法,它通过计算词语之间的权重得到关键词。'
    demo.run(sample_text)
    # Call each getter right before printing to keep the original call order.
    for getter in (demo.get_summary, demo.get_key_words):
        print(getter())
|