# -*- coding: utf-8 -*-
# @Author: privacy
# @Date:   2024-07-03 11:14:27
# @Last Modified by:   privacy
# @Last Modified time: 2024-07-03 15:11:07
import logging

from textrank4zh import TextRank4Keyword, TextRank4Sentence
from paddlenlp import Taskflow


class LMU(object):
    """Produce a summary and a keyword list for a piece of text.

    The summary comes from a PaddleNLP ``Taskflow`` summarization pipeline and
    the keywords come from ``TextRank4Keyword``. Call :meth:`run` first, then
    read the results with :meth:`get_summary` and :meth:`get_key_words`.
    """

    def __init__(self):
        self.tr4w = TextRank4Keyword()
        # Not used by run(); kept so the attribute remains available to callers.
        self.tr4s = TextRank4Sentence()
        self.summarizer = Taskflow("text_summarization", model="unimo-text-1.0-summary")
        # Initialise the results so the getters work before the first run()
        # instead of raising AttributeError.
        self.summary = ""
        self.key_words = []

    def run(self, text: str, topK: int = 5) -> None:
        """Analyse ``text`` and store the summary and keywords on the instance.

        Args:
            text: The text to summarise and extract keywords from.
            topK: Value passed as the first argument to
                ``TextRank4Keyword.get_keywords`` (presumably the maximum
                number of keywords returned -- confirm against textrank4zh).
        """
        # The summarizer returns an indexable result; the first element is kept.
        self.summary = self.summarizer(text)[0]

        self.tr4w.analyze(text, lower=True)
        self.key_words = self.tr4w.get_keywords(topK)

    def get_summary(self) -> str:
        """Return the summary stored by the last ``run`` ("" if never run)."""
        return self.summary

    def get_key_words(self) -> list:
        """Return the keyword strings stored by the last ``run`` ([] if never run)."""
        return [item.word for item in self.key_words]


if __name__ == '__main__':
    lmu = LMU()
    lmu.run('PaddleNLP是一个基于PaddlePaddle深度学习框架的自然语言处理工具包,提供了丰富的文本处理功能。关键词提取是其中一个重要的功能。TextRank算法是PaddleNLP中常用的关键词提取算法,它通过计算词语之间的权重得到关键词。')
    print(lmu.get_summary())
    print(lmu.get_key_words())