lmu.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. # -*- coding: utf-8 -*-
  2. # @Author: privacy
  3. # @Date: 2024-07-03 11:14:27
  4. # @Last Modified by: privacy
  5. # @Last Modified time: 2024-07-03 15:11:07
  6. import logging
  7. from textrank4zh import TextRank4Keyword, TextRank4Sentence
  8. from paddlenlp import Taskflow
  class LMU(object):
      """Produce a summary and a keyword list for a piece of text.

      Wraps a ``TextRank4Keyword`` instance (keyword extraction) and a
      PaddleNLP ``Taskflow("text_summarization")`` pipeline (summary).
      Call :meth:`run` first, then read the results with
      :meth:`get_summary` and :meth:`get_key_words`.
      """

      # Immutable class-level defaults: the getters return empty results
      # instead of raising AttributeError when run() has not been called yet.
      summary = ""
      key_words = ()

      def __init__(self):
          """Build the keyword extractor, sentence ranker and summarizer."""
          self.tr4w = TextRank4Keyword()
          # Not used by run() at present; kept as a public attribute.
          self.tr4s = TextRank4Sentence()
          self.summarizer = Taskflow(
              "text_summarization", model="unimo-text-1.0-summary"
          )

      def run(self, text: str, topK: int = 5) -> None:
          """Analyze *text* and store the results on the instance.

          Args:
              text: The text to summarize and extract keywords from.
              topK: Passed to ``TextRank4Keyword.get_keywords`` as the
                  number of keywords requested.
          """
          # The summarizer returns a sequence; only its first element is kept.
          self.summary = self.summarizer(text)[0]
          self.tr4w.analyze(text, lower=True)
          self.key_words = self.tr4w.get_keywords(topK)

      def get_summary(self) -> str:
          """Return the summary stored by the last run() ("" before any run)."""
          return self.summary

      def get_key_words(self) -> list:
          """Return the ``word`` of each keyword stored by the last run()."""
          return [item.word for item in self.key_words]
  if __name__ == '__main__':
      # Demo: summarize a sample paragraph and print its summary and keywords.
      sample_text = 'PaddleNLP是一个基于PaddlePaddle深度学习框架的自然语言处理工具包,提供了丰富的文本处理功能。关键词提取是其中一个重要的功能。TextRank算法是PaddleNLP中常用的关键词提取算法,它通过计算词语之间的权重得到关键词。'
      lmu = LMU()
      lmu.run(sample_text)
      for result in (lmu.get_summary(), lmu.get_key_words()):
          print(result)