page-analyzer.js 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  // Attach the class to `window` so that it is defined in the global scope.
  window.PageAnalyzer = class PageAnalyzer {
    constructor() {
      // Readability instance created by the most recent analyzePage() call
      // that reached the Readability path; null until then.
      this.readability = null;
    }

    /**
     * Analyze the current page and return its title, URL and main text.
     *
     * Uses Readability when the global `Readability` constructor is available
     * and yields an article; otherwise (Readability missing, no article
     * detected, or any exception) it returns the result of fallbackAnalysis().
     *
     * @returns {{title: string, url: string, mainContent: string,
     *   excerpt: string, siteName: string, wordCount: number}} Page analysis
     *   result.
     */
    analyzePage() {
      try {
        // Readability is an optional dependency; degrade gracefully without it.
        if (typeof Readability === "undefined") {
          console.warn(
            "Readability not loaded, falling back to basic extraction"
          );
          return this.fallbackAnalysis();
        }

        // Parse a deep copy so the original DOM is not modified.
        const documentClone = document.cloneNode(true);
        this.readability = new Readability(documentClone, {
          debug: false,
          charThreshold: 20,
        });

        const article = this.readability.parse();

        // parse() can return null when no article is detected. Handle that
        // explicitly instead of relying on a TypeError from `article.title`
        // being caught below and reported as a Readability failure.
        if (!article) {
          console.warn("Readability found no article, using fallback");
          return this.fallbackAnalysis();
        }

        return {
          title: article.title || document.title,
          url: window.location.href,
          mainContent: article.textContent || article.excerpt || "",
          excerpt: article.excerpt || "",
          siteName: article.siteName || new URL(window.location.href).hostname,
          // NOTE(review): Readability documents `length` as a character
          // count, so this is not a true word count despite the key name.
          // Left unchanged for callers; confirm the intended unit.
          wordCount: article.length || 0,
        };
      } catch (error) {
        console.warn("Readability failed, using fallback:", error);
        return this.fallbackAnalysis();
      }
    }

    /**
     * Basic extraction used as a fallback when Readability cannot be used.
     *
     * @returns {{title: string, url: string, mainContent: string,
     *   excerpt: string, siteName: string, wordCount: number}} Result with the
     *   same keys as analyzePage(); `excerpt` is always "" and `wordCount` is
     *   always 0 here.
     */
    fallbackAnalysis() {
      return {
        title: document.title,
        url: window.location.href,
        mainContent: this.extractMainContent(),
        excerpt: "",
        siteName: new URL(window.location.href).hostname,
        wordCount: 0,
      };
    }

    /**
     * Basic content extraction: the text of a copy of <body> with script,
     * style, iframe, nav, header and footer elements removed, trimmed, and
     * with every whitespace run collapsed to a single space.
     *
     * @returns {string} Extracted text, or "" when the document has no body.
     */
    extractMainContent() {
      // document.body is null when the document has no <body>/<frameset>
      // element. This method runs from analyzePage()'s catch handler, so an
      // exception here would escape uncaught; return empty content instead.
      if (!document.body) {
        return "";
      }

      // Work on a copy so removing elements does not touch the live page.
      const content = document.body.cloneNode(true);
      content
        .querySelectorAll("script, style, iframe, nav, header, footer")
        .forEach((el) => el.remove());

      return content.textContent.trim().replace(/\s+/g, " ");
    }
  };
  // Create the shared global instance.
  window.pageAnalyzer = new window.PageAnalyzer();