123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
// Attach the class to `window` so that it is defined in the global scope.
window.PageAnalyzer = class PageAnalyzer {
  constructor() {
    // Readability instance created by the most recent analyzePage() call;
    // stays null until Readability has actually been used.
    this.readability = null;
  }

  /**
   * Analyze the content of the current page.
   *
   * Uses Readability when it is available and yields an article. Falls back
   * to basic text extraction when Readability is not loaded, when it returns
   * no article, or when it throws.
   *
   * @returns {Object} Page analysis result with the keys `title`, `url`,
   *   `mainContent`, `excerpt`, `siteName` and `wordCount`. On the
   *   Readability path `wordCount` carries the article's `length` value.
   */
  analyzePage() {
    try {
      // Check whether Readability is available.
      if (typeof Readability === "undefined") {
        console.warn(
          "Readability not loaded, falling back to basic extraction"
        );
        return this.fallbackAnalysis();
      }

      // Work on a copy of the document to avoid modifying the original DOM.
      const documentClone = document.cloneNode(true);

      // Initialize Readability.
      this.readability = new Readability(documentClone, {
        debug: false,
        charThreshold: 20,
      });

      // Parse the page. parse() may return null when no article is found, so
      // handle that explicitly instead of relying on a TypeError being
      // caught below.
      const article = this.readability.parse();
      if (!article) {
        console.warn("Readability returned no article, using fallback");
        return this.fallbackAnalysis();
      }

      return {
        title: article.title || document.title,
        url: window.location.href,
        mainContent: article.textContent || article.excerpt || "",
        excerpt: article.excerpt || "",
        siteName: article.siteName || new URL(window.location.href).hostname,
        wordCount: article.length || 0,
      };
    } catch (error) {
      console.warn("Readability failed, using fallback:", error);
      return this.fallbackAnalysis();
    }
  }

  /**
   * Basic extraction used as a fallback when Readability cannot be used.
   *
   * @returns {Object} Result with the same keys as analyzePage(). `excerpt`
   *   is always empty and `wordCount` is the length of the extracted text,
   *   so it is non-zero whenever `mainContent` is non-empty.
   */
  fallbackAnalysis() {
    const mainContent = this.extractMainContent();
    return {
      title: document.title,
      url: window.location.href,
      mainContent,
      excerpt: "",
      siteName: new URL(window.location.href).hostname,
      wordCount: mainContent.length,
    };
  }

  /**
   * Basic content extraction: the text of a copy of `document.body` with
   * script, style, iframe, nav, header and footer elements removed and
   * whitespace runs collapsed to single spaces.
   *
   * @returns {string} Extracted text, or "" when the document has no body.
   */
  extractMainContent() {
    // document.body can be null; this method also runs from analyzePage()'s
    // catch handler, so it must not throw in that case.
    if (!document.body) {
      return "";
    }

    // Strip scripts, styles, etc. from a clone so the live DOM is untouched.
    const content = document.body.cloneNode(true);
    content
      .querySelectorAll("script, style, iframe, nav, header, footer")
      .forEach((el) => el.remove());
    return content.textContent.trim().replace(/\s+/g, " ");
  }
};

// Create the global instance.
window.pageAnalyzer = new window.PageAnalyzer();
|