1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
// Assign the class to `window` explicitly so it is reachable from the global scope.
window.PageAnalyzer = class PageAnalyzer {
  constructor() {
    // Most recent Readability instance created by analyzePage(); null until then.
    this.readability = null;
  }

  /**
   * Analyze the current page.
   *
   * Uses Readability when it is loaded and able to produce an article;
   * otherwise returns the result of fallbackAnalysis().
   *
   * @returns {Object} Page analysis result with the keys `title`, `url`,
   *   `mainContent`, `excerpt`, `siteName` and `wordCount`.
   */
  analyzePage() {
    try {
      // Readability is an optional global; degrade gracefully when it is missing.
      if (typeof Readability === "undefined") {
        console.warn(
          "Readability not loaded, falling back to basic extraction"
        );
        return this.fallbackAnalysis();
      }

      // Work on a copy of the document so the original DOM is never modified.
      const documentClone = document.cloneNode(true);

      this.readability = new Readability(documentClone, {
        debug: false,
        charThreshold: 20,
      });

      const article = this.readability.parse();

      // parse() can yield no article; handle that explicitly rather than
      // relying on a TypeError from dereferencing a null result.
      if (!article) {
        console.warn("Readability returned no article, using fallback");
        return this.fallbackAnalysis();
      }

      return {
        title: article.title || document.title,
        url: window.location.href,
        mainContent: article.textContent || article.excerpt || "",
        excerpt: article.excerpt || "",
        siteName: article.siteName || new URL(window.location.href).hostname,
        // NOTE(review): this is Readability's `length` field, which its docs
        // describe as a character count rather than a word count - confirm.
        wordCount: article.length || 0,
      };
    } catch (error) {
      console.warn("Readability failed, using fallback:", error);
      return this.fallbackAnalysis();
    }
  }

  /**
   * Basic extraction used when Readability is unavailable or fails.
   *
   * @returns {Object} Same shape as analyzePage(); `excerpt` is always ""
   *   and `wordCount` is always 0.
   */
  fallbackAnalysis() {
    return {
      title: document.title,
      url: window.location.href,
      mainContent: this.extractMainContent(),
      excerpt: "",
      siteName: new URL(window.location.href).hostname,
      wordCount: 0,
    };
  }

  /**
   * Basic content extraction.
   *
   * When the page contains a <form>, the markup (outerHTML) of the first form
   * is returned; otherwise the text content of <body> is returned. In both
   * cases the result is trimmed and whitespace runs are collapsed to a single
   * space. Returns "" when there is neither a form nor a body.
   *
   * Script/style/nav stripping is NOT performed here (that step was disabled),
   * so body text may include inline script or style content.
   *
   * @returns {string} Whitespace-normalized content string.
   */
  extractMainContent() {
    let rawText = "";

    // The first form on the page takes precedence over the body text.
    const form = document.querySelector("form");
    if (form) {
      rawText = form.outerHTML;
    } else if (document.body) {
      // document.body can be null (e.g. before the body has been parsed);
      // reading textContent needs no clone because nothing is mutated.
      rawText = document.body.textContent;
    }

    return rawText.trim().replace(/\s+/g, " ");
  }
};
// Publish a single shared analyzer instance on the global object.
{
  // Block-scoped local: avoids adding any new global binding.
  const AnalyzerClass = window.PageAnalyzer;
  window.pageAnalyzer = new AnalyzerClass();
}
|