-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontent-script.js
More file actions
402 lines (335 loc) · 14.1 KB
/
content-script.js
File metadata and controls
402 lines (335 loc) · 14.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
// SideLlama Content Script
// Enhanced script for better page content extraction and analysis
(function() {
'use strict';
// Listen for messages from the extension.
//
// The listener itself is deliberately NOT `async`: an async function returns a
// Promise, and Chrome versions without Promise-return support only keep the
// message channel open when the listener synchronously returns the literal
// `true`. The asynchronous work therefore runs in an inner async function
// while the outer listener returns `true` right away.
//
// Every request is answered exactly once with `{ success: boolean, ... }`;
// failures carry the error message in `error`.
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  const respond = async () => {
    try {
      switch (request.type) {
        case 'EXTRACT_PAGE_CONTENT': {
          // Braces scope the lexical declaration to this case only.
          const pageData = await extractPageContent();
          sendResponse({ success: true, data: pageData });
          break;
        }
        case 'SUMMARIZE_PAGE':
          // NOTE(review): handlePageSummary is not defined in the visible part
          // of this file. If it is really missing, this call throws a
          // ReferenceError that is reported back through the catch below.
          await handlePageSummary();
          sendResponse({ success: true });
          break;
        case 'EXPLAIN_SELECTION':
          // NOTE(review): handleExplainSelection is likewise not defined in the
          // visible part of this file - confirm it exists or drop this case.
          await handleExplainSelection(request.selection);
          sendResponse({ success: true });
          break;
        case 'DISPLAY_SCREENSHOT':
          displayScreenshot(request.dataUrl);
          sendResponse({ success: true });
          break;
        default:
          sendResponse({ success: false, error: 'Unknown request type' });
      }
    } catch (error) {
      sendResponse({ success: false, error: error.message });
    }
  };

  // `respond` handles its own errors, so the floating promise cannot reject
  // unless sendResponse itself throws inside the catch block.
  respond();

  return true; // Keep the message channel open for the async response
});
/**
 * Build a structured snapshot of the current page.
 *
 * Combines document metadata (title, URL, meta description, language) with
 * the results of the extraction helpers in this file.
 *
 * @returns {Promise<Object>} Object with `title`, `url`, `description`,
 *   `content` (at most 8000 characters), `language`, `headings` (max 10),
 *   `links` (max 10), `pageType`, `wordCount` (computed on the untruncated
 *   content) and `extractedAt` (ISO timestamp).
 */
async function extractPageContent() {
  const title = document.title || 'Untitled Page';
  const url = window.location.href;

  // A <meta name="description"> tag may exist without a `content` attribute,
  // in which case getAttribute returns null; fall back to an empty string so
  // the later trim() cannot throw.
  const metaDesc = document.querySelector('meta[name="description"]');
  const description = (metaDesc && metaDesc.getAttribute('content')) || '';

  // Extract main content using various strategies (async)
  const content = await extractMainContent();

  const lang = document.documentElement.lang || 'en';
  const headings = extractHeadings();
  const links = extractRelevantLinks();
  const pageType = detectPageType();

  // ''.split(/\s+/) yields [''], which would report one word for an empty
  // page; count only non-empty tokens.
  const wordCount = content.split(/\s+/).filter(Boolean).length;

  return {
    title: title.trim(),
    url,
    description: description.trim(),
    content: content.substring(0, 8000), // Limit to 8000 characters
    language: lang,
    headings: headings.slice(0, 10), // Top 10 headings
    links: links.slice(0, 10), // Top 10 links
    pageType,
    wordCount,
    extractedAt: new Date().toISOString()
  };
}
/**
 * Locate the page's primary text using three progressively broader passes.
 *
 *  1. Walk a list of "main content" selectors and take the first match whose
 *     text is longer than 200 characters.
 *  2. If that yields less than 200 characters, use the filtered text of
 *     <body> (when a body exists).
 *  3. If the result is still under 100 characters, fall back to
 *     extractAllText().
 *
 * @returns {Promise<string>} The selected text after cleanContent().
 */
async function extractMainContent() {
  const candidateSelectors = [
    'main',
    'article',
    '[role="main"]',
    '.main-content',
    '.content',
    '.post-content',
    '.entry-content',
    '.article-content',
    '.page-content',
    '#main',
    '#content'
  ];
  const isShorterThan = (value, limit) => !value || value.length < limit;

  // Pass 1: semantic / conventional containers. The text of the most recent
  // match is kept even when it is short; the loop only stops early once a
  // match is substantial.
  let text = '';
  for (const selector of candidateSelectors) {
    const node = document.querySelector(selector);
    if (!node) {
      continue;
    }
    text = extractTextFromElement(node);
    if (text.length > 200) {
      break;
    }
  }

  // Pass 2: whole body with navigation/ads stripped.
  if (isShorterThan(text, 200)) {
    const bodyNode = document.body;
    if (bodyNode) {
      text = extractTextFromElement(bodyNode, true);
    }
  }

  // Pass 3: element-by-element sweep of the document.
  if (isShorterThan(text, 100)) {
    text = await extractAllText();
  }

  return cleanContent(text);
}
/**
 * Return the text of an element without modifying the live DOM.
 *
 * The element is deep-cloned and unwanted descendants are removed from the
 * clone before its text is read. Because the clone is detached (not
 * rendered), `innerText` behaves like `textContent` and would include the
 * source of <script>/<style>/<noscript> descendants; those are therefore
 * always stripped, regardless of `filterOut`.
 *
 * @param {Element|null|undefined} element - Element to read; falsy yields ''.
 * @param {boolean} [filterOut=false] - When true, additionally remove common
 *   non-content regions (navigation, headers/footers, ads, comments, ...).
 * @returns {string} Text of the (filtered) clone, or '' when there is none.
 */
function extractTextFromElement(element, filterOut = false) {
  if (!element) return '';

  // Clone the element to avoid modifying the original
  const clone = element.cloneNode(true);

  // Never textual content; removed unconditionally (see doc comment).
  const selectorsToRemove = ['script', 'style', 'noscript'];

  if (filterOut) {
    // Common non-content elements
    selectorsToRemove.push(
      'nav', 'header', 'footer', 'aside',
      '.nav', '.navigation', '.header', '.footer', '.sidebar',
      '.menu', '.ads', '.advertisement', '.social', '.share',
      '.comments', '.comment', '.related', '.recommended',
      '.popup', '.modal', '.overlay', '.banner',
      'iframe',
      '[role="navigation"]', '[role="banner"]', '[role="contentinfo"]',
      '.cookie-notice', '.gdpr-notice'
    );
  }

  // A single selector-list query replaces one query per selector.
  clone.querySelectorAll(selectorsToRemove.join(', ')).forEach(el => el.remove());

  return clone.innerText || clone.textContent || '';
}
/**
 * Last-resort extraction: sweep block/inline text elements across the whole
 * document and concatenate their text, one entry per line.
 *
 * Elements are processed in batches of 50 with a zero-delay timeout between
 * batches so a very large page does not block the thread in one long task.
 *
 * An element is skipped when it is hidden or looks like non-content (see
 * isHiddenOrNonContent), when its trimmed text is 10 characters or shorter,
 * or when it sits inside an element whose text was already collected - the
 * ancestor's text already contains it, and adding it again would duplicate
 * the same sentences once per nesting level.
 *
 * @returns {Promise<string>} Collected text, each entry followed by '\n'.
 *   Rejects if processing any batch throws.
 */
function extractAllText() {
  const contentElements = document.querySelectorAll('p, div, span, li, td, th, h1, h2, h3, h4, h5, h6');

  return new Promise((resolve, reject) => {
    const batchSize = 50; // Process 50 elements at a time
    let allText = '';
    let index = 0;
    // querySelectorAll returns matches in document order, so every descendant
    // of a collected element directly follows it; remembering only the most
    // recently collected element is enough to detect nesting.
    let lastCollected = null;

    function processBatch() {
      try {
        const endIndex = Math.min(index + batchSize, contentElements.length);

        for (let i = index; i < endIndex; i++) {
          const el = contentElements[i];

          // Already covered by the text of a collected ancestor
          if (lastCollected && lastCollected.contains(el)) continue;

          // Skip if element is hidden or likely non-content
          if (isHiddenOrNonContent(el)) continue;

          const text = el.innerText || el.textContent || '';
          if (text.trim().length > 10) { // Only include substantial text
            allText += text + '\n';
            lastCollected = el;
          }
        }

        index = endIndex;
      } catch (error) {
        // Without this, an exception inside a timer callback would leave the
        // promise pending forever.
        reject(error);
        return;
      }

      if (index < contentElements.length) {
        // Schedule next batch to avoid blocking
        setTimeout(processBatch, 0);
      } else {
        resolve(allText);
      }
    }

    processBatch();
  });
}
/**
 * Decide whether an element should be ignored during text extraction.
 *
 * An element is ignored when its computed style hides it (display: none,
 * visibility: hidden or opacity: 0) or when its class/id suggests page chrome
 * rather than content.
 *
 * Most patterns are matched as substrings ("navbar" matches "nav"). The
 * advertising marker "ad"/"ads" is matched only as a standalone token,
 * because as a substring it also hits ordinary names such as "heading",
 * "lead", "thread" or "shadow"; "advert" covers "advertisement" and similar.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} True when the element should be skipped.
 */
function isHiddenOrNonContent(element) {
  // Check if element is hidden
  const style = window.getComputedStyle(element);
  if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') {
    return true;
  }

  // className is a string on HTML elements but an object exposing `baseVal`
  // on SVG elements; normalise both to a lower-cased string.
  const rawClass = element.className;
  const classText = typeof rawClass === 'string' ? rawClass : (rawClass && rawClass.baseVal) || '';
  const className = String(classText).toLowerCase();
  const id = String(element.id || '').toLowerCase();

  const nonContentPatterns = [
    'nav', 'menu', 'sidebar', 'footer', 'header', 'advert', 'banner',
    'popup', 'modal', 'overlay', 'cookie', 'gdpr', 'social', 'share'
  ];
  // "ad" / "ads" delimited by string boundaries or non-alphanumerics
  const adToken = /(?:^|[^a-z0-9])ads?(?:[^a-z0-9]|$)/;

  return [className, id].some(value =>
    adToken.test(value) || nonContentPatterns.some(pattern => value.includes(pattern))
  );
}
/**
 * Collect the document's h1-h6 headings in document order.
 *
 * Headings whose text is blank, or whose untrimmed text is 200 characters or
 * longer, are left out.
 *
 * @returns {Array<{level: number, text: string}>} `level` is the digit from
 *   the tag name (1-6); `text` is the trimmed heading text.
 */
function extractHeadings() {
  const outline = [];

  for (const node of document.querySelectorAll('h1, h2, h3, h4, h5, h6')) {
    const rawText = node.innerText || node.textContent || '';
    const label = rawText.trim();

    // Reasonable heading length; the limit applies to the untrimmed text.
    if (label === '' || rawText.length >= 200) {
      continue;
    }

    outline.push({
      level: Number.parseInt(node.tagName.charAt(1), 10),
      text: label
    });
  }

  return outline;
}
/**
 * Collect content links from the page.
 *
 * Links are skipped when they have no text, when the text is longer than 100
 * characters, when isNavigationLink() flags them, or when the href cannot be
 * parsed as a URL. Remaining links are de-duplicated by absolute URL (first
 * occurrence wins) and sorted by descending text length, so more descriptive
 * links come first.
 *
 * Relative hrefs are resolved against the document's base URL. Resolving
 * against `location.origin` would drop the current directory ("other.html" on
 * /docs/guide/page.html must become /docs/guide/other.html) and fails outright
 * on pages whose origin serialises to "null".
 *
 * @returns {Array<{text: string, url: string}>} Unique links, longest text first.
 */
function extractRelevantLinks() {
  const baseUrl = document.baseURI || window.location.href;
  const seenUrls = new Set();
  const links = [];

  document.querySelectorAll('a[href]').forEach(link => {
    const href = link.getAttribute('href');
    const text = (link.innerText || link.textContent || '').trim();

    // Skip if no text, too long, or likely not content-related
    if (!text || text.length > 100 || isNavigationLink(text, href)) {
      return;
    }

    // Convert relative URLs to absolute
    let absoluteUrl;
    try {
      absoluteUrl = new URL(href, baseUrl).href;
    } catch (e) {
      // Skip invalid URLs
      return;
    }

    // Set lookup keeps de-duplication linear in the number of links
    if (seenUrls.has(absoluteUrl)) {
      return;
    }
    seenUrls.add(absoluteUrl);
    links.push({ text, url: absoluteUrl });
  });

  // Prefer descriptive links; the sort is stable, so ties keep document order
  return links.sort((a, b) => b.text.length - a.text.length);
}
/**
 * Heuristically decide whether a link is site navigation rather than content.
 *
 * A link counts as navigation when its text is shorter than 3 characters or
 * when its text or href contains one of the navigation words as a whole
 * token. Tokens are runs of ASCII letters/digits of the lower-cased input; a
 * trailing "s" is tolerated ("cookies" matches "cookie").
 *
 * Whole-token matching is deliberate: as substrings, short words such as
 * "up", "top" or "back" would also flag "update", "support", "laptop",
 * "topic" or "feedback" and discard genuine content links.
 *
 * @param {string} text - Visible link text.
 * @param {string} href - Link target as written in the markup.
 * @returns {boolean} True when the link looks like navigation.
 */
function isNavigationLink(text, href) {
  if (text.length < 3) {
    return true;
  }

  const navWords = new Set([
    'home', 'menu', 'login', 'register', 'search', 'contact',
    'about', 'privacy', 'terms', 'cookie', 'next', 'previous',
    'back', 'top', 'up', 'close', 'toggle'
  ]);

  const tokenize = value => value.toLowerCase().split(/[^a-z0-9]+/).filter(Boolean);
  const isNavWord = token =>
    navWords.has(token) || (token.endsWith('s') && navWords.has(token.slice(0, -1)));

  return tokenize(text).some(isNavWord) || tokenize(href).some(isNavWord);
}
/**
 * Guess a coarse page category from the URL, the title and the body's class
 * attribute.
 *
 * Each category has a keyword list; the first category (in declaration order)
 * with a keyword occurring as a substring of any of the three lower-cased
 * sources wins.
 *
 * @returns {string} One of 'article', 'documentation', 'ecommerce', 'social',
 *   'search', 'homepage', 'form', or 'webpage' when nothing matches.
 */
function detectPageType() {
  const url = window.location.href.toLowerCase();
  const title = document.title.toLowerCase();
  // document.body can be null (e.g. before the body has been parsed); treat
  // that as "no classes" instead of throwing.
  const bodyClass = document.body ? document.body.className.toLowerCase() : '';

  // Common page type patterns
  const patterns = {
    article: ['article', 'post', 'blog', 'news', 'story'],
    documentation: ['docs', 'documentation', 'guide', 'manual', 'api', 'reference'],
    ecommerce: ['shop', 'store', 'product', 'cart', 'buy', 'price'],
    social: ['profile', 'timeline', 'feed', 'social', 'community'],
    search: ['search', 'results', 'query'],
    homepage: ['home', 'index', 'welcome'],
    form: ['form', 'contact', 'signup', 'login', 'register']
  };

  const sources = [url, title, bodyClass];
  for (const [type, keywords] of Object.entries(patterns)) {
    if (keywords.some(keyword => sources.some(source => source.includes(keyword)))) {
      return type;
    }
  }

  // Default fallback
  return 'webpage';
}
/**
 * Normalise extracted text.
 *
 * Collapses runs of blank lines and runs of spaces/tabs, trims every line,
 * drops lines that end up empty, and trims the final result.
 *
 * @param {string|null|undefined} content - Raw text; falsy input yields ''.
 * @returns {string} Cleaned text with one non-empty, trimmed line per '\n'.
 */
function cleanContent(content) {
  if (!content) {
    return '';
  }

  // Excessive vertical whitespace first, then horizontal.
  const withoutBlankRuns = content.replace(/\n\s*\n\s*\n/g, '\n\n');
  const withSingleSpaces = withoutBlankRuns.replace(/[ \t]+/g, ' ');

  const keptLines = [];
  for (const rawLine of withSingleSpaces.split('\n')) {
    const line = rawLine.trim();
    if (line.length > 0) {
      keptLines.push(line);
    }
  }

  return keptLines.join('\n').trim();
}
// Keyboard shortcut handling removed - now handled centrally by service worker via Chrome Commands API
// Context menu support removed - handled by service worker's onClicked listener
/**
 * Cache a snapshot of the page in sessionStorage once the page has loaded.
 *
 * If the document is not yet 'complete', the function re-runs itself on the
 * window 'load' event. Otherwise it waits one second and then stores the
 * JSON-serialised result of extractPageContent() under the key
 * 'sideLlamaPageContext'. Failures are logged as warnings and otherwise
 * ignored.
 */
function cachePageContext() {
  if (document.readyState !== 'complete') {
    // Not loaded yet - try again when the load event fires.
    window.addEventListener('load', () => cachePageContext());
    return;
  }

  const storeSnapshot = async () => {
    try {
      const snapshot = await extractPageContent();
      // Store in session storage for quick access
      sessionStorage.setItem('sideLlamaPageContext', JSON.stringify(snapshot));
    } catch (error) {
      console.warn('Failed to cache page context:', error);
    }
  };

  setTimeout(storeSnapshot, 1000);
}
// Duplicate handler functions removed - functionality now centralized in service worker
/**
 * Show a floating screenshot preview in the top-right corner of the page.
 *
 * The preview contains the image and a close button. Clicking the image opens
 * it in a new tab at full size; clicking the close button removes the
 * preview; it also removes itself after 10 seconds.
 *
 * @param {string} dataUrl - Image URL (typically a data: URL) to display.
 */
function displayScreenshot(dataUrl) {
  // Create a floating screenshot preview
  const overlay = document.createElement('div');
  overlay.style.cssText = `
    position: fixed;
    top: 20px;
    right: 20px;
    z-index: 10000;
    background: rgba(0, 0, 0, 0.9);
    border-radius: 8px;
    padding: 10px;
    max-width: 300px;
    box-shadow: 0 4px 20px rgba(0, 0, 0, 0.5);
  `;

  const img = document.createElement('img');
  img.src = dataUrl;
  img.style.cssText = `
    width: 100%;
    height: auto;
    border-radius: 4px;
    cursor: pointer;
  `;

  const closeBtn = document.createElement('div');
  closeBtn.textContent = '×';
  closeBtn.style.cssText = `
    position: absolute;
    top: 5px;
    right: 10px;
    color: white;
    cursor: pointer;
    font-size: 20px;
    font-weight: bold;
  `;

  closeBtn.addEventListener('click', () => overlay.remove());

  img.addEventListener('click', () => {
    // Open in new tab for full view. window.open() returns null when the
    // popup is blocked, so bail out instead of throwing.
    const newTab = window.open();
    if (!newTab) {
      return;
    }
    // Build the image through the DOM rather than document.write() with an
    // interpolated string, so the URL can never be parsed as markup.
    const fullImg = newTab.document.createElement('img');
    fullImg.src = dataUrl;
    fullImg.style.cssText = 'max-width: 100%; height: auto;';
    newTab.document.body.appendChild(fullImg);
  });

  // Auto-remove after 10 seconds
  setTimeout(() => {
    if (overlay.parentNode) overlay.remove();
  }, 10000);

  overlay.appendChild(img);
  overlay.appendChild(closeBtn);
  document.body.appendChild(overlay);
}
// Start-up: kick off page-context caching. cachePageContext() defers its own
// work until the document has finished loading, so calling it here is safe
// regardless of the current load state.
cachePageContext();
// Log a marker to the page console once the statements above have run, which
// shows that the content script was injected and executed.
console.log('🦙 SideLlama enhanced content script loaded');
})();