-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathloader.py
More file actions
67 lines (54 loc) · 2.21 KB
/
loader.py
File metadata and controls
67 lines (54 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# loader.py — remote CSS loader
import re
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from typing import Dict
from pathlib import Path
from .config import settings
def _unique_css_name(url: str, taken: Dict[str, str]) -> str:
    """Derive a file-name key for *url* that is not already present in *taken*."""
    from urllib.parse import urlsplit

    # Only the path component names the file; "?v=3" or "#frag" must not leak
    # into the key. A URL whose path ends in "/" has no basename, so fall back
    # to a fixed placeholder instead of an empty key.
    name = urlsplit(url).path.rstrip("/").rsplit("/", 1)[-1] or "index.css"
    if name not in taken:
        return name

    # The same basename served from another directory/host: add a numeric
    # suffix rather than silently overwriting the earlier stylesheet.
    stem, dot, ext = name.rpartition(".")
    counter = 2
    while True:
        candidate = f"{stem}-{counter}.{ext}" if dot else f"{name}-{counter}"
        if candidate not in taken:
            return candidate
        counter += 1


def load_css_remote() -> Dict[str, str]:
    """Load all CSS files referenced in the target webpage HTML.

    Downloads the page at ``settings.TARGET_URL``, collects every ``<link>``
    whose ``rel`` contains ``stylesheet`` or whose ``href`` ends in ``.css``,
    fetches each one, and writes the concatenation to
    ``<settings.OUTPUT_DIR>/combined.css``.

    Returns:
        Mapping of stylesheet file name to its CSS text, in document order.
        Stylesheets that could not be fetched (network error or non-2xx
        status) are reported on stdout and left out of the mapping.

    Raises:
        ValueError: if ``settings.TARGET_URL`` is empty.
        requests.RequestException: if the page itself cannot be fetched
            (includes ``requests.HTTPError`` for a non-2xx status).
    """
    if not settings.TARGET_URL:
        raise ValueError("TARGET_URL must be set in .env for remote CSS mode.")

    print(f"🌍 Loading webpage HTML: {settings.TARGET_URL}")
    resp = requests.get(settings.TARGET_URL, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Relative hrefs must be resolved against the URL that actually served the
    # page (after redirects), not necessarily the configured one.
    base_url = resp.url or settings.TARGET_URL

    # -------------------------------------------------
    # 1) Extract <link rel="stylesheet" href="...">
    # -------------------------------------------------
    css_urls = []
    for link in soup.find_all("link"):
        href = link.get("href")
        if not href:
            continue

        # BeautifulSoup normally returns ``rel`` as a list of tokens, but it
        # can be a plain string depending on parser configuration. Link-type
        # tokens are case-insensitive in HTML, so compare lower-cased.
        rel = link.get("rel") or []
        if isinstance(rel, str):
            rel = rel.split()
        rel_tokens = {token.lower() for token in rel}

        # Accept any stylesheet-ish link
        if "stylesheet" in rel_tokens or href.lower().endswith(".css"):
            css_urls.append(urljoin(base_url, href))

    # Drop duplicates while preserving document order.
    css_urls = list(dict.fromkeys(css_urls))
    print(f"🔗 Found {len(css_urls)} CSS files in HTML.")

    # -------------------------------------------------
    # 2) Fetch each CSS file
    # -------------------------------------------------
    css_map: Dict[str, str] = {}
    for url in css_urls:
        print(f"📥 Fetching: {url}")
        try:
            css_resp = requests.get(url, timeout=10)
            # Without this an HTML 404/500 page would be stored as "CSS".
            css_resp.raise_for_status()
        except requests.RequestException as e:
            # Best effort: one broken stylesheet must not abort the run.
            print(f"❌ Failed to fetch {url}: {e}")
            continue
        css_map[_unique_css_name(url, css_map)] = css_resp.text

    # -------------------------------------------------
    # 3) Write combined.css to output
    # -------------------------------------------------
    combined_path = Path(settings.OUTPUT_DIR) / "combined.css"
    combined_path.parent.mkdir(parents=True, exist_ok=True)
    combined_text = "\n".join(
        f"/* ===== {name} ===== */\n{text}" for name, text in css_map.items()
    )
    combined_path.write_text(combined_text, encoding="utf-8")
    print(f"🧵 combined.css written → {combined_path}")

    return css_map