-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtml_mapper.py
More file actions
106 lines (83 loc) · 3.21 KB
/
html_mapper.py
File metadata and controls
106 lines (83 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
html_mapper.py
──────────────────────────────────────────────
Maps HTML DOM to existing CSS selectors — *without*
adding HTML nodes or links.
This module:
- Fetches HTML (remote or local)
- Checks which CSS selectors match elements in the DOM
- Marks nodes: unused = True / False
- Writes a clean, enriched CSS graph
No HTML nodes, no match-links are created.
"""
import json
import requests
from bs4 import BeautifulSoup
from pathlib import Path
def map_html_to_css(url: str, css_graph_path: str, output_path: str) -> dict:
    """
    Mark which CSS selectors in a graph are actually used by an HTML document.

    No nodes or links are added to the graph; each selector node simply
    gains an ``unused`` boolean flag, and summary counts are recorded in
    the graph's ``meta`` section before writing the result.

    Args:
        url (str): http(s) URL or local filesystem path to the HTML file.
        css_graph_path (str): Path to the input CSS graph JSON.
        output_path (str): Path where the enriched CSS graph JSON is written.

    Returns:
        dict: ``{"unused_selectors", "used_selectors", "output"}`` on success,
        or ``{"error": <message>}`` when loading the HTML or the graph fails.
    """
    print(f"🌐 Fetching HTML from: {url}")

    # -------------------------------------------------
    # 1) Load HTML content (remote via requests, otherwise a local file)
    # -------------------------------------------------
    try:
        if url.startswith(("http://", "https://")):
            resp = requests.get(url, timeout=15)
            resp.raise_for_status()
            html = resp.text
        else:
            html = Path(url).read_text(encoding="utf-8")
    except Exception as e:
        print(f"❌ Failed to load HTML: {e}")
        return {"error": str(e)}

    soup = BeautifulSoup(html, "html.parser")

    # -------------------------------------------------
    # 2) Load CSS graph
    # -------------------------------------------------
    # Explicit encoding: JSON graphs are UTF-8; without it, reading depends
    # on the platform's locale default and can fail on non-UTF-8 systems.
    try:
        graph = json.loads(Path(css_graph_path).read_text(encoding="utf-8"))
    except Exception as e:
        print(f"❌ Failed to load CSS graph: {e}")
        return {"error": str(e)}

    # Tolerate graphs without a "nodes" list or with nodes missing "type"
    # instead of raising KeyError mid-run.
    selector_nodes = [
        n for n in graph.get("nodes", []) if n.get("type") == "selector"
    ]
    print(f"📊 Checking {len(selector_nodes)} selectors in HTML...")

    used = set()

    # -------------------------------------------------
    # 3) Test all selectors via soup.select()
    # -------------------------------------------------
    for node in selector_nodes:
        selector = node.get("label")
        if not selector:
            # A node with no selector text can never match anything.
            continue
        try:
            if soup.select(selector):
                used.add(node.get("id"))
        except Exception:
            # soup.select raises on selectors it cannot parse
            # (CSS4 pseudo-classes etc.) — treat them as unmatched.
            pass

    # -------------------------------------------------
    # 4) Assign unused flags
    # -------------------------------------------------
    for node in selector_nodes:
        node["unused"] = node.get("id") not in used
    unused_count = sum(1 for n in selector_nodes if n["unused"])
    print(f"🔍 Found {unused_count} unused selectors.")

    # -------------------------------------------------
    # 5) Write output graph with summary counts in meta
    # -------------------------------------------------
    graph.setdefault("meta", {})
    graph["meta"]["unused_selectors"] = unused_count
    graph["meta"]["used_selectors"] = len(used)
    # ensure_ascii=False keeps non-ASCII selector/label text readable
    # in the output file instead of \uXXXX escapes.
    Path(output_path).write_text(
        json.dumps(graph, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    print(f"✅ Updated graph written → {output_path}")
    return {
        "unused_selectors": unused_count,
        "used_selectors": len(used),
        "output": str(output_path),
    }