From b2448685e67117111adca62776d278fb91632fcb Mon Sep 17 00:00:00 2001
From: Abdul Aouwal <58025118+aouwalitshikkha@users.noreply.github.com>
Date: Thu, 18 Jun 2026 13:26:27 +0600
Subject: [PATCH] Add HTML lang attribute SEO check (#84)

Detects missing or empty lang attribute on <html> tag and reports it as
a warning. Follows the same pattern as existing checks (h1, img alt,
og tags).

Closes #84
---
 pyseoanalyzer/page.py |  9 +++++++++
 tests/test_page.py    | 25 +++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/pyseoanalyzer/page.py b/pyseoanalyzer/page.py
index 7ee31f4..bed82de 100644
--- a/pyseoanalyzer/page.py
+++ b/pyseoanalyzer/page.py
@@ -273,6 +273,7 @@ def get_meta_value(key):
         self.analyze_a_tags(soup_unmodified)
         self.analyze_img_tags(soup_lower)
         self.analyze_h1_tags(soup_lower)
+        self.analyze_html_lang(soup_lower)
 
         if self.analyze_headings:
             self.analyze_heading_tags(soup_unmodified)
@@ -449,6 +450,14 @@ def analyze_h1_tags(self, bs):
         if len(htags) == 0:
             self.warn("Each page should have at least one h1 tag")
 
+    def analyze_html_lang(self, bs):
+        """
+        Make sure the HTML tag has a lang attribute
+        """
+        html_tag = bs.find("html")
+        if html_tag is not None and not html_tag.get("lang"):
+            self.warn("Missing lang attribute on <html> tag")
+
     def analyze_a_tags(self, bs):
         """
         Add any new links (that we didn't find in the sitemap)
diff --git a/tests/test_page.py b/tests/test_page.py
index 70f4053..c1de76c 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -30,6 +30,31 @@ def test_analyze():
     assert "seth" in p.title.lower()
 
 
+def test_analyze_html_lang_missing():
+    from bs4 import BeautifulSoup
+
+    p = page.Page(url="https://example.com/", base_domain="https://example.com/")
+    soup = BeautifulSoup(
+        "<html><head><title>Test</title></head><body><p>Hello</p></body></html>",
+        "html.parser",
+    )
+    p.analyze_html_lang(soup)
+    assert len(p.warnings) == 1
+    assert "lang" in p.warnings[0].lower()
+
+
+def test_analyze_html_lang_present():
+    from bs4 import BeautifulSoup
+
+    p = page.Page(url="https://example.com/", base_domain="https://example.com/")
+    soup = BeautifulSoup(
+        '<html lang="en"><head><title>Test</title></head><body><p>Hello</p></body></html>',
+        "html.parser",
+    )
+    p.analyze_html_lang(soup)
+    assert len(p.warnings) == 0
+
+
 def test_analyze_with_llm():
     p = page.Page(
         url="https://www.sethserver.com/",