diff --git a/pyseoanalyzer/page.py b/pyseoanalyzer/page.py
index 7ee31f4..bed82de 100644
--- a/pyseoanalyzer/page.py
+++ b/pyseoanalyzer/page.py
@@ -273,6 +273,7 @@ def get_meta_value(key):
         self.analyze_a_tags(soup_unmodified)
         self.analyze_img_tags(soup_lower)
         self.analyze_h1_tags(soup_lower)
+        self.analyze_html_lang(soup_lower)
 
         if self.analyze_headings:
             self.analyze_heading_tags(soup_unmodified)
@@ -449,6 +450,14 @@ def analyze_h1_tags(self, bs):
         if len(htags) == 0:
             self.warn("Each page should have at least one h1 tag")
 
+    def analyze_html_lang(self, bs):
+        """
+        Make sure the HTML tag has a lang attribute
+        """
+        html_tag = bs.find("html")
+        if html_tag is not None and not html_tag.get("lang"):
+            self.warn("Missing lang attribute on tag")
+
     def analyze_a_tags(self, bs):
         """
         Add any new links (that we didn't find in the sitemap)
diff --git a/tests/test_page.py b/tests/test_page.py
index 70f4053..c1de76c 100644
--- a/tests/test_page.py
+++ b/tests/test_page.py
@@ -30,6 +30,31 @@ def test_analyze():
     assert "seth" in p.title.lower()
 
 
+def test_analyze_html_lang_missing():
+    from bs4 import BeautifulSoup
+
+    p = page.Page(url="https://example.com/", base_domain="https://example.com/")
+    soup = BeautifulSoup(
+        "<html><head><title>Test</title></head><body><p>Hello</p></body></html>",
+        "html.parser",
+    )
+    p.analyze_html_lang(soup)
+    assert len(p.warnings) == 1
+    assert "lang" in p.warnings[0].lower()
+
+
+def test_analyze_html_lang_present():
+    from bs4 import BeautifulSoup
+
+    p = page.Page(url="https://example.com/", base_domain="https://example.com/")
+    soup = BeautifulSoup(
+        '<html lang="en"><head><title>Test</title></head><body><p>Hello</p></body></html>',
+        "html.parser",
+    )
+    p.analyze_html_lang(soup)
+    assert len(p.warnings) == 0
+
+
 def test_analyze_with_llm():
     p = page.Page(
         url="https://www.sethserver.com/",