From c9e721f7f897dca6de5fedf8fb33410d3d9d6f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thibault=20Cl=C3=A9rice?= Date: Wed, 22 Apr 2026 15:40:54 +0200 Subject: [PATCH] (bugfix) Fix a bug where a tree in which a level splits in two and expects descendants would fail because None is not iterable --- dapytains/tei/citeStructure.py | 14 ++++++----- tests/tei/uneven_parent_level.xml | 41 +++++++++++++++++++++++++++++++ tests/test_tei.py | 15 +++++++++++ 3 files changed, 64 insertions(+), 6 deletions(-) create mode 100644 tests/tei/uneven_parent_level.xml diff --git a/dapytains/tei/citeStructure.py b/dapytains/tei/citeStructure.py index 1c31f1f..f45751c 100644 --- a/dapytains/tei/citeStructure.py +++ b/dapytains/tei/citeStructure.py @@ -284,12 +284,14 @@ def find_refs_from_branches( unsorted = [] for s in structure: - unsorted.extend( - [ - (f"{prefix}{s.delim}{value}", s) - for value in xpath_proc.evaluate(f"{xpath_prefix}{s.xpath}") - ] - ) + results = xpath_proc.evaluate(f"{xpath_prefix}{s.xpath}") + if results is not None: + unsorted.extend( + [ + (f"{prefix}{s.delim}{value}", s) + for value in results + ] + ) unsorted = [ _simple_node(ref, self.generate_xpath(ref), struct) diff --git a/tests/tei/uneven_parent_level.xml b/tests/tei/uneven_parent_level.xml new file mode 100644 index 0000000..7412e2d --- /dev/null +++ b/tests/tei/uneven_parent_level.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + +
+
+ Text 1 +
+ + Text 2 + Text 3 + +
+
+ +
Text A
+
Text B
+ Text C +
Text D
+
+ +
Text A
+
Text B
+ Text C +
Text D
+
+
+ +
+
+ diff --git a/tests/test_tei.py b/tests/test_tei.py index dd5fe6a..763e7f7 100644 --- a/tests/test_tei.py +++ b/tests/test_tei.py @@ -1,6 +1,8 @@ import os.path import pytest + +from dapytains.tei.citeStructure import CitableUnit from dapytains.tei.document import Document from lxml.etree import tostring @@ -252,3 +254,16 @@ def test_xml_entity(): ' \n' '') + +def _flat_refs(refs: list[CitableUnit]) -> list[str]: + data: list[str] = [] + for ref in refs: + data.append(ref.ref) + data.extend(_flat_refs(ref.children)) + return data + + +def test_ref_parsing_uneven_tree() -> None: + """Test that no level that can contain data is missed: the refs of uneven_parent_level.xml, flattened parent-first by _flat_refs, must match the expected list""" + doc = Document(f"{local_dir}/uneven_parent_level.xml") + assert _flat_refs(doc.get_reffs()) == ['Luke', 'Luke 1', 'Luke 1#1', 'Luke:1', 'Mark', 'Mark:1', 'Mark:2']