diff --git a/dapytains/tei/citeStructure.py b/dapytains/tei/citeStructure.py index 1c31f1f..f45751c 100644 --- a/dapytains/tei/citeStructure.py +++ b/dapytains/tei/citeStructure.py @@ -284,12 +284,14 @@ def find_refs_from_branches( unsorted = [] for s in structure: - unsorted.extend( - [ - (f"{prefix}{s.delim}{value}", s) - for value in xpath_proc.evaluate(f"{xpath_prefix}{s.xpath}") - ] - ) + results = xpath_proc.evaluate(f"{xpath_prefix}{s.xpath}") + if results is not None: + unsorted.extend( + [ + (f"{prefix}{s.delim}{value}", s) + for value in results + ] + ) unsorted = [ _simple_node(ref, self.generate_xpath(ref), struct) diff --git a/tests/tei/uneven_parent_level.xml b/tests/tei/uneven_parent_level.xml new file mode 100644 index 0000000..7412e2d --- /dev/null +++ b/tests/tei/uneven_parent_level.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + +
+
+ Text 1 +
+ + Text 2 + Text 3 + +
+
+ +
Text A
+
Text B
+ Text C +
Text D
+
+ +
Text A
+
Text B
+ Text C +
Text D
+
+
+ +
+
diff --git a/tests/test_tei.py b/tests/test_tei.py index dd5fe6a..763e7f7 100644 --- a/tests/test_tei.py +++ b/tests/test_tei.py @@ -1,6 +1,8 @@ import os.path import pytest + +from dapytains.tei.citeStructure import CitableUnit from dapytains.tei.document import Document from lxml.etree import tostring @@ -252,3 +254,16 @@ def test_xml_entity(): ' \n' '') + +def _flat_refs(refs: list[CitableUnit]) -> list[str]: + data = [] + for ref in refs: + data.append(ref.ref) + data.extend(_flat_refs(ref.children)) + return data + + +def test_ref_parsing_uneven_tree(): + """Test that a level that can contain data is not missed""" + doc = Document(f"{local_dir}/uneven_parent_level.xml") + assert _flat_refs(doc.get_reffs()) == ['Luke', 'Luke 1', 'Luke 1#1', 'Luke:1', 'Mark', 'Mark:1', 'Mark:2']