diff --git a/.github/workflows/dts-validator.yml b/.github/workflows/dts-validator.yml
index dfde4a1..b41d42c 100644
--- a/.github/workflows/dts-validator.yml
+++ b/.github/workflows/dts-validator.yml
@@ -2,8 +2,6 @@ name: DTS-Validator
on:
push:
- branches:
- - main
pull_request:
jobs:
@@ -39,10 +37,10 @@ jobs:
run: sleep 5 # Adjust as needed
- name: Install and run tests
- run: |
+ run: |
cd dts-validator
python -m venv env
source env/bin/activate
pip install poetry
poetry install
- pytest --entry-endpoint=http://localhost:5000
\ No newline at end of file
+ pytest --entry-endpoint=http://localhost:5000
diff --git a/dapytains/processor.py b/dapytains/processor.py
index c6ee05d..0eaf509 100644
--- a/dapytains/processor.py
+++ b/dapytains/processor.py
@@ -7,7 +7,7 @@
try:
if saxon_version == "PE":
import saxoncpe as saxonlib
- elif saxon_version == "PE":
+ elif saxon_version == "EE":
import saxoncee as saxonlib
else:
import saxonche as saxonlib
diff --git a/dapytains/tei/citeStructure.py b/dapytains/tei/citeStructure.py
index 7e87756..1c31f1f 100644
--- a/dapytains/tei/citeStructure.py
+++ b/dapytains/tei/citeStructure.py
@@ -2,9 +2,10 @@
from typing import Dict, List, Optional
from dataclasses import dataclass, field
from collections import namedtuple, defaultdict
-from functools import cmp_to_key
from dapytains.processor import get_xpath_proc, saxonlib
+_pos_re = re.compile(r'\[(\d+)\]')
+
@dataclass
class CiteData:
@@ -281,15 +282,6 @@ def find_refs_from_branches(
units = []
xpath_prefix = "./" if unit else ""
- # Custom comparison function to compare nodes by document order
- def compare_nodes_by_doc_order(node1, node2):
- # Check if node1 precedes node2 in document order
- precedes = xpath_proc.evaluate_single(f'{node1.xpath} << {node2.xpath}').string_value
- if precedes == "true":
- return -1 # node1 comes before node2
-
- return 1
-
unsorted = []
for s in structure:
unsorted.extend(
@@ -303,7 +295,19 @@ def compare_nodes_by_doc_order(node1, node2):
_simple_node(ref, self.generate_xpath(ref), struct)
for ref, struct in unsorted
]
- unsorted = sorted(unsorted, key=cmp_to_key(compare_nodes_by_doc_order))
+ # Generate a positional path key for each node once (O(n) JVM calls) and sort
+ # natively, rather than calling back into Saxon for every pairwise comparison
+ # (which would cost O(n log n) JVM round-trips).
+        def _doc_order_key(node):
+            # Sort key: for each ancestor-or-self element of the node, 1 + the count of ALL its
+            # preceding element siblings (any name), so mixed-name siblings sort in document order.
+            path_str = str(xpath_proc.evaluate_single(
+                f"string-join(for $n in ({node.xpath}/ancestor-or-self::*) "
+                f"return concat('/', name($n), '[', 1 + count($n/preceding-sibling::*), ']'), '')"
+            ))
+            return tuple(int(x) for x in _pos_re.findall(path_str))
+
+ unsorted = sorted(unsorted, key=_doc_order_key)
units = []
for elem in unsorted:
diff --git a/dapytains/tei/document.py b/dapytains/tei/document.py
index 68934ec..2d24e74 100644
--- a/dapytains/tei/document.py
+++ b/dapytains/tei/document.py
@@ -37,21 +37,19 @@ def xpath_walk(xpath: List[str]) -> Tuple[str, List[str], List[str]]:
return current_filled, queue, [xpath[0]] if len(xpath) > 1 else []
-def is_traversing_xpath(parent: saxonlib.PyXdmNode, xpath: str, processor: saxonlib.PySaxonProcessor) -> bool:
+def is_traversing_xpath(xpath_proc: saxonlib.PyXPathProcessor, xpath: str) -> bool:
""" Check if an XPath is traversing more than one level
- :param parent:
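+    :param xpath_proc: XPath processor whose context item is the parent node; it must not have been re-pointed (e.g. by set_context) before this call — NOTE(review): confirm for the reconstruct_doc call sites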
:param xpath:
:return:
"""
- xpath_proc = get_xpath_proc(parent, processor=processor)
if xpath.startswith(".//"):
# If the XPath starts with .//, we try to see if we have a direct child that matches
drct_xpath = xpath.replace(".//", "./", 1)
if xpath_proc.effective_boolean_value(f"head({xpath}) is head({drct_xpath})"):
return False
- else:
- return True
+ return True
return False
@@ -70,7 +68,7 @@ def xpath_walk_step(parent: saxonlib.PyXdmNode, xpath: str, processor: saxonlib.
xpath_proc = get_xpath_proc(parent, processor=processor)
# We check first for loops, because that changes the xpath
if xpath.startswith(".//"):
- if is_traversing_xpath(parent, xpath, processor=processor):
+ if is_traversing_xpath(xpath_proc, xpath):
return xpath_proc.evaluate_single(f"./*[{xpath}]"), True
else:
return xpath_proc.evaluate_single(xpath), False
@@ -174,11 +172,12 @@ def copy_node(
_add_space_tail(element, node, processor=processor)
return element
elif parent is not None:
- if not parent.getchildren():
+ existing = parent.getchildren()
+ if not existing:
if not isinstance(parent, (StringElement, ObjectifiedElement)):
parent.text = unescape((parent.text or "") + element)
else:
- parent.getchildren()[-1].tail = unescape(element)
+ existing[-1].tail = unescape(element)
return parent
if node is None:
@@ -373,7 +372,7 @@ def reconstruct_doc(
# Given that both XPath returns the same node, we still need to check if end is looping
# We optimize by avoiding this check when start and end are the same
- if start_xpath != end_xpath and is_traversing_xpath(root, current_end, processor=processor):
+ if start_xpath != end_xpath and is_traversing_xpath(xpath_proc, current_end):
queue_end = end_xpath
# If we have a child XPath, then continue the job
@@ -407,7 +406,7 @@ def reconstruct_doc(
# Given that both XPath returns the same node, we still need to check if end is looping
# We optimize by avoiding this check when start and end are the same
- if start_xpath != end_xpath and is_traversing_xpath(root, current_end, processor=processor):
+ if start_xpath != end_xpath and is_traversing_xpath(xpath_proc, current_end):
queue_end = end_xpath
reconstruct_doc(
@@ -434,7 +433,7 @@ def reconstruct_doc(
# Given that both XPath returns the same node, we still need to check if end is looping
# We optimize by avoiding this check when start and end are the same
- if start_xpath != end_xpath and is_traversing_xpath(root, current_end, processor=processor):
+ if start_xpath != end_xpath and is_traversing_xpath(xpath_proc, current_end):
queue_end = end_xpath
new_tree = reconstruct_doc(
@@ -486,8 +485,7 @@ def reconstruct_doc(
sib_current_end = clean_xpath_for_following(current_end, end_is_traversing)
# We look for siblings between start and end matches
- xpath = get_xpath_proc(root, processor=processor)
- for sibling in xpath_eval(xpath, f"./node()[preceding-sibling::{sib_current_start} and following-sibling::{sib_current_end}]"):
+ for sibling in xpath_eval(xpath_proc, f"./node()[preceding-sibling::{sib_current_start} and following-sibling::{sib_current_end}]"):
copy_node(sibling, include_children=True, parent=new_tree, processor=processor)
# Here we reached the end, logically.
@@ -622,7 +620,7 @@ def get_passage(self, ref_or_start: Optional[str], end: Optional[str] = None, tr
return root
def get_reffs(self, tree: Optional[str] = None):
- tree = self.citeStructure[tree or self.default_tree]
+ tree: CiteStructureParser = self.citeStructure[tree or self.default_tree]
return tree.find_refs(root=self.xml, structure=tree.structure)
def get_next(self, tree, unit) -> Optional[CitableUnit]:
diff --git a/tests/test_parallel_app.py b/tests/test_parallel_app.py
new file mode 100644
index 0000000..c77df4a
--- /dev/null
+++ b/tests/test_parallel_app.py
@@ -0,0 +1,241 @@
+"""
+Parallel-worker tests for the Flask app.
+
+Strategy: spawn 2 Flask server subprocesses on separate ports (simulating 2
+gunicorn prefork workers — each has its own Python process and JVM instance),
+then hammer both with concurrent HTTP requests via requests + ThreadPoolExecutor.
+
+NOTE: Saxon's C/JVM bindings are NOT thread-safe; concurrent Saxon calls from
+the *same* process will crash. The safe parallelism model is one OS process per
+concurrent client, which is exactly what prefork servers (gunicorn, uwsgi) use.
+These tests exercise that model directly.
+"""
+import concurrent.futures
+import os
+import socket
+import sys
+import tempfile
+import time
+import urllib.parse
+
+import pytest
+import requests
+
+_basedir = os.path.abspath(os.path.dirname(__file__))
+_project_root = os.path.dirname(_basedir)
+_catalog = os.path.join(_basedir, "catalog", "example-collection.xml")
+
+# Resources present in the test catalog
+_RESOURCE_TEXT = "https://foo.bar/text" # base_tei.xml (Luke/Mark)
+_RESOURCE_MULTI = "https://example.org/resource1" # multiple_tree.xml
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _free_port() -> int:
+    with socket.socket() as probe:  # closed on exit, so the port is free only at call time
+        probe.bind(("127.0.0.1", 0))  # port 0: let the OS pick an unused port
+        return probe.getsockname()[-1]
+
+
+_SERVER_SCRIPT = """\
+import sys, os
+sys.path.insert(0, sys.argv[1]) # project root
+port = int(sys.argv[2])
+db_path = sys.argv[3]
+catalog = sys.argv[4]
+
+from flask import Flask
+from dapytains.app.app import create_app
+from dapytains.app.ingest import store_catalog
+from dapytains.metadata.xml_parser import parse
+
+app = Flask(__name__)
+app, db = create_app(app)
+app.config["SQLALCHEMY_DATABASE_URI"] = f"sqlite:///{db_path}"
+app.config["SQLALCHEMY_TRACK_MODIFICATIONS"] = False
+db.init_app(app)
+
+with app.app_context():
+ db.create_all()
+ catalog_data, _ = parse(catalog)
+ store_catalog(catalog_data)
+
+# threaded=False: one request at a time per process — safe with Saxon
+app.run(host="127.0.0.1", port=port, threaded=False, use_reloader=False)
+"""
+
+
+def _start_server(port: int, db_path: str):
+    """Spawn a Flask server subprocess (its output is never piped) and return the Popen handle."""
+    import subprocess
+    return subprocess.Popen(
+        [sys.executable, "-c", _SERVER_SCRIPT,
+         _project_root, str(port), db_path, _catalog],
+        stdout=subprocess.DEVNULL,  # nobody reads it; an undrained PIPE can fill and stall the server
+        stderr=None,  # inherit stderr so startup tracebacks stay visible in the pytest output
+    )
+
+
+def _wait_ready(port: int, retries: int = 30, delay: float = 0.3) -> None:
+    """Poll ``/`` until the server answers; raise TimeoutError after ``retries`` failed attempts."""
+    for _ in range(retries):
+        try:
+            requests.get(f"http://127.0.0.1:{port}/", timeout=1)
+            return
+        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
+            time.sleep(delay)  # not listening yet, or slower than the 1 s probe timeout: retry
+    raise TimeoutError(f"Server on port {port} did not start in time")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Fixtures
+# ─────────────────────────────────────────────────────────────────────────────
+
+@pytest.fixture(scope="module")
+def two_workers():
+    """Start 2 Flask worker processes, yield their base URLs, then stop them and delete their DB dir."""
+    import shutil
+
+    tmpdir = tempfile.mkdtemp()
+    port1, port2 = _free_port(), _free_port()
+    procs = [
+        _start_server(port, os.path.join(tmpdir, f"worker{idx}.db"))
+        for idx, port in enumerate((port1, port2), start=1)
+    ]
+
+    try:
+        _wait_ready(port1)
+        _wait_ready(port2)
+        yield [f"http://127.0.0.1:{port1}", f"http://127.0.0.1:{port2}"]
+    finally:
+        for proc in procs:
+            proc.terminate()
+        for proc in procs:
+            proc.wait(timeout=10)
+        shutil.rmtree(tmpdir, ignore_errors=True)  # the SQLite files used to be left behind
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# URL builders for the document and navigation endpoints
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _doc_url(base: str, resource: str, **kwargs) -> str:
+    query = urllib.parse.urlencode(dict(resource=resource, **kwargs))  # resource always comes first
+    return base + "/document/?" + query
+
+
+def _nav_url(base: str, resource: str, **kwargs) -> str:
+    query = urllib.parse.urlencode(dict(resource=resource, **kwargs))  # resource always comes first
+    return base + "/navigation/?" + query
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Tests
+# ─────────────────────────────────────────────────────────────────────────────
+
+class TestTwoWorkersParallel:
+    """HTTP-level checks against the two single-threaded Flask worker processes of ``two_workers``."""
+
+    _TIMEOUT = 30  # seconds per request: a wedged worker must fail the test, not hang the run
+
+    def test_workers_are_up(self, two_workers):
+        """Both workers answer the entry-point."""
+        for base in two_workers:
+            r = requests.get(f"{base}/", timeout=self._TIMEOUT)
+            assert r.status_code == 200
+            assert "collection" in r.json()
+
+    def test_sequential_passages_per_worker(self, two_workers):
+        """Each worker handles N sequential document requests correctly."""
+        refs = ["Luke 1:1", "Luke 1:2", "Mark 1:1"]
+        for base in two_workers:
+            responses = [
+                requests.get(_doc_url(base, _RESOURCE_TEXT, ref=ref), timeout=self._TIMEOUT)
+                for ref in refs
+            ]
+            for r in responses:
+                assert r.status_code == 200
+                assert r.content  # non-empty XML
+
+    def test_parallel_requests_to_both_workers(self, two_workers):
+        """Send 20 document requests distributed across both workers in parallel.
+
+        Requests use threads only for HTTP I/O; Saxon runs inside separate server
+        processes so there is no intra-process concurrency on the JVM.
+        """
+        ref = "Luke 1:1"
+        urls = [_doc_url(base, _RESOURCE_TEXT, ref=ref) for base in two_workers] * 10
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
+            results = list(pool.map(lambda url: requests.get(url, timeout=self._TIMEOUT), urls))
+
+        assert all(r.status_code == 200 for r in results), [
+            r.text for r in results if r.status_code != 200
+        ]
+        # All responses must be identical XML
+        bodies = [r.text for r in results]
+        assert all(b == bodies[0] for b in bodies), "inconsistent XML across workers"
+
+    def test_parallel_range_requests(self, two_workers):
+        """Range passages delivered in parallel from both workers are consistent."""
+        urls = [
+            _doc_url(base, _RESOURCE_TEXT, start="Luke 1:1", end="Luke 1#1")
+            for base in two_workers
+        ] * 8
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
+            results = list(pool.map(lambda url: requests.get(url, timeout=self._TIMEOUT), urls))
+
+        assert all(r.status_code == 200 for r in results)
+        bodies = [r.text for r in results]
+        assert all(b == bodies[0] for b in bodies), "range XML must be identical across workers"
+
+    def test_parallel_navigation_requests(self, two_workers):
+        """Navigation endpoint delivers consistent results in parallel from both workers."""
+        urls = [_nav_url(base, _RESOURCE_TEXT, down=1) for base in two_workers] * 8
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
+            results = list(pool.map(lambda url: requests.get(url, timeout=self._TIMEOUT), urls))
+
+        assert all(r.status_code == 200 for r in results)
+        # member identifiers must be identical across workers
+        member_sets = [
+            {m["identifier"] for m in r.json().get("member", [])}
+            for r in results
+        ]
+        assert all(m == member_sets[0] for m in member_sets)
+
+    def test_parallel_mixed_requests(self, two_workers):
+        """Mix of single-passage, range, and navigation requests in parallel."""
+        base1, base2 = two_workers
+        urls = (
+            [_doc_url(base1, _RESOURCE_TEXT, ref="Luke 1:1")] * 4
+            + [_doc_url(base2, _RESOURCE_TEXT, ref="Luke 1:2")] * 4
+            + [_doc_url(base1, _RESOURCE_TEXT, start="Luke 1:1", end="Luke 1#1")] * 4
+            + [_nav_url(base2, _RESOURCE_TEXT, down=1)] * 4
+        )
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=8) as pool:
+            results = list(pool.map(lambda url: requests.get(url, timeout=self._TIMEOUT), urls))
+
+        assert all(r.status_code == 200 for r in results), [
+            (r.url, r.status_code, r.text[:120]) for r in results if r.status_code != 200
+        ]
+
+    def test_repeated_parallel_bursts(self, two_workers):
+        """Run several bursts of parallel requests; state must stay consistent between bursts."""
+        urls = [_doc_url(base, _RESOURCE_TEXT, ref="Luke 1:1") for base in two_workers] * 5
+
+        reference_body = None
+        for burst in range(3):
+            with concurrent.futures.ThreadPoolExecutor(max_workers=6) as pool:
+                results = list(pool.map(lambda url: requests.get(url, timeout=self._TIMEOUT), urls))
+            assert all(r.status_code == 200 for r in results), f"burst {burst} had failures"
+            burst_body = results[0].text
+            if reference_body is None:
+                reference_body = burst_body
+            assert all(r.text == reference_body for r in results), (
+                f"burst {burst}: response drifted from the reference"
+            )
diff --git a/tests/test_successive_calls.py b/tests/test_successive_calls.py
new file mode 100644
index 0000000..ec9c844
--- /dev/null
+++ b/tests/test_successive_calls.py
@@ -0,0 +1,317 @@
+"""
+Tests for processor-reuse stability: verifies that sharing or reusing Saxon
+XPath/XQuery processors across successive calls and across multiple Document
+instances does not corrupt context or results.
+"""
+import os.path
+
+from lxml.etree import tostring
+
+from dapytains.processor import get_processor, get_xpath_proc
+from dapytains.tei.citeStructure import CiteStructureParser
+from dapytains.tei.document import Document
+
+local_dir = os.path.join(os.path.dirname(__file__), "tei")
+_project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+
+
+# ─────────────────────────────────────────────────────────────
+# Successive calls on the SAME Document instance
+# ─────────────────────────────────────────────────────────────
+
+def test_successive_single_passages():
+    """Repeated single-ref get_passage calls on one Document must serialize to the same XML each time."""
+    doc = Document(f"{local_dir}/base_tei.xml")
+    expected_1_1 = tostring(doc.get_passage("Luke 1:1"), encoding=str)
+    expected_1_2 = tostring(doc.get_passage("Luke 1:2"), encoding=str)
+
+    # Alternate 1:1 -> 1:2 -> 1:1: each result must equal the first serialization of that ref
+    assert tostring(doc.get_passage("Luke 1:1"), encoding=str) == expected_1_1
+    assert tostring(doc.get_passage("Luke 1:2"), encoding=str) == expected_1_2
+    assert tostring(doc.get_passage("Luke 1:1"), encoding=str) == expected_1_1
+
+
+def test_successive_range_then_single():
+    """A range call followed by a single-ref call must not corrupt context.
+
+    reconstruct_doc calls xpath_proc.set_context(result_end) mid-function;
+    this test checks that such state does not leak into the following call.
+    """
+    doc = Document(f"{local_dir}/base_tei.xml")
+    expected_single = tostring(doc.get_passage("Luke 1:1"), encoding=str)
+    expected_range = tostring(doc.get_passage("Luke 1:1", "Luke 1#1"), encoding=str)
+
+    for _ in range(3):  # range first, then single, three rounds on the same Document
+        assert tostring(doc.get_passage("Luke 1:1", "Luke 1#1"), encoding=str) == expected_range
+        assert tostring(doc.get_passage("Luke 1:1"), encoding=str) == expected_single
+
+
+def test_successive_reffs_and_passage():
+    """get_reffs and get_passage interleaved on one Document must each keep returning the same result."""
+    doc = Document(f"{local_dir}/base_tei.xml")
+    reffs_1 = [r.ref for r in doc.get_reffs()]
+    passage = tostring(doc.get_passage("Luke 1:1"), encoding=str)
+    reffs_2 = [r.ref for r in doc.get_reffs()]
+
+    assert reffs_1 == reffs_2, (
+        "get_reffs must return the same refs before and after get_passage"
+    )
+    assert tostring(doc.get_passage("Luke 1:1"), encoding=str) == passage  # passage unchanged after get_reffs
+
+
+def test_successive_calls_with_traversing_xpath():
+    """Wide and narrow ranges on the traversing fixture (presumably .// XPaths — confirm) stay stable."""
+    doc = Document(f"{local_dir}/tei_with_two_traversing_with_n.xml")
+    expected_wide = tostring(doc.get_passage("Luke 1:1", "Luke 1#3"), encoding=str)
+    expected_narrow = tostring(doc.get_passage("Luke 1:1", "Luke 1#1"), encoding=str)
+
+    for _ in range(3):  # alternate the two end points on the same Document
+        assert tostring(doc.get_passage("Luke 1:1", "Luke 1#3"), encoding=str) == expected_wide
+        assert tostring(doc.get_passage("Luke 1:1", "Luke 1#1"), encoding=str) == expected_narrow
+
+
+def test_successive_calls_lb_milestone():
+    """lb_same_ab.xml fixture: single ("2", "5") and range ("2"-"4") calls stay consistent when repeated."""
+    doc = Document(f"{local_dir}/lb_same_ab.xml")
+    single = tostring(doc.get_passage("2"), encoding=str)
+    rng = tostring(doc.get_passage("2", "4"), encoding=str)
+    last = tostring(doc.get_passage("5"), encoding=str)
+
+    for _ in range(3):  # same order as the reference calls above
+        assert tostring(doc.get_passage("2"), encoding=str) == single
+        assert tostring(doc.get_passage("2", "4"), encoding=str) == rng
+        assert tostring(doc.get_passage("5"), encoding=str) == last
+
+
+def test_successive_calls_multiple_trees():
+    """Switching between the default tree and tree="alpha" on one Document must not change either result."""
+    doc = Document(f"{local_dir}/multiple_tree.xml")
+    default_passage = tostring(doc.get_passage("I"), encoding=str)
+    alpha_passage = tostring(doc.get_passage("div-002", tree="alpha"), encoding=str)
+
+    for _ in range(3):  # default -> alpha -> default on every round
+        assert tostring(doc.get_passage("I"), encoding=str) == default_passage
+        assert tostring(doc.get_passage("div-002", tree="alpha"), encoding=str) == alpha_passage
+        assert tostring(doc.get_passage("I"), encoding=str) == default_passage
+
+
+# ─────────────────────────────────────────────────────────────
+# Multiple Document instances in memory simultaneously
+# ─────────────────────────────────────────────────────────────
+
+def test_two_docs_in_memory_independent():
+    """Two Documents built without an explicit processor argument must not affect each other's results."""
+    doc_a = Document(f"{local_dir}/base_tei.xml")
+    doc_b = Document(f"{local_dir}/simple_doc.xml")
+
+    result_a = tostring(doc_a.get_passage("Luke 1:1"), encoding=str)
+    result_b = tostring(doc_b.get_passage("1"), encoding=str)
+
+    # Re-query each document after the other one has been used
+    assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == result_a
+    assert tostring(doc_b.get_passage("1"), encoding=str) == result_b
+
+
+def test_two_docs_same_file_independent():
+    """Two Documents loaded from the same file must give equal results and not disturb each other."""
+    doc_a = Document(f"{local_dir}/base_tei.xml")
+    doc_b = Document(f"{local_dir}/base_tei.xml")
+
+    single_a = tostring(doc_a.get_passage("Luke 1:1"), encoding=str)
+    single_b = tostring(doc_b.get_passage("Luke 1:1"), encoding=str)
+    assert single_a == single_b
+
+    range_a = tostring(doc_a.get_passage("Luke 1:1", "Luke 1#1"), encoding=str)
+    range_b = tostring(doc_b.get_passage("Luke 1:1", "Luke 1#1"), encoding=str)
+    assert range_a == range_b
+
+    # Query a different ref on doc_a, then check doc_b still returns its earlier results
+    tostring(doc_a.get_passage("Luke 1:2"), encoding=str)
+    assert tostring(doc_b.get_passage("Luke 1:1"), encoding=str) == single_b
+    assert tostring(doc_b.get_passage("Luke 1:1", "Luke 1#1"), encoding=str) == range_b
+
+
+def test_interleaved_calls_two_different_docs():
+    """Interleave a single-ref call on one fixture with a range call on a second, different fixture."""
+    doc_a = Document(f"{local_dir}/base_tei.xml")
+    doc_b = Document(f"{local_dir}/tei_with_two_traversing_with_n.xml")
+
+    expected_a = tostring(doc_a.get_passage("Luke 1:1"), encoding=str)
+    expected_b = tostring(doc_b.get_passage("Luke 1:1", "Luke 1#3"), encoding=str)
+
+    for _ in range(3):  # a -> b -> a on every round
+        assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == expected_a
+        assert tostring(doc_b.get_passage("Luke 1:1", "Luke 1#3"), encoding=str) == expected_b
+        assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == expected_a
+
+
+def test_three_docs_in_memory():
+    """Three Documents alive at once keep returning their first results when queried in another order."""
+    doc_a = Document(f"{local_dir}/base_tei.xml")
+    doc_b = Document(f"{local_dir}/simple_doc.xml")
+    doc_c = Document(f"{local_dir}/lb_same_ab.xml")
+
+    expected_a = tostring(doc_a.get_passage("Luke 1:1"), encoding=str)
+    expected_b = tostring(doc_b.get_passage("2", "3"), encoding=str)
+    expected_c = tostring(doc_c.get_passage("2", "4"), encoding=str)
+
+    # Query order c, a, b, b, c, a (the references above were taken in order a, b, c)
+    assert tostring(doc_c.get_passage("2", "4"), encoding=str) == expected_c
+    assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == expected_a
+    assert tostring(doc_b.get_passage("2", "3"), encoding=str) == expected_b
+    assert tostring(doc_b.get_passage("2", "3"), encoding=str) == expected_b
+    assert tostring(doc_c.get_passage("2", "4"), encoding=str) == expected_c
+    assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == expected_a
+
+
+# ─────────────────────────────────────────────────────────────
+# Multiple XML nodes via a SHARED PySaxonProcessor
+# ─────────────────────────────────────────────────────────────
+
+def test_shared_processor_two_docs():
+    """Two Documents given the same get_processor() instance must keep returning their own results."""
+    processor = get_processor()
+    doc_a = Document(f"{local_dir}/base_tei.xml", processor=processor)
+    doc_b = Document(f"{local_dir}/simple_doc.xml", processor=processor)
+
+    result_a = tostring(doc_a.get_passage("Luke 1:1"), encoding=str)
+    result_b = tostring(doc_b.get_passage("1"), encoding=str)
+
+    for _ in range(3):  # alternate between the two documents
+        assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == result_a
+        assert tostring(doc_b.get_passage("1"), encoding=str) == result_b
+
+
+def test_shared_processor_interleaved_reffs():
+    """get_reffs on two docs sharing one processor must be stable per document and differ between them."""
+    processor = get_processor()
+    doc_a = Document(f"{local_dir}/base_tei.xml", processor=processor)
+    doc_b = Document(f"{local_dir}/simple_doc.xml", processor=processor)
+
+    reffs_a1 = [r.ref for r in doc_a.get_reffs()]
+    reffs_b1 = [r.ref for r in doc_b.get_reffs()]
+
+    # Second pass in the opposite order (b first, then a)
+    reffs_b2 = [r.ref for r in doc_b.get_reffs()]
+    reffs_a2 = [r.ref for r in doc_a.get_reffs()]
+
+    assert reffs_a1 == reffs_a2, "doc_a refs must be stable"
+    assert reffs_b1 == reffs_b2, "doc_b refs must be stable"
+    assert set(reffs_a1) != set(reffs_b1), "different documents must have different top-level refs"
+
+
+def test_shared_processor_passage_and_reffs_interleaved():
+    """Mix get_passage and get_reffs across two docs on one shared processor; every result must repeat."""
+    processor = get_processor()
+    doc_a = Document(f"{local_dir}/base_tei.xml", processor=processor)
+    doc_b = Document(f"{local_dir}/simple_doc.xml", processor=processor)
+
+    passage_a = tostring(doc_a.get_passage("Luke 1:1"), encoding=str)
+    reffs_b = [r.ref for r in doc_b.get_reffs()]
+    passage_b = tostring(doc_b.get_passage("2"), encoding=str)
+    reffs_a = [r.ref for r in doc_a.get_reffs()]
+
+    assert tostring(doc_a.get_passage("Luke 1:1"), encoding=str) == passage_a  # same order as the reference calls
+    assert [r.ref for r in doc_b.get_reffs()] == reffs_b
+    assert tostring(doc_b.get_passage("2"), encoding=str) == passage_b
+    assert [r.ref for r in doc_a.get_reffs()] == reffs_a
+
+
+# ─────────────────────────────────────────────────────────────
+# CiteStructureParser successive calls and multiple parsers
+# ─────────────────────────────────────────────────────────────
+
+_MIXED_CHILDREN_XML = """
+
+
+
+
+
+
+
+
+
+
+
+"""
+
+
+def test_citestructure_successive_find_refs():
+    """find_refs called three times on one parser must return the same refs, in document order, each time."""
+    processor = get_processor()
+    TEI = processor.parse_xml(xml_text=_MIXED_CHILDREN_XML)
+    xp = get_xpath_proc(elem=TEI, processor=processor)
+    parser = CiteStructureParser(
+        xp.evaluate_single("/TEI/teiHeader/encodingDesc/refsDecl[1]"),
+        processor=processor
+    )
+
+    for _ in range(3):
+        roots = parser.find_refs(root=TEI, structure=parser.structure)
+        refs = [r.ref for r in roots]
+        assert refs == ["Luke", "Mark"], "top-level refs must be stable"
+
+        luke_children = [c.ref for c in roots[0].children[0].children]  # third level under Luke
+        assert luke_children == ["Luke 1:1", "Luke 1:2", "Luke 1#1"], (
+            "mixed-type children must stay in document order"
+        )
+
+        mark_children = [c.ref for c in roots[1].children[0].children]  # third level under Mark
+        assert mark_children == ["Mark 1:1", "Mark 1#1", "Mark 1:2"], (
+            "mixed-type children in Mark must follow document order"
+        )
+
+
+def test_citestructure_two_parsers_shared_processor():
+ """Two CiteStructureParsers on different XML nodes sharing a processor must not interfere."""
+ processor = get_processor()
+
+ tei_a = processor.parse_xml(xml_text="""
+
+
+
+ A
B
""")
+
+ tei_b = processor.parse_xml(xml_text="""
+
+
+
+ X
Y
""")
+
+ xp_a = get_xpath_proc(elem=tei_a, processor=processor)
+ xp_b = get_xpath_proc(elem=tei_b, processor=processor)
+ parser_a = CiteStructureParser(
+ xp_a.evaluate_single("/TEI/teiHeader/encodingDesc/refsDecl[1]"), processor=processor
+ )
+ parser_b = CiteStructureParser(
+ xp_b.evaluate_single("/TEI/teiHeader/encodingDesc/refsDecl[1]"), processor=processor
+ )
+
+ refs_a = [r.ref for r in parser_a.find_refs(root=tei_a, structure=parser_a.structure)]
+ refs_b = [r.ref for r in parser_b.find_refs(root=tei_b, structure=parser_b.structure)]
+
+ assert refs_a == ["1", "2"]
+ assert refs_b == ["alpha", "beta"]
+
+ # Swap query order — must still give correct results
+ assert [r.ref for r in parser_b.find_refs(root=tei_b, structure=parser_b.structure)] == refs_b
+ assert [r.ref for r in parser_a.find_refs(root=tei_a, structure=parser_a.structure)] == refs_a
+
+
+def test_citestructure_generate_xpath_stable():
+    """generate_xpath must return the same XPath string for a given ref on each of five repeated rounds."""
+    processor = get_processor()
+    TEI = processor.parse_xml(xml_text=_MIXED_CHILDREN_XML)
+    xp = get_xpath_proc(elem=TEI, processor=processor)
+    parser = CiteStructureParser(
+        xp.evaluate_single("/TEI/teiHeader/encodingDesc/refsDecl[1]"),
+        processor=processor
+    )
+
+    for _ in range(5):  # refs ending in div, l and the top-level div, asked for in turn
+        assert parser.generate_xpath("Luke 1:2") == "//body/div[@n='Luke']/div[position()=1]/div[position()=2]"
+        assert parser.generate_xpath("Mark 1#1") == "//body/div[@n='Mark']/div[position()=1]/l[position()=1]"
+        assert parser.generate_xpath("Luke") == "//body/div[@n='Luke']"
+