Skip to content

Commit 10b1ac0

Browse files
Test XML and anything else converted to text
1 parent d1f8977 commit 10b1ac0

1 file changed

Lines changed: 38 additions & 1 deletion

File tree

cardinal_pythonlib/tests/extract_text_tests.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from faker_file.providers.odt_file import OdtFileProvider
3636
from faker_file.providers.pdf_file import PdfFileProvider
3737
from faker_file.providers.txt_file import TxtFileProvider
38+
from faker_file.providers.xml_file import XmlFileProvider
3839

3940
from cardinal_pythonlib.extract_text import (
4041
document_to_text,
@@ -62,11 +63,12 @@ def _create_mock_objects(self) -> None:
6263
)
6364

6465
def _register_faker_providers(self) -> None:
    """
    Build the ``en-GB`` Faker instance used by the tests and attach each
    faker_file provider class to it, storing the result on ``self.fake``.
    """
    fake = Faker("en-GB")
    # Providers are attached in the same order as before.
    for provider in (
        DocxFileProvider,
        OdtFileProvider,
        PdfFileProvider,
        TxtFileProvider,
        XmlFileProvider,
    ):
        fake.add_provider(provider)
    self.fake = fake
7072

7173
def _replace_external_tools_with_fakes(self) -> None:
7274
# For external tools we assume the tools are running correctly
@@ -275,3 +277,38 @@ def test_txt_converted(self) -> None:
275277
text = document_to_text(filename=txt_file.data["filename"])
276278

277279
self.assertEqual(text.strip(), content)
280+
281+
def test_xml_converted(self) -> None:
    """
    Generate a one-row XML file containing a fake name and address, run it
    through ``document_to_text`` and check that the stripped result is the
    name immediately followed by the address.
    """
    name = self.fake.name()
    address = self.fake.address()
    columns = {"name": name, "address": address}

    generated = self.fake.xml_file(num_rows=1, data_columns=columns)
    xml_path = generated.data["filename"]

    extracted = document_to_text(filename=xml_path)
    expected = f"{name}{address}"

    self.assertEqual(extracted.strip(), expected)
295+
296+
def test_unsupported_converted(self) -> None:
    """
    A file with an unrecognised extension (here ``.exe``) should be handed
    to the ``strings`` tool: check that ``Popen`` is called with the
    ``strings`` path under ``self.empty_dir`` and the file's name, with
    ``stdout`` piped.
    """
    # Local import: only this test needs it, for temp-file cleanup.
    import os

    # delete=False keeps the file on disk after it is closed, so it can be
    # passed by name to document_to_text().
    with NamedTemporaryFile(suffix=".exe", delete=False) as temp_file:
        temp_file.close()
    # Because of delete=False nothing else removes the file; register the
    # removal so that it happens even if an assertion below fails.
    self.addCleanup(os.remove, temp_file.name)

    with mock.patch.multiple(
        "cardinal_pythonlib.extract_text.subprocess",
        Popen=self.mock_popen,
    ):
        document_to_text(filename=temp_file.name, config=self.config)

    expected_calls = [
        mock.call(
            (
                f"{self.empty_dir}/strings",
                temp_file.name,
            ),
            stdout=subprocess.PIPE,
        ),
    ]
    self.mock_popen.assert_has_calls(expected_calls)

0 commit comments

Comments
 (0)