Skip to content

Commit 650f2a4

Browse files
Use the attachment's extension property for Outlook .msg attachments rather than guessing the extension from the MIME type
1 parent c02b2b7 commit 650f2a4

2 files changed

Lines changed: 2 additions & 3 deletions

File tree

cardinal_pythonlib/extract_text.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1362,8 +1362,7 @@ def _gen_msg_content(
13621362
for attachment in message.attachments:
13631363
# null termination seen in the real world
13641364
# https://github.com/TeamMsgExtractor/msg-extractor/issues/464
1365-
content_type = attachment.mimetype.replace("\x00", "")
1366-
ext = guess_extension(content_type)
1365+
ext = attachment.extension.replace("\x00", "")
13671366
if ext is not None and ext in ext_map:
13681367
yield document_to_text(
13691368
blob=attachment.data, extension=ext, config=config

cardinal_pythonlib/tests/extract_text_tests.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -677,7 +677,7 @@ def test_attachment_converted(self) -> None:
677677
mock_attachment = mock.Mock(
678678
# null termination seen in the real world
679679
# https://github.com/TeamMsgExtractor/msg-extractor/issues/464
680-
mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document\x00", # noqa: E501
680+
extension=".docx",
681681
data=BytesIO(docx).read(),
682682
)
683683
mock_msgfile = mock.Mock(

0 commit comments

Comments
 (0)