From e8f8d9b77f0c58dde6f23f2ad0acf2860e074db4 Mon Sep 17 00:00:00 2001 From: kamilbenkirane Date: Sat, 28 Mar 2026 17:24:38 +0100 Subject: [PATCH] fix: correct URL handling for document and media input OpenResponses: use file_url field for URL-based documents instead of file_data (which expects base64 data URIs only). Google: include mime_type in file_data when using URL-based artifacts, fixing "Unsupported MIME type" errors for all media types. Also replaces empty test PDF with one containing actual text content. --- .../text/protocols/openresponses/client.py | 22 +++---- src/celeste/providers/google/utils.py | 5 +- .../text/assets/test_document.pdf | Bin 415 -> 589 bytes .../test_text_modality_analyze_document.py | 54 ++++++++++++++++++ 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/src/celeste/modalities/text/protocols/openresponses/client.py b/src/celeste/modalities/text/protocols/openresponses/client.py index fe8ea7a..3dd7dbb 100644 --- a/src/celeste/modalities/text/protocols/openresponses/client.py +++ b/src/celeste/modalities/text/protocols/openresponses/client.py @@ -87,16 +87,18 @@ def _init_request(self, inputs: TextInput) -> dict[str, Any]: else [inputs.document] ) for doc in docs: - file_data = build_document_data_url(doc) - content.append( - { - "type": "input_file", - "filename": doc.path.rsplit("/", 1)[-1] - if doc.path - else "document", - "file_data": file_data, - } - ) + if doc.url and not doc.data and not doc.path: + content.append({"type": "input_file", "file_url": doc.url}) + else: + content.append( + { + "type": "input_file", + "filename": doc.path.rsplit("/", 1)[-1] + if doc.path + else "document", + "file_data": build_document_data_url(doc), + } + ) content.append({"type": "input_text", "text": inputs.prompt or ""}) return {"input": [{"role": "user", "content": content}]} diff --git a/src/celeste/providers/google/utils.py b/src/celeste/providers/google/utils.py index 97b7c47..5f1b55c 100644 --- a/src/celeste/providers/google/utils.py +++ b/src/celeste/providers/google/utils.py @@ -10,7 +10,10 @@ def build_media_part(artifact: Artifact) -> dict[str, Any]: """Convert any media artifact to a Gemini inline_data/file_data part.""" if artifact.url: - return {"file_data": {"file_uri": artifact.url}} + part: dict[str, Any] = {"file_data": {"file_uri": artifact.url}} + if artifact.mime_type: + part["file_data"]["mime_type"] = artifact.mime_type.value + return part media_bytes = artifact.get_bytes() b64 = base64.b64encode(media_bytes).decode("utf-8") mime = artifact.mime_type or detect_mime_type(media_bytes) diff --git a/tests/integration_tests/text/assets/test_document.pdf b/tests/integration_tests/text/assets/test_document.pdf index 106e95e008fd6cfc098a78b013b61f241f6261a9..5ea844bdc1398e95e47399cff41189e24eab59c9 100644 GIT binary patch literal 589 zcmZWnO;5r=5WVlOcoz~q=ystkl8_!yh>02rH1R+}n^5L&)^|(?H&9uM4|{oa(Kq&?UeONg_;{F_54C6B?X+%CicK z9>bTUV)vzoY7u#JNn{&m2-^6_7n9|BWDi&bgpXAh{Xusr0G2F0AuELJ0XZ|3-Mckq z&EfLtQS+$C!BXWFN~{;oaa9VG4*UMf63!-P<_L3v4o!~;HDu+HIB!s9^ZdDI%v#pGx7}*Xgd_A^7Y1Hn;^6ZRf z#@TvV-HCh71aL6>kJ$|0zds2V%B$9#aIH?{AP>vBEU9Y(g~i*5nke|98@DYYZ-)!h zcOJwbi13vFU#Zb*Zq8dCmJ^Y~<22s|J4->D{IDixdV3%jLRF^o@euG3J<2ryc_K1; zK2mYh?|tkM%Ltq+M+ttW_KUnes3F_g(vfkHj1L17DGLclxL{LATj(yBe7eDekT+Of kjI`b=-I5JOHu{G^Vr@+LsJ}+cd1ZadQwYwoZ1q None: assert content[0]["type"] == "document_url" assert content[0]["document_url"].startswith("data:application/pdf;base64,") assert content[-1] == {"type": "text", "text": "Summarize this document"} + + +def test_openai_init_request_uses_file_url_for_url_document() -> None: + model = Model( + id="gpt-4o", + provider=Provider.OPENAI, + display_name="GPT-4o", + operations={Modality.TEXT: {Operation.GENERATE, Operation.ANALYZE}}, + ) + client = OpenAITextClient( + model=model, + provider=Provider.OPENAI, + auth=AuthHeader(secret=SecretStr("test")), + ) + + request = client._init_request( + TextInput( + prompt="Summarize this document", + document=DocumentArtifact(url="https://example.com/doc.pdf"), + ) + ) + + content = request["input"][0]["content"] + assert content[0]["type"] == "input_file" + assert content[0]["file_url"] == "https://example.com/doc.pdf" + assert "file_data" not in content[0] + + +def test_google_init_request_includes_mime_type_for_url_document() -> None: + model = Model( + id="gemini-2.5-pro", + provider=Provider.GOOGLE, + display_name="Gemini 2.5 Pro", + operations={Modality.TEXT: {Operation.GENERATE, Operation.ANALYZE}}, + ) + client = GoogleTextClient( + model=model, + provider=Provider.GOOGLE, + auth=AuthHeader(secret=SecretStr("test"), header="x-goog-api-key", prefix=""), + ) + + request = client._init_request( + TextInput( + prompt="Summarize this document", + document=DocumentArtifact( + url="https://example.com/doc.pdf", mime_type=DocumentMimeType.PDF + ), + ) + ) + + parts = request["contents"][0]["parts"] + assert "file_data" in parts[0] + assert parts[0]["file_data"]["file_uri"] == "https://example.com/doc.pdf" + assert parts[0]["file_data"]["mime_type"] == "application/pdf"