From 524e1c19b2fbabc494e123122b91a77914924f29 Mon Sep 17 00:00:00 2001
From: thuantan2060 <thuantan2060@gmail.com>
Date: Mon, 30 Mar 2026 21:33:20 +0700
Subject: [PATCH 1/4] =?UTF-8?q?fix:=20Azure=20OpenAI=20compatibility=20?=
 =?UTF-8?q?=E2=80=94=20omit=20max=5Ftokens=20when=20None?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Azure OpenAI rejects max_tokens=null in chat completion requests.
Only include max_tokens in the request payload when it has a value.

Affected methods:
- OpenAISDKClient.chat()
- OpenAISDKClient.summarize()
- OpenAISDKClient.vision()
- OpenAILLMBackend.build_summary_payload()
- OpenAILLMBackend.build_vision_payload()

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/memu/llm/backends/openai.py | 12 ++++++---
 src/memu/llm/openai_sdk.py      | 45 ++++++++++++++++++++-------------
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/src/memu/llm/backends/openai.py b/src/memu/llm/backends/openai.py
index aef24fc6..b9a33bf2 100644
--- a/src/memu/llm/backends/openai.py
+++ b/src/memu/llm/backends/openai.py
@@ -15,15 +15,17 @@ def build_summary_payload(
         self, *, text: str, system_prompt: str | None, chat_model: str, max_tokens: int | None
     ) -> dict[str, Any]:
         prompt = system_prompt or "Summarize the text in one short paragraph."
-        return {
+        payload: dict[str, Any] = {
             "model": chat_model,
             "messages": [
                 {"role": "system", "content": prompt},
                 {"role": "user", "content": text},
             ],
             "temperature": 0.2,
-            "max_tokens": max_tokens,
         }
+        if max_tokens is not None:
+            payload["max_tokens"] = max_tokens
+        return payload
 
     def parse_summary_response(self, data: dict[str, Any]) -> str:
         return cast(str, data["choices"][0]["message"]["content"])
@@ -56,9 +58,11 @@ def build_vision_payload(
             ],
         })
 
-        return {
+        payload: dict[str, Any] = {
             "model": chat_model,
             "messages": messages,
             "temperature": 0.2,
-            "max_tokens": max_tokens,
         }
+        if max_tokens is not None:
+            payload["max_tokens"] = max_tokens
+        return payload
diff --git a/src/memu/llm/openai_sdk.py b/src/memu/llm/openai_sdk.py
index 38c6c8bb..08dae608 100644
--- a/src/memu/llm/openai_sdk.py
+++ b/src/memu/llm/openai_sdk.py
@@ -53,12 +53,15 @@ async def chat(
         user_message: ChatCompletionUserMessageParam = {"role": "user", "content": prompt}
         messages.append(user_message)
 
-        response = await self.client.chat.completions.create(
-            model=self.chat_model,
-            messages=messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-        )
+        kwargs: dict[str, Any] = {
+            "model": self.chat_model,
+            "messages": messages,
+            "temperature": temperature,
+        }
+        if max_tokens is not None:
+            kwargs["max_tokens"] = max_tokens
+
+        response = await self.client.chat.completions.create(**kwargs)
         content = response.choices[0].message.content
         logger.debug("OpenAI chat response: %s", response)
         return content or "", response
@@ -76,12 +79,15 @@ async def summarize(
         user_message: ChatCompletionUserMessageParam = {"role": "user", "content": text}
         messages: list[ChatCompletionMessageParam] = [system_message, user_message]
 
-        response = await self.client.chat.completions.create(
-            model=self.chat_model,
-            messages=messages,
-            temperature=1,
-            max_tokens=max_tokens,
-        )
+        kwargs: dict[str, Any] = {
+            "model": self.chat_model,
+            "messages": messages,
+            "temperature": 1,
+        }
+        if max_tokens is not None:
+            kwargs["max_tokens"] = max_tokens
+
+        response = await self.client.chat.completions.create(**kwargs)
         content = response.choices[0].message.content
         logger.debug("OpenAI summarize response: %s", response)
         return content or "", response
@@ -142,12 +148,15 @@ async def vision(
         }
         messages.append(user_message)
 
-        response = await self.client.chat.completions.create(
-            model=self.chat_model,
-            messages=messages,
-            temperature=1,
-            max_tokens=max_tokens,
-        )
+        kwargs: dict[str, Any] = {
+            "model": self.chat_model,
+            "messages": messages,
+            "temperature": 1,
+        }
+        if max_tokens is not None:
+            kwargs["max_tokens"] = max_tokens
+
+        response = await self.client.chat.completions.create(**kwargs)
         content = response.choices[0].message.content
         logger.debug("OpenAI vision response: %s", response)
         return content or "", response

From ace8e8b619673e61e2ab6d4daa461b6c007b96ab Mon Sep 17 00:00:00 2001
From: thuantan2060 <thuantan2060@gmail.com>
Date: Sat, 4 Apr 2026 19:44:12 +0700
Subject: [PATCH 2/4] ci: add build-wheel workflow for reusable GitHub Actions
 artifact

Builds a linux-x86_64 wheel on every push to main and uploads it as the
'memu-wheel' artifact with 7-day retention, for cross-repo consumption
by the memu-server Docker CI pipeline.
---
 .github/workflows/build-wheel.yml | 41 +++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 .github/workflows/build-wheel.yml

diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
new file mode 100644
index 00000000..fe3f38c5
--- /dev/null
+++ b/.github/workflows/build-wheel.yml
@@ -0,0 +1,41 @@
+# Builds the memu-py wheel for linux-x86_64 on every push to main.
+# The uploaded artifact ("memu-wheel") can be consumed cross-repo via
+# actions/download-artifact@v7 with `repository`, `run-id` + `github-token` params,
+# or via the GitHub REST API: GET /repos/{owner}/{repo}/actions/artifacts?name=memu-wheel
+
+name: build-wheel
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build-wheel:
+    name: build linux-x86_64 wheel
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v7
+        with:
+          python-version: "3.13"
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Install maturin
+        run: uv tool install maturin
+
+      - name: Build wheel
+        run: uvx maturin build --release --out dist --compatibility manylinux_2_39
+
+      - name: Upload wheel artifact
+        uses: actions/upload-artifact@v6
+        with:
+          name: memu-wheel
+          path: dist/*.whl
+          retention-days: 7
+          if-no-files-found: error

From 34fbc04ef2f3db764bcac5a2366a644aa597f23f Mon Sep 17 00:00:00 2001
From: thuantan2060 <thuantan2060@gmail.com>
Date: Sat, 4 Apr 2026 19:56:50 +0700
Subject: [PATCH 3/4] ci: upgrade actions to Node.js 24-compatible versions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- actions/checkout v4 → v6
- astral-sh/setup-uv v7 → v8
- actions/upload-artifact v6 → v7
---
 .github/workflows/build-wheel.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index fe3f38c5..7a236d97 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -16,10 +16,10 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v7
+        uses: astral-sh/setup-uv@v8
         with:
           python-version: "3.13"
 
@@ -33,7 +33,7 @@ jobs:
         run: uvx maturin build --release --out dist --compatibility manylinux_2_39
 
       - name: Upload wheel artifact
-        uses: actions/upload-artifact@v6
+        uses: actions/upload-artifact@v7
         with:
           name: memu-wheel
           path: dist/*.whl

From 3a5c54bedbc8ab2bf333a808ba91ddfa11da605a Mon Sep 17 00:00:00 2001
From: thuantan2060 <thuantan2060@gmail.com>
Date: Sat, 4 Apr 2026 19:58:24 +0700
Subject: [PATCH 4/4] =?UTF-8?q?fix(ci):=20revert=20setup-uv=20to=20v7=20?=
 =?UTF-8?q?=E2=80=94=20v8=20tag=20not=20yet=20published?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/build-wheel.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml
index 7a236d97..645ae5f2 100644
--- a/.github/workflows/build-wheel.yml
+++ b/.github/workflows/build-wheel.yml
@@ -19,7 +19,7 @@ jobs:
       - uses: actions/checkout@v6
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v8
+        uses: astral-sh/setup-uv@v7
         with:
           python-version: "3.13"