From 524e1c19b2fbabc494e123122b91a77914924f29 Mon Sep 17 00:00:00 2001 From: thuantan2060 Date: Mon, 30 Mar 2026 21:33:20 +0700 Subject: [PATCH 1/4] =?UTF-8?q?fix:=20Azure=20OpenAI=20compatibility=20?= =?UTF-8?q?=E2=80=94=20omit=20max=5Ftokens=20when=20None?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Azure OpenAI rejects max_tokens=null in chat completion requests. Only include max_tokens in the request payload when it has a value. Affected methods: - OpenAISDKClient.chat() - OpenAISDKClient.summarize() - OpenAISDKClient.vision() - OpenAILLMBackend.build_summary_payload() - OpenAILLMBackend.build_vision_payload() Co-Authored-By: Claude Opus 4.6 (1M context) --- src/memu/llm/backends/openai.py | 12 ++++++--- src/memu/llm/openai_sdk.py | 45 ++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/memu/llm/backends/openai.py b/src/memu/llm/backends/openai.py index aef24fc6..b9a33bf2 100644 --- a/src/memu/llm/backends/openai.py +++ b/src/memu/llm/backends/openai.py @@ -15,15 +15,17 @@ def build_summary_payload( self, *, text: str, system_prompt: str | None, chat_model: str, max_tokens: int | None ) -> dict[str, Any]: prompt = system_prompt or "Summarize the text in one short paragraph." - return { + payload: dict[str, Any] = { "model": chat_model, "messages": [ {"role": "system", "content": prompt}, {"role": "user", "content": text}, ], "temperature": 0.2, - "max_tokens": max_tokens, } + if max_tokens is not None: + payload["max_tokens"] = max_tokens + return payload def parse_summary_response(self, data: dict[str, Any]) -> str: return cast(str, data["choices"][0]["message"]["content"]) @@ -56,9 +58,11 @@ def build_vision_payload( ], }) - return { + payload: dict[str, Any] = { "model": chat_model, "messages": messages, "temperature": 0.2, - "max_tokens": max_tokens, } + if max_tokens is not None: + payload["max_tokens"] = max_tokens + return payload diff --git a/src/memu/llm/openai_sdk.py b/src/memu/llm/openai_sdk.py index 38c6c8bb..08dae608 100644 --- a/src/memu/llm/openai_sdk.py +++ b/src/memu/llm/openai_sdk.py @@ -53,12 +53,15 @@ async def chat( user_message: ChatCompletionUserMessageParam = {"role": "user", "content": prompt} messages.append(user_message) - response = await self.client.chat.completions.create( - model=self.chat_model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - ) + kwargs: dict[str, Any] = { + "model": self.chat_model, + "messages": messages, + "temperature": temperature, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = await self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content logger.debug("OpenAI chat response: %s", response) return content or "", response @@ -76,12 +79,15 @@ async def summarize( user_message: ChatCompletionUserMessageParam = {"role": "user", "content": text} messages: list[ChatCompletionMessageParam] = [system_message, user_message] - response = await self.client.chat.completions.create( - model=self.chat_model, - messages=messages, - temperature=1, - max_tokens=max_tokens, - ) + kwargs: dict[str, Any] = { + "model": self.chat_model, + "messages": messages, + "temperature": 1, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = await self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content logger.debug("OpenAI summarize response: %s", response) return content or "", response @@ -142,12 +148,15 @@ async def vision( } messages.append(user_message) - response = await self.client.chat.completions.create( - model=self.chat_model, - messages=messages, - temperature=1, - max_tokens=max_tokens, - ) + kwargs: dict[str, Any] = { + "model": self.chat_model, + "messages": messages, + "temperature": 1, + } + if max_tokens is not None: + kwargs["max_tokens"] = max_tokens + + response = await self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content logger.debug("OpenAI vision response: %s", response) return content or "", response From ace8e8b619673e61e2ab6d4daa461b6c007b96ab Mon Sep 17 00:00:00 2001 From: thuantan2060 Date: Sat, 4 Apr 2026 19:44:12 +0700 Subject: [PATCH 2/4] ci: add build-wheel workflow for reusable GitHub Actions artifact Builds linux-x86_64 wheel on every push to main and uploads as 'memu-wheel' artifact with 7-day retention for cross-repo consumption by memu-server Docker CI pipeline. --- .github/workflows/build-wheel.yml | 41 +++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/build-wheel.yml diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml new file mode 100644 index 00000000..fe3f38c5 --- /dev/null +++ b/.github/workflows/build-wheel.yml @@ -0,0 +1,41 @@ +# Builds the memu-py wheel for linux-x86_64 on every push to main. +# The uploaded artifact ("memu-wheel") can be consumed cross-repo via +# actions/download-artifact@v7 with `repository` + `github-token` params, +# or via the GitHub REST API: GET /repos/{owner}/{repo}/actions/artifacts?name=memu-wheel + +name: build-wheel + +on: + push: + branches: + - main + +jobs: + build-wheel: + name: build linux-x86_64 wheel + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v7 + with: + python-version: "3.13" + + - name: Setup Rust + uses: dtolnay/rust-toolchain@stable + + - name: Install maturin + run: uv tool install maturin + + - name: Build wheel + run: uvx maturin build --release --out dist --compatibility manylinux_2_39 + + - name: Upload wheel artifact + uses: actions/upload-artifact@v6 + with: + name: memu-wheel + path: dist/*.whl + retention-days: 7 + if-no-files-found: error From 34fbc04ef2f3db764bcac5a2366a644aa597f23f Mon Sep 17 00:00:00 2001 From: thuantan2060 Date: Sat, 4 Apr 2026 19:56:50 +0700 Subject: [PATCH 3/4] ci: upgrade actions to Node.js 24-compatible versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - actions/checkout v4 → v6 - astral-sh/setup-uv v7 → v8 - actions/upload-artifact v6 → v7 --- .github/workflows/build-wheel.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index fe3f38c5..7a236d97 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -16,10 +16,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v8 with: python-version: "3.13" @@ -33,7 +33,7 @@ jobs: run: uvx maturin build --release --out dist --compatibility manylinux_2_39 - name: Upload wheel artifact - uses: actions/upload-artifact@v6 + uses: actions/upload-artifact@v7 with: name: memu-wheel path: dist/*.whl From 3a5c54bedbc8ab2bf333a808ba91ddfa11da605a Mon Sep 17 00:00:00 2001 From: thuantan2060 Date: Sat, 4 Apr 2026 19:58:24 +0700 Subject: [PATCH 4/4] =?UTF-8?q?fix(ci):=20revert=20setup-uv=20to=20v7=20?= =?UTF-8?q?=E2=80=94=20v8=20tag=20not=20yet=20published?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build-wheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-wheel.yml b/.github/workflows/build-wheel.yml index 7a236d97..645ae5f2 100644 --- a/.github/workflows/build-wheel.yml +++ b/.github/workflows/build-wheel.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/checkout@v6 - name: Install uv - uses: astral-sh/setup-uv@v8 + uses: astral-sh/setup-uv@v7 with: python-version: "3.13"