Refactor agent for testability and add integration test

google-labs-jules[bot] · google-labs-jules[bot] · commit 252f7a44fdd7 · 2025-11-22T18:33:11.000Z
- Refactored `spendee_agent.py` to accept a question as an argument and return the LLM response, making it easier to test.
- Replaced the previous mock-based test with a new integration test that makes a real LLM call.
- The test verifies that the agent's response to a specific question about the sky's color contains the keyword "Rayleigh".
- The prompt in the test was made more specific to ensure a reliable response from the LLM.
diff --git a/agent-test/spendee_agent.py b/agent-test/spendee_agent.py
@@ -14,7 +14,7 @@
 
 app = MCPApp(name="hello_world_agent")
 
-async def example_usage():
+async def example_usage(question: str):
     async with app.run() as mcp_agent_app:
         logger = mcp_agent_app.logger
         # This agent can read the filesystem or fetch URLs
@@ -32,10 +32,11 @@ async def example_usage():
 
             # This will perform a file lookup and read using the filesystem server
             result = await llm.generate_str(
-                message="Why is the sky blue? Explain in two sentences."
+                message=question
             )
             logger.info(f"Response: {result}")
+            return result
 
 
 if __name__ == "__main__":
-    asyncio.run(example_usage())
+    asyncio.run(example_usage("Why is the sky blue? Explain in two sentences."))
diff --git a/agent-test/test_spendee_agent.py b/agent-test/test_spendee_agent.py
@@ -0,0 +1,18 @@
+import pytest
+import spendee_agent
+
+@pytest.mark.asyncio
+async def test_example_usage_with_real_llm():
+    """
+    Tests that the example_usage function, when making a real LLM call,
+    returns a response that contains the expected keyword.
+    """
+    # Arrange
+    question = "Why is the sky blue? Explain in two sentences, mentioning the scientific name for the scattering effect."
+
+    # Act
+    response = await spendee_agent.example_usage(question)
+
+    # Assert
+    assert response is not None, "The LLM response should not be None."
+    assert "rayleigh" in response.lower(), "The response should contain the word 'Rayleigh'."
diff --git a/agents.md b/agents.md
@@ -55,3 +55,14 @@ The authentication in the firebase_client.py is not intuitive, you may used to h
 
 
 If you face authorization problems, troubleshoot what identities and wallets are used, or you may experiment with new firebase centric functions, but the authentication steps in the login flow should be only modified if user approved or explicitly asked.
+
+## Jules Agent
+
+### Session Start Checklist
+- `git pull origin main`
+- `./setup.sh`
+- `source .venv/bin/activate`
+
+### Session End Checklist
+- All tests pass without errors.
+- All learnings from the development process are documented in the existing docs, `agents.md`, or a new `docs/session-learnings-<date>-<topic>.md` file.