diff --git a/.claude/skills/opengradient/SKILL.md b/.claude/skills/opengradient/SKILL.md
index d4878d7..010a9bb 100644
--- a/.claude/skills/opengradient/SKILL.md
+++ b/.claude/skills/opengradient/SKILL.md
@@ -61,10 +61,10 @@ client.llm.ensure_opg_approval(opg_amount=5)
 
 | Provider | Models |
 |------------|--------|
-| OpenAI | `GPT_4_1_2025_04_14`, `GPT_4O`, `O4_MINI` |
-| Anthropic | `CLAUDE_3_7_SONNET`, `CLAUDE_3_5_HAIKU`, `CLAUDE_4_0_SONNET` |
-| Google | `GEMINI_2_5_FLASH`, `GEMINI_2_5_PRO`, `GEMINI_2_0_FLASH`, `GEMINI_2_5_FLASH_LITE` |
-| xAI | `GROK_3_MINI_BETA`, `GROK_3_BETA`, `GROK_2_1212`, `GROK_4_1_FAST`, `GROK_4_1_FAST_NON_REASONING` |
+| OpenAI | `GPT_4_1_2025_04_14`, `O4_MINI`, `GPT_5`, `GPT_5_MINI`, `GPT_5_2` |
+| Anthropic | `CLAUDE_SONNET_4_5`, `CLAUDE_SONNET_4_6`, `CLAUDE_HAIKU_4_5`, `CLAUDE_OPUS_4_5`, `CLAUDE_OPUS_4_6` |
+| Google | `GEMINI_2_5_FLASH`, `GEMINI_2_5_PRO`, `GEMINI_2_5_FLASH_LITE`, `GEMINI_3_PRO`, `GEMINI_3_FLASH` |
+| xAI | `GROK_4`, `GROK_4_FAST`, `GROK_4_1_FAST`, `GROK_4_1_FAST_NON_REASONING` |
 
 ## Settlement Modes (`og.x402SettlementMode`)
 
@@ -117,7 +117,7 @@
 }]
 
 result = client.llm.chat(
-    model=og.TEE_LLM.GPT_4O,
+    model=og.TEE_LLM.GPT_5,
     messages=[{"role": "user", "content": "Weather in NYC?"}],
     tools=tools,
     max_tokens=200,
@@ -138,7 +138,7 @@ messages = [
 
 for _ in range(max_iterations):
     result = client.llm.chat(
-        model=og.TEE_LLM.GPT_4O,
+        model=og.TEE_LLM.GPT_5,
         messages=messages,
         tools=tools,
         tool_choice="auto",
diff --git a/.claude/skills/opengradient/api-reference.md b/.claude/skills/opengradient/api-reference.md
index 6832190..b7db429 100644
--- a/.claude/skills/opengradient/api-reference.md
+++ b/.claude/skills/opengradient/api-reference.md
@@ -106,25 +106,28 @@ Returned by `client.alpha.infer()`.
 
 ### OpenAI
 - `GPT_4_1_2025_04_14`
-- `GPT_4O`
 - `O4_MINI`
+- `GPT_5`
+- `GPT_5_MINI`
+- `GPT_5_2`
 
 ### Anthropic
-- `CLAUDE_3_7_SONNET`
-- `CLAUDE_3_5_HAIKU`
-- `CLAUDE_4_0_SONNET`
+- `CLAUDE_SONNET_4_5`
+- `CLAUDE_SONNET_4_6`
+- `CLAUDE_HAIKU_4_5`
+- `CLAUDE_OPUS_4_5`
+- `CLAUDE_OPUS_4_6`
 
 ### Google
 - `GEMINI_2_5_FLASH`
 - `GEMINI_2_5_PRO`
-- `GEMINI_2_0_FLASH`
 - `GEMINI_2_5_FLASH_LITE`
+- `GEMINI_3_PRO`
+- `GEMINI_3_FLASH`
 
 ### xAI (Grok)
-- `GROK_3_MINI_BETA`
-- `GROK_3_BETA`
-- `GROK_2_1212`
-- `GROK_2_VISION_LATEST`
+- `GROK_4`
+- `GROK_4_FAST`
 - `GROK_4_1_FAST`
 - `GROK_4_1_FAST_NON_REASONING`
 
diff --git a/Makefile b/Makefile
index 1c2ab74..acc46ee 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # OpenGradient SDK Makefile
 
-# Default model for testing (override with: make chat MODEL=openai/gpt-4o)
-MODEL ?= anthropic/claude-3.7-sonnet
+# Default model for testing (override with: make chat MODEL=anthropic/claude-haiku-4-5)
+MODEL ?= google/gemini-3-pro-preview
 
 # ============================================================================
 # Development
@@ -67,7 +67,7 @@ infer:
 
 completion:
	python -m opengradient.cli completion \
-		--model $(MODEL) --mode TEE \
+		--model $(MODEL) \
		--prompt "Hello, how are you?" 
\ --max-tokens 50 @@ -75,16 +75,23 @@ chat: python -m opengradient.cli chat \ --model $(MODEL) \ --messages '[{"role":"user","content":"Tell me a fun fact"}]' \ - --max-tokens 150 + --max-tokens 350 chat-stream: python -m opengradient.cli chat \ --model $(MODEL) \ --messages '[{"role":"user","content":"Tell me a short story"}]' \ - --max-tokens 250 \ + --max-tokens 1250 \ --stream chat-tool: + python -m opengradient.cli chat \ + --model $(MODEL) \ + --messages '[{"role":"user","content":"What is the weather in Tokyo?"}]' \ + --tools '[{"type":"function","function":{"name":"get_weather","description":"Get weather for a location","parameters":{"type":"object","properties":{"location":{"type":"string"},"unit":{"type":"string","enum":["celsius","fahrenheit"]}},"required":["location"]}}}]' \ + --max-tokens 100 + +chat-stream-tool: python -m opengradient.cli chat \ --model $(MODEL) \ --messages '[{"role":"user","content":"What is the weather in Tokyo?"}]' \ @@ -93,4 +100,4 @@ chat-tool: --stream .PHONY: install build publish check docs test utils_test client_test langchain_adapter_test opg_token_test integrationtest examples \ - infer completion chat chat-stream chat-tool + infer completion chat chat-stream chat-tool chat-stream-tool diff --git a/README.md b/README.md index 983c40d..3134b4a 100644 --- a/README.md +++ b/README.md @@ -100,7 +100,7 @@ See [Payment Settlement](#payment-settlement) for details on settlement modes. OpenGradient provides secure, verifiable inference through Trusted Execution Environments. All supported models include cryptographic attestation verified by the OpenGradient network: ```python completion = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello!"}], ) print(f"Response: {completion.chat_output['content']}") @@ -112,7 +112,7 @@ print(f"Transaction hash: {completion.transaction_hash}") For real-time generation, enable streaming: ```python stream = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Explain quantum computing"}], max_tokens=500, stream=True, @@ -133,7 +133,7 @@ import opengradient as og llm = og.agents.langchain_adapter( private_key=os.environ.get("OG_PRIVATE_KEY"), - model_cid=og.TEE_LLM.GPT_4O, + model_cid=og.TEE_LLM.GPT_5, ) @tool @@ -154,25 +154,28 @@ The SDK provides access to models from multiple providers via the `og.TEE_LLM` e #### OpenAI - GPT-4.1 (2025-04-14) -- GPT-4o - o4-mini +- GPT-5 +- GPT-5 Mini +- GPT-5.2 #### Anthropic -- Claude 3.7 Sonnet -- Claude 3.5 Haiku -- Claude 4.0 Sonnet +- Claude Sonnet 4.5 +- Claude Sonnet 4.6 +- Claude Haiku 4.5 +- Claude Opus 4.5 +- Claude Opus 4.6 #### Google - Gemini 2.5 Flash - Gemini 2.5 Pro -- Gemini 2.0 Flash - Gemini 2.5 Flash Lite +- Gemini 3 Pro +- Gemini 3 Flash #### xAI -- Grok 3 Beta -- Grok 3 Mini Beta -- Grok 2 (1212) -- Grok 2 Vision +- Grok 4 +- Grok 4 Fast - Grok 4.1 Fast (reasoning and non-reasoning) For a complete list, reference the `og.TEE_LLM` enum or consult the [API documentation](https://docs.opengradient.ai/api_reference/python_sdk/). 
@@ -262,7 +265,7 @@ opengradient infer -m QmbUqS93oc4JTLMHwpVxsE39mhNxy6hpf6Py3r9oANr8aZ \ Run a chat completion: ```bash -opengradient chat --model anthropic/claude-3.5-haiku \ +opengradient chat --model anthropic/claude-haiku-4-5 \ --messages '[{"role":"user","content":"Hello"}]' \ --max-tokens 100 ``` @@ -297,7 +300,7 @@ OpenGradient supports multiple settlement modes through the x402 payment protoco Specify settlement mode in your requests: ```python result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, ) diff --git a/docs/CLAUDE_SDK_USERS.md b/docs/CLAUDE_SDK_USERS.md index 8a4bf8c..009b6f5 100644 --- a/docs/CLAUDE_SDK_USERS.md +++ b/docs/CLAUDE_SDK_USERS.md @@ -25,7 +25,7 @@ client = og.Client( # LLM Chat (TEE-verified with x402 payments) result = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_5_HAIKU, + model=og.TEE_LLM.CLAUDE_HAIKU_4_5, messages=[{"role": "user", "content": "Hello!"}], max_tokens=100, ) @@ -108,25 +108,28 @@ All LLM models are TEE-verified. `og.LLM` and `og.TEE_LLM` contain the same mode ```python # OpenAI og.TEE_LLM.GPT_4_1_2025_04_14 -og.TEE_LLM.GPT_4O og.TEE_LLM.O4_MINI +og.TEE_LLM.GPT_5 +og.TEE_LLM.GPT_5_MINI +og.TEE_LLM.GPT_5_2 # Anthropic -og.TEE_LLM.CLAUDE_3_7_SONNET -og.TEE_LLM.CLAUDE_3_5_HAIKU -og.TEE_LLM.CLAUDE_4_0_SONNET +og.TEE_LLM.CLAUDE_SONNET_4_5 +og.TEE_LLM.CLAUDE_SONNET_4_6 +og.TEE_LLM.CLAUDE_HAIKU_4_5 +og.TEE_LLM.CLAUDE_OPUS_4_5 +og.TEE_LLM.CLAUDE_OPUS_4_6 # Google og.TEE_LLM.GEMINI_2_5_FLASH og.TEE_LLM.GEMINI_2_5_PRO -og.TEE_LLM.GEMINI_2_0_FLASH og.TEE_LLM.GEMINI_2_5_FLASH_LITE +og.TEE_LLM.GEMINI_3_PRO +og.TEE_LLM.GEMINI_3_FLASH # xAI -og.TEE_LLM.GROK_3_BETA -og.TEE_LLM.GROK_3_MINI_BETA -og.TEE_LLM.GROK_2_1212 -og.TEE_LLM.GROK_2_VISION_LATEST +og.TEE_LLM.GROK_4 +og.TEE_LLM.GROK_4_FAST og.TEE_LLM.GROK_4_1_FAST og.TEE_LLM.GROK_4_1_FAST_NON_REASONING ``` @@ -137,7 +140,7 @@ All models are accessed through the OpenGradient TEE infrastructure with x402 pa ```python result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], ) ``` @@ -163,7 +166,7 @@ tools = [{ }] result = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "What's the weather in NYC?"}], tools=tools, tool_choice="auto", @@ -178,7 +181,7 @@ if result.chat_output.get("tool_calls"): ```python stream = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Tell me a story"}], stream=True, ) @@ -197,7 +200,7 @@ from langgraph.prebuilt import create_react_agent # Create LangChain-compatible LLM llm = og.agents.langchain_adapter( private_key=os.environ["OG_PRIVATE_KEY"], - model_cid=og.LLM.CLAUDE_3_7_SONNET, + model_cid=og.LLM.CLAUDE_SONNET_4_6, max_tokens=300, ) diff --git a/examples/README.md b/examples/README.md index acb97be..6bcbc1e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -169,7 +169,7 @@ LLM chat pattern: ```python completion = og_client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_5_HAIKU, + model=og.TEE_LLM.CLAUDE_HAIKU_4_5, messages=[{"role": "user", "content": "Your message"}], ) print(f"Response: {completion.chat_output['content']}") diff --git a/integrationtest/agent/test_agent.py b/integrationtest/agent/test_agent.py index 0a89403..bd9db99 100644 --- a/integrationtest/agent/test_agent.py +++ 
b/integrationtest/agent/test_agent.py @@ -20,7 +20,7 @@ def setUp(self): raise ValueError("PRIVATE_KEY environment variable is not set") self.client = og.Client(private_key=private_key) - self.llm = OpenGradientChatModel(private_key=private_key, model_cid=LLM.CLAUDE_3_7_SONNET) + self.llm = OpenGradientChatModel(private_key=private_key, model_cid=LLM.CLAUDE_SONNET_4_6) def test_simple_completion(self): message = self.llm.invoke("say 'hello'. literally") diff --git a/pyproject.toml b/pyproject.toml index ef374cc..262637b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "opengradient" -version = "0.7.4" +version = "0.7.5" description = "Python SDK for OpenGradient decentralized model management & inference services" authors = [{name = "OpenGradient", email = "adam@vannalabs.ai"}] readme = "README.md" @@ -27,7 +27,7 @@ dependencies = [ "langchain>=0.3.7", "openai>=1.58.1", "pydantic>=2.9.2", - "og-test-v2-x402==0.0.9" + "og-test-v2-x402==0.0.11" ] [project.scripts] diff --git a/requirements.txt b/requirements.txt index 7a51cd1..df03caa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,4 @@ requests>=2.32.3 langchain>=0.3.7 openai>=1.58.1 pydantic>=2.9.2 -og-test-v2-x402==0.0.9 \ No newline at end of file +og-test-v2-x402==0.0.11 \ No newline at end of file diff --git a/src/opengradient/__init__.py b/src/opengradient/__init__.py index 562ad4e..2c89d42 100644 --- a/src/opengradient/__init__.py +++ b/src/opengradient/__init__.py @@ -24,7 +24,7 @@ # Chat with an LLM (TEE-verified) response = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_5_HAIKU, + model=og.TEE_LLM.CLAUDE_HAIKU_4_5, messages=[{"role": "user", "content": "Hello!"}], max_tokens=200, ) @@ -32,7 +32,7 @@ # Stream a response for chunk in client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Explain TEE in one paragraph."}], max_tokens=300, stream=True, @@ -141,7 +141,7 @@ def init( import opengradient as og client = og.init(private_key="0x...") client.llm.ensure_opg_approval(opg_amount=5) - response = client.llm.chat(model=og.TEE_LLM.GPT_4O, messages=[...]) + response = client.llm.chat(model=og.TEE_LLM.GPT_5, messages=[...]) """ global global_client global_client = Client( diff --git a/src/opengradient/cli.py b/src/opengradient/cli.py index 2c5b07e..28669db 100644 --- a/src/opengradient/cli.py +++ b/src/opengradient/cli.py @@ -365,7 +365,7 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path "-m", "model_cid", required=True, - help="Model identifier (e.g., 'anthropic/claude-3.5-haiku', 'openai/gpt-4o')", + help="Model identifier (e.g., 'anthropic/claude-haiku-4-5', 'openai/gpt-5')", ) @click.option("--prompt", "-p", required=True, help="Input prompt for the LLM completion") @click.option("--max-tokens", type=int, default=100, help="Maximum number of tokens for LLM completion output") @@ -396,8 +396,8 @@ def completion( Example usage: \b - opengradient completion --model anthropic/claude-3.5-haiku --prompt "Hello, how are you?" --max-tokens 50 - opengradient completion --model openai/gpt-4o --prompt "Write a haiku" --max-tokens 100 + opengradient completion --model anthropic/claude-haiku-4-5 --prompt "Hello, how are you?" 
--max-tokens 50 + opengradient completion --model openai/gpt-5 --prompt "Write a haiku" --max-tokens 100 """ client: Client = ctx.obj["client"] @@ -448,7 +448,7 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_vanilla=True) "-m", "model_cid", required=True, - help="Model identifier (e.g., 'anthropic/claude-3.5-haiku', 'openai/gpt-4o')", + help="Model identifier (e.g., 'anthropic/claude-haiku-4-5', 'openai/gpt-5')", ) @click.option("--messages", type=str, required=False, help="Input messages for the chat inference in JSON format") @click.option( @@ -496,11 +496,11 @@ def chat( Example usage: \b - opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"hello"}]' --max-tokens 50 - opengradient chat --model openai/gpt-4o --messages '[{"role":"user","content":"hello"}]' --max-tokens 50 + opengradient chat --model anthropic/claude-haiku-4-5 --messages '[{"role":"user","content":"hello"}]' --max-tokens 50 + opengradient chat --model openai/gpt-5 --messages '[{"role":"user","content":"hello"}]' --max-tokens 50 # With streaming - opengradient chat --model anthropic/claude-3.5-haiku --messages '[{"role":"user","content":"How are clouds formed?"}]' --max-tokens 250 --stream + opengradient chat --model anthropic/claude-haiku-4-5 --messages '[{"role":"user","content":"How are clouds formed?"}]' --max-tokens 250 --stream """ client: Client = ctx.obj["client"] @@ -608,8 +608,16 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_van click.secho("Chat Output:", fg="yellow", bold=True) click.echo() for key, value in chat_output.items(): - if value != None and value != "" and value != "[]" and value != []: - click.echo(f"{key}: {value}") + if value is not None and value not in ("", "[]", []): + # Normalize list-of-blocks content (e.g. 
Gemini 3 thought signatures)
+            if key == "content" and isinstance(value, list):
+                text = " ".join(
+                    block.get("text", "") for block in value
+                    if isinstance(block, dict) and block.get("type") == "text"
+                ).strip()
+                click.echo(f"{key}: {text}")
+            else:
+                click.echo(f"{key}: {value}")
 
     click.echo()
 
diff --git a/src/opengradient/client/__init__.py b/src/opengradient/client/__init__.py
index 44813c6..1266f0d 100644
--- a/src/opengradient/client/__init__.py
+++ b/src/opengradient/client/__init__.py
@@ -33,7 +33,7 @@
 
     # LLM chat (TEE-verified, streamed)
     for chunk in client.llm.chat(
-        model=og.TEE_LLM.CLAUDE_3_5_HAIKU,
+        model=og.TEE_LLM.CLAUDE_HAIKU_4_5,
         messages=[{"role": "user", "content": "Hello!"}],
         max_tokens=200,
         stream=True,
diff --git a/src/opengradient/client/client.py b/src/opengradient/client/client.py
index 8bb35e6..2caef8b 100644
--- a/src/opengradient/client/client.py
+++ b/src/opengradient/client/client.py
@@ -36,7 +36,7 @@ class Client:
         client = og.Client(private_key="0x...")
         client = og.Client(private_key="0xBASE_KEY", alpha_private_key="0xALPHA_KEY")
         client.llm.ensure_opg_approval(opg_amount=5)  # one-time Permit2 approval
-        result = client.llm.chat(model=TEE_LLM.CLAUDE_3_5_HAIKU, messages=[...])
+        result = client.llm.chat(model=TEE_LLM.CLAUDE_HAIKU_4_5, messages=[...])
         result = client.alpha.infer(model_cid, InferenceMode.VANILLA, input_data)
     """
 
diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py
index e1490cb..83f8eb8 100644
--- a/src/opengradient/client/llm.py
+++ b/src/opengradient/client/llm.py
@@ -5,6 +5,9 @@
 import threading
 from queue import Queue
 from typing import AsyncGenerator, Dict, List, Optional, Union
+import ssl
+import socket
+from urllib.parse import urlparse
 
 import httpx
 from eth_account.account import LocalAccount
@@ -36,6 +39,54 @@
 )
 
 
+def _fetch_tls_cert_as_ssl_context(server_url: str) -> Optional[ssl.SSLContext]:
+    """
+    Connect to a server, retrieve its TLS certificate (TOFU),
+    and return an ssl.SSLContext that trusts ONLY that certificate.
+
+    Hostname verification is disabled because the TEE server's cert
+    is typically issued for a hostname but we may connect via IP address.
+    The pinned certificate itself provides the trust anchor.
+
+    Returns None if the server is not HTTPS or unreachable.
+    """
+    parsed = urlparse(server_url)
+    if parsed.scheme != "https":
+        return None
+
+    hostname = parsed.hostname
+    port = parsed.port or 443
+
+    # Connect without verification to retrieve the server's certificate
+    fetch_ctx = ssl.create_default_context()
+    fetch_ctx.check_hostname = False
+    fetch_ctx.verify_mode = ssl.CERT_NONE
+
+    try:
+        with socket.create_connection((hostname, port), timeout=10) as sock:
+            with fetch_ctx.wrap_socket(sock, server_hostname=hostname) as ssock:
+                der_cert = ssock.getpeercert(binary_form=True)
+                pem_cert = ssl.DER_cert_to_PEM_cert(der_cert)
+    except Exception:
+        return None
+
+    # Build an SSLContext that trusts ONLY this certificate.
+    #
+    # The PEM is handed to load_verify_locations() via cadata=, so the
+    # pinned cert never touches the filesystem and there is no temp
+    # file to clean up afterwards.
+    #
+    # check_hostname stays disabled because the cert is typically issued
+    # for a DNS name while we connect via IP address; CERT_REQUIRED still
+    # verifies the peer against the pinned cert itself, which is the
+    # actual trust anchor here.
+    ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+    ctx.load_verify_locations(cadata=pem_cert)
+    ctx.check_hostname = False
+    ctx.verify_mode = ssl.CERT_REQUIRED
+    return ctx
+
+
 class LLM:
     """
     LLM inference namespace.
@@ -55,8 +106,8 @@ class LLM: # One-time approval (idempotent — skips if allowance is already sufficient) client.llm.ensure_opg_approval(opg_amount=5) - result = client.llm.chat(model=TEE_LLM.CLAUDE_3_5_HAIKU, messages=[...]) - result = client.llm.completion(model=TEE_LLM.CLAUDE_3_5_HAIKU, prompt="Hello") + result = client.llm.chat(model=TEE_LLM.CLAUDE_HAIKU_4_5, messages=[...]) + result = client.llm.completion(model=TEE_LLM.CLAUDE_HAIKU_4_5, prompt="Hello") """ def __init__(self, wallet_account: LocalAccount, og_llm_server_url: str, og_llm_streaming_server_url: str): @@ -64,6 +115,13 @@ def __init__(self, wallet_account: LocalAccount, og_llm_server_url: str, og_llm_ self._og_llm_server_url = og_llm_server_url self._og_llm_streaming_server_url = og_llm_streaming_server_url + self._tls_verify: Union[ssl.SSLContext, bool] = ( + _fetch_tls_cert_as_ssl_context(self._og_llm_server_url) or True + ) + self._streaming_tls_verify: Union[ssl.SSLContext, bool] = ( + _fetch_tls_cert_as_ssl_context(self._og_llm_streaming_server_url) or True + ) + signer = EthAccountSignerv2(self._wallet_account) self._x402_client = x402Clientv2() register_exact_evm_clientv2(self._x402_client, signer, networks=[BASE_TESTNET_NETWORK]) @@ -92,10 +150,10 @@ def _run_coroutine(self, coroutine): async def _initialize_http_clients(self) -> None: if self._request_client is None: - self._request_client_ctx = x402HttpxClientv2(self._x402_client) + self._request_client_ctx = x402HttpxClientv2(self._x402_client, verify=self._tls_verify) self._request_client = await self._request_client_ctx.__aenter__() if self._stream_client is None: - self._stream_client_ctx = x402HttpxClientv2(self._x402_client) + self._stream_client_ctx = x402HttpxClientv2(self._x402_client, verify=self._streaming_tls_verify) self._stream_client = await self._stream_client_ctx.__aenter__() async def _close_http_clients(self) -> None: @@ -153,7 +211,7 @@ def completion( Perform inference on an LLM model using completions via TEE. Args: - model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_3_5_HAIKU). + model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_HAIKU_4_5). prompt (str): The input prompt for the LLM. max_tokens (int): Maximum number of tokens for LLM output. Default is 100. stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None. @@ -223,6 +281,8 @@ async def make_request_v2(): return TextGenerationOutput( transaction_hash="external", completion_output=result.get("completion"), + tee_signature=result.get("tee_signature"), + tee_timestamp=result.get("tee_timestamp"), ) except Exception as e: @@ -251,7 +311,7 @@ def chat( Perform inference on an LLM model using chat via TEE. Args: - model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_3_5_HAIKU). + model (TEE_LLM): The model to use (e.g., TEE_LLM.CLAUDE_HAIKU_4_5). messages (List[Dict]): The messages that will be passed into the chat. max_tokens (int): Maximum number of tokens for LLM output. Default is 100. stop_sequence (List[str], optional): List of stop sequences for LLM. 
@@ -348,10 +408,20 @@ async def make_request_v2(): if not choices: raise OpenGradientError(f"Invalid response: 'choices' missing or empty in {result}") + message = choices[0].get("message", {}) + content = message.get("content") + if isinstance(content, list): + message["content"] = " ".join( + block.get("text", "") for block in content + if isinstance(block, dict) and block.get("type") == "text" + ).strip() + return TextGenerationOutput( transaction_hash="external", finish_reason=choices[0].get("finish_reason"), - chat_output=choices[0].get("message"), + chat_output=message, + tee_signature=result.get("tee_signature"), + tee_timestamp=result.get("tee_timestamp"), ) except Exception as e: diff --git a/src/opengradient/defaults.py b/src/opengradient/defaults.py index a23ce6b..c053225 100644 --- a/src/opengradient/defaults.py +++ b/src/opengradient/defaults.py @@ -6,5 +6,6 @@ DEFAULT_INFERENCE_CONTRACT_ADDRESS = "0x8383C9bD7462F12Eb996DD02F78234C0421A6FaE" DEFAULT_SCHEDULER_ADDRESS = "0x7179724De4e7FF9271FA40C0337c7f90C0508eF6" DEFAULT_BLOCKCHAIN_EXPLORER = "https://explorer.opengradient.ai/tx/" -DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://llm.opengradient.ai" -DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://llm.opengradient.ai" +# TODO (Kyle): Add a process to fetch these IPs from the TEE registry +DEFAULT_OPENGRADIENT_LLM_SERVER_URL = "https://3.15.214.21:443" +DEFAULT_OPENGRADIENT_LLM_STREAMING_SERVER_URL = "https://3.15.214.21:443" diff --git a/src/opengradient/types.py b/src/opengradient/types.py index 866bfba..3a13aac 100644 --- a/src/opengradient/types.py +++ b/src/opengradient/types.py @@ -239,12 +239,16 @@ class StreamChunk: model: Model identifier usage: Token usage information (only in final chunk) is_final: Whether this is the final chunk (before [DONE]) + tee_signature: RSA-PSS signature over the response, present on the final chunk + tee_timestamp: ISO timestamp from the TEE at signing time, present on the final chunk """ choices: List[StreamChoice] model: str usage: Optional[StreamUsage] = None is_final: bool = False + tee_signature: Optional[str] = None + tee_timestamp: Optional[str] = None @classmethod def from_sse_data(cls, data: Dict) -> "StreamChunk": @@ -275,8 +279,14 @@ def from_sse_data(cls, data: Dict) -> "StreamChunk": is_final = any(c.finish_reason is not None for c in choices) or usage is not None - return cls(choices=choices, model=data.get("model", "unknown"), usage=usage, is_final=is_final) - + return cls( + choices=choices, + model=data.get("model", "unknown"), + usage=usage, + is_final=is_final, + tee_signature=data.get("tee_signature"), + tee_timestamp=data.get("tee_timestamp"), + ) @dataclass class TextGenerationStream: @@ -380,6 +390,12 @@ class TextGenerationOutput: payment_hash: Optional[str] = None """Payment hash for x402 transaction""" + tee_signature: Optional[str] = None + """RSA-PSS signature over the response produced by the TEE enclave.""" + + tee_timestamp: Optional[str] = None + """ISO timestamp from the TEE at signing time.""" + @dataclass class AbiFunction: @@ -427,36 +443,35 @@ class TEE_LLM(str, Enum): Usage: # TEE-verified inference result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], ) """ - - # Existing (Currently turned off) - # META_LLAMA_3_1_70B_INSTRUCT = "meta-llama/Llama-3.1-70B-Instruct" - # OpenAI models via TEE GPT_4_1_2025_04_14 = "openai/gpt-4.1-2025-04-14" - GPT_4O = "openai/gpt-4o" O4_MINI = "openai/o4-mini" + GPT_5 = "openai/gpt-5" + 
GPT_5_MINI = "openai/gpt-5-mini" + GPT_5_2 = "openai/gpt-5.2" # Anthropic models via TEE - CLAUDE_3_7_SONNET = "anthropic/claude-3.7-sonnet" - CLAUDE_3_5_HAIKU = "anthropic/claude-3.5-haiku" - CLAUDE_4_0_SONNET = "anthropic/claude-4.0-sonnet" + CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5" + CLAUDE_SONNET_4_6 = "anthropic/claude-sonnet-4-6" + CLAUDE_HAIKU_4_5 = "anthropic/claude-haiku-4-5" + CLAUDE_OPUS_4_5 = "anthropic/claude-opus-4-5" + CLAUDE_OPUS_4_6 = "anthropic/claude-opus-4-6" # Google models via TEE GEMINI_2_5_FLASH = "google/gemini-2.5-flash" GEMINI_2_5_PRO = "google/gemini-2.5-pro" - GEMINI_2_0_FLASH = "google/gemini-2.0-flash" GEMINI_2_5_FLASH_LITE = "google/gemini-2.5-flash-lite" + GEMINI_3_PRO = "google/gemini-3-pro-preview" + GEMINI_3_FLASH = "google/gemini-3-flash-preview" # xAI Grok models via TEE - GROK_3_MINI_BETA = "x-ai/grok-3-mini-beta" - GROK_3_BETA = "x-ai/grok-3-beta" - GROK_2_1212 = "x-ai/grok-2-1212" - GROK_2_VISION_LATEST = "x-ai/grok-2-vision-latest" - GROK_4_1_FAST = "x-ai/grok-4.1-fast" + GROK_4 = "x-ai/grok-4" + GROK_4_FAST = "x-ai/grok-4-fast" + GROK_4_1_FAST = "x-ai/grok-4-1-fast" GROK_4_1_FAST_NON_REASONING = "x-ai/grok-4-1-fast-non-reasoning" diff --git a/stresstest/llm.py b/stresstest/llm.py index 02f08e7..610d3bf 100644 --- a/stresstest/llm.py +++ b/stresstest/llm.py @@ -7,7 +7,7 @@ # Number of requests to run serially NUM_REQUESTS = 100 -MODEL = "anthropic/claude-3.5-haiku" +MODEL = "anthropic/claude-haiku-4-5" def main(private_key: str): diff --git a/tests/client_test.py b/tests/client_test.py index f17283b..822fc74 100644 --- a/tests/client_test.py +++ b/tests/client_test.py @@ -194,7 +194,7 @@ def test_llm_completion_success(self, client): ) result = client.llm.completion( - model=TEE_LLM.GPT_4O, + model=TEE_LLM.GPT_5, prompt="Hello", max_tokens=100, ) @@ -215,7 +215,7 @@ def test_llm_chat_success_non_streaming(self, client): ) result = client.llm.chat( - model=TEE_LLM.GPT_4O, + model=TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], stream=False, ) @@ -233,7 +233,7 @@ def test_llm_chat_streaming(self, client): mock_stream.return_value = iter(mock_chunks) result = client.llm.chat( - model=TEE_LLM.GPT_4O, + model=TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello"}], stream=True, ) diff --git a/tests/langchain_adapter_test.py b/tests/langchain_adapter_test.py index ab290a8..1671c7f 100644 --- a/tests/langchain_adapter_test.py +++ b/tests/langchain_adapter_test.py @@ -26,34 +26,34 @@ def mock_client(): @pytest.fixture def model(mock_client): """Create an OpenGradientChatModel with a mocked client.""" - return OpenGradientChatModel(private_key="0x" + "a" * 64, model_cid=TEE_LLM.GPT_4O) + return OpenGradientChatModel(private_key="0x" + "a" * 64, model_cid=TEE_LLM.GPT_5) class TestOpenGradientChatModel: def test_initialization(self, model): """Test model initializes with correct fields.""" - assert model.model_cid == TEE_LLM.GPT_4O + assert model.model_cid == TEE_LLM.GPT_5 assert model.max_tokens == 300 assert model.x402_settlement_mode == x402SettlementMode.SETTLE_BATCH assert model._llm_type == "opengradient" def test_initialization_custom_max_tokens(self, mock_client): """Test model initializes with custom max_tokens.""" - model = OpenGradientChatModel(private_key="0x" + "a" * 64, model_cid=TEE_LLM.CLAUDE_3_5_HAIKU, max_tokens=1000) + model = OpenGradientChatModel(private_key="0x" + "a" * 64, model_cid=TEE_LLM.CLAUDE_HAIKU_4_5, max_tokens=1000) assert model.max_tokens == 1000 def 
test_initialization_custom_settlement_mode(self, mock_client): """Test model initializes with custom settlement mode.""" model = OpenGradientChatModel( private_key="0x" + "a" * 64, - model_cid=TEE_LLM.GPT_4O, + model_cid=TEE_LLM.GPT_5, x402_settlement_mode=x402SettlementMode.SETTLE, ) assert model.x402_settlement_mode == x402SettlementMode.SETTLE def test_identifying_params(self, model): """Test _identifying_params returns model name.""" - assert model._identifying_params == {"model_name": TEE_LLM.GPT_4O} + assert model._identifying_params == {"model_name": TEE_LLM.GPT_5} class TestGenerate: @@ -210,7 +210,7 @@ def test_passes_correct_params_to_client(self, model, mock_client): model._generate([HumanMessage(content="Hi")], stop=["END"]) mock_client.llm.chat.assert_called_once_with( - model=TEE_LLM.GPT_4O, + model=TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hi"}], stop_sequence=["END"], max_tokens=300, diff --git a/tutorials/01-verifiable-ai-agent.md b/tutorials/01-verifiable-ai-agent.md index 2d853c3..51fad5e 100644 --- a/tutorials/01-verifiable-ai-agent.md +++ b/tutorials/01-verifiable-ai-agent.md @@ -349,7 +349,7 @@ if __name__ == "__main__": ## Next Steps -- **Swap models**: Replace `GPT_4_1_2025_04_14` with `CLAUDE_4_0_SONNET` or +- **Swap models**: Replace `GPT_4_1_2025_04_14` with `CLAUDE_SONNET_4_6` or `GEMINI_2_5_PRO` -- the rest of your code stays the same. - **Add more on-chain tools**: Use `create_run_model_tool` with different model CIDs to give your agent access to price prediction, sentiment analysis, or other ML diff --git a/tutorials/02-streaming-multi-provider.md b/tutorials/02-streaming-multi-provider.md index 801fa47..fc4dafd 100644 --- a/tutorials/02-streaming-multi-provider.md +++ b/tutorials/02-streaming-multi-provider.md @@ -55,7 +55,7 @@ client = og.init(private_key=private_key) client.llm.ensure_opg_approval(opg_amount=5) result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "What is the x402 payment protocol?"}], max_tokens=200, temperature=0.0, @@ -80,14 +80,14 @@ identical. ```python # OpenAI result_openai = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hello from OpenAI!"}], max_tokens=100, ) # Anthropic result_anthropic = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Hello from Anthropic!"}], max_tokens=100, ) @@ -101,10 +101,11 @@ result_google = client.llm.chat( # xAI result_xai = client.llm.chat( - model=og.TEE_LLM.GROK_3_BETA, + model=og.TEE_LLM.GROK_4, messages=[{"role": "user", "content": "Hello from xAI!"}], max_tokens=100, ) + ``` This makes A/B testing trivial -- run the same prompt across providers and compare @@ -118,7 +119,7 @@ that yields `StreamChunk` objects. 
```python stream = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[ {"role": "system", "content": "You are a concise technical writer."}, {"role": "user", "content": "Explain TEEs in one paragraph."}, @@ -169,7 +170,7 @@ privacy/cost/transparency trade-off: ```python # Privacy-first: only hashes stored on-chain result_private = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Sensitive query here."}], max_tokens=100, x402_settlement_mode=og.x402SettlementMode.SETTLE, @@ -178,7 +179,7 @@ print(f"Payment hash (SETTLE): {result_private.payment_hash}") # Cost-efficient: batched settlement (this is the default) result_batch = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Regular query."}], max_tokens=100, x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, @@ -187,7 +188,7 @@ print(f"Payment hash (SETTLE_BATCH): {result_batch.payment_hash}") # Full transparency: everything on-chain result_transparent = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Auditable query."}], max_tokens=100, x402_settlement_mode=og.x402SettlementMode.SETTLE_METADATA, @@ -272,10 +273,10 @@ PROMPT = "Explain what a Trusted Execution Environment is in two sentences." # ── Multi-provider comparison ───────────────────────────────────────────── models = [ - ("GPT-4o", og.TEE_LLM.GPT_4O), - ("Claude 3.7 Sonnet", og.TEE_LLM.CLAUDE_3_7_SONNET), - ("Gemini 2.5 Flash", og.TEE_LLM.GEMINI_2_5_FLASH), - ("Grok 3 Beta", og.TEE_LLM.GROK_3_BETA), + ("GPT-5", og.TEE_LLM.GPT_5), + ("Claude Sonnet 4.6", og.TEE_LLM.CLAUDE_SONNET_4_6), + ("Gemini 2.5 Flash", og.TEE_LLM.GEMINI_2_5_FLASH), + ("Grok 4", og.TEE_LLM.GROK_4), ] for name, model in models: @@ -292,9 +293,9 @@ for name, model in models: print(f"[{name}] Error: {e}\n") # ── Streaming ───────────────────────────────────────────────────────────── -print("--- Streaming from GPT-4o ---") +print("--- Streaming from GPT-5 ---") stream = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[{"role": "user", "content": "What is x402? 
Keep it under 50 words."}], max_tokens=100, stream=True, @@ -313,7 +314,7 @@ for mode_name, mode in [ ]: try: r = client.llm.chat( - model=og.TEE_LLM.CLAUDE_3_7_SONNET, + model=og.TEE_LLM.CLAUDE_SONNET_4_6, messages=[{"role": "user", "content": "Say hello."}], max_tokens=50, x402_settlement_mode=mode, diff --git a/tutorials/03-verified-tool-calling.md b/tutorials/03-verified-tool-calling.md index a2f7902..c7caa49 100644 --- a/tutorials/03-verified-tool-calling.md +++ b/tutorials/03-verified-tool-calling.md @@ -168,7 +168,7 @@ Pass the `tools` list and `tool_choice` parameter to any `client.llm.chat()` cal ```python result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=[ {"role": "system", "content": "You are a crypto portfolio assistant."}, {"role": "user", "content": "What's my portfolio worth?"}, @@ -219,7 +219,7 @@ def run_agent(client: og.Client, user_query: str) -> str: try: result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, + model=og.TEE_LLM.GPT_5, messages=messages, max_tokens=600, temperature=0.0, @@ -374,7 +374,7 @@ def run_agent(user_query: str) -> str: for i in range(5): try: result = client.llm.chat( - model=og.TEE_LLM.GPT_4O, messages=messages, max_tokens=600, + model=og.TEE_LLM.GPT_5, messages=messages, max_tokens=600, temperature=0.0, tools=TOOLS, tool_choice="auto", x402_settlement_mode=og.x402SettlementMode.SETTLE_BATCH, ) @@ -407,7 +407,7 @@ if __name__ == "__main__": ML predictions alongside local function calls. - **Stream tool-calling responses**: Pass `stream=True` to get incremental tokens even during multi-turn tool loops. See **Tutorial 2** for streaming basics. -- **Use different providers**: Swap `og.TEE_LLM.GPT_4O` for `CLAUDE_3_7_SONNET` or +- **Use different providers**: Swap `og.TEE_LLM.GPT_5` for `CLAUDE_SONNET_4_6` or `GEMINI_2_5_FLASH` -- tool calling works across all providers. - **Add settlement transparency**: Switch to `SETTLE_METADATA` to store the full tool-calling reasoning chain on-chain for audit purposes.