From 1b931da8054e30bebac8b063c5e89625fb7bd605 Mon Sep 17 00:00:00 2001
From: lu9dce <hellocodelinux@gmail.com>
Date: Thu, 4 Dec 2025 08:32:58 -0300
Subject: [PATCH 1/2] Enhance API key retrieval and add 'ollama' service

Updated get_api_key to accept service parameter and modified error messages. Added support for 'ollama' service in argument parsing.
---
 src/compose | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/compose b/src/compose
index ea9ddbe6..9aaed380 100755
--- a/src/compose
+++ b/src/compose
@@ -12,13 +12,13 @@ import argparse
 import urllib.request
 import urllib.error
 
-def get_api_key(api_key_env: str|None) -> str|None:
+def get_api_key(api_key_env: str|None, service: str) -> str|None:
     """Return API key from `api_key_env`. If None, return None (no auth)."""
     if api_key_env is None:
         return None
     api_key = os.getenv(api_key_env)
     if not api_key:
-        print(f"Error: Please set the {api_key_env} environment variable.", file=sys.stderr)
+        print(f"Error: Please set the {api_key_env} environment variable for the '{service}' service.", file=sys.stderr)
         sys.exit(1)
     return api_key
 
@@ -141,7 +141,7 @@ def main():
     parser.add_argument("-c", "--chat-completion", required=True,
                         help="Path to input chat file. Last line must be 'YourName:'.")
     parser.add_argument("-s", "--service", default="openai",
-                        help="Service: openai (default), gemini, or host[:port]/URL for OpenAI-compatible server.")
+                        help="Service: openai (default), gemini, ollama, or host[:port]/URL for OpenAI-compatible server.")
     parser.add_argument("-m", "--model", default=None,
                         help="Model name. Defaults depend on --service (openai: gpt-4o-mini, gemini: gemini-2.0-flash).")
     parser.add_argument("-M", "--max-tokens", type=int,
@@ -161,7 +161,7 @@ def main():
         sys.exit(1)
 
     # Resolve service -> api_url, api_key_env, and default model
-    service = args.service.strip()
+    service = args.service.strip().lower()  # Normalize service name
     if service == "openai":
         api_url = "https://api.openai.com/v1/chat/completions"
         api_key_env = "OPENAI_API_KEY"
@@ -170,13 +170,18 @@ def main():
         api_url = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"
         api_key_env = "GEMINI_API_KEY"
         service_default_model = "gemini-2.0-flash"
+    elif service == "ollama":
+        api_url = "http://localhost:11434/v1/chat/completions"
+        api_key_env = None
+        service_default_model = "deepseek-v3.1:671b-cloud"
     else:
         base = service if service.startswith(("http://", "https://")) else f"http://{service}"
         api_url = base.rstrip("/") + "/v1/chat/completions"
         api_key_env = None  # no auth for custom hosts
         service_default_model = "gpt-4o-mini"
 
-    api_key = get_api_key(api_key_env)
+    # Ensure the correct API key is used for the selected service
+    api_key = get_api_key(api_key_env, service)
     model = args.model if args.model else service_default_model
 
     lines = read_chat_file(args.chat_completion)

From d2151b9dd7df336ceb400c82be4e7d2d30ff8767 Mon Sep 17 00:00:00 2001
From: hellocodelinux <hellocodelinux@gmail.com>
Date: Sat, 6 Dec 2025 08:24:26 -0300
Subject: [PATCH 2/2] Refactor API key handling and enhance support for Ollama
 Cloud service

---
 doc/AUTOCOMPOSE.md | 198 +++++++++++++++++++++++++++++++++++++--------
 src/compose        | 128 ++++++++++++++++++++---------
 2 files changed, 255 insertions(+), 71 deletions(-)

diff --git a/doc/AUTOCOMPOSE.md b/doc/AUTOCOMPOSE.md
index 20c8a49c..08536bb4 100644
--- a/doc/AUTOCOMPOSE.md
+++ b/doc/AUTOCOMPOSE.md
@@ -1,71 +1,205 @@
 Auto-Compose
 ============
-The nchat bundled default auto-compose utility `compose` uses external
-services for chat completion, and generally requires an API key
-(set in environment) to work. Environment variables to set:
 
-    OpenAI: OPENAI_API_KEY
-    Gemini: GEMINI_API_KEY
+The nchat bundled default auto-compose utility `compose` uses external services
+for chat completion to suggest your next reply in conversations. Most services
+require an API key, which is passed on the command line with `-k/--api-key`.
+
+Authentication
+---------------
+
+The following services require an API key (provided via `-k/--api-key`):
+
+- **OpenAI**: Use `-k "sk-..."` with your OpenAI API key
+- **Google Gemini**: Use `-k "AIza..."` with your Gemini API key
+- **Ollama Cloud**: Use `-k "your-key"` with your Ollama Cloud API key (accessed via local Ollama client)
+
+These services do NOT require authentication:
+
+- **Ollama (local)**: No API key needed
+- **Custom OpenAI-compatible servers**: No API key is sent (a key given with `-k` is ignored for these services)
+
+
+Command-Line Options
+--------------------
+
+The `compose` utility accepts the following arguments:
+
+- `-c, --chat-completion <PATH>` **(required)**
+  Path to input chat file. The last line must contain only your name followed
+  by a colon (e.g., `Stanley:`) with no message content.
+
+- `-s, --service <SERVICE>` (default: `openai`)
+  Service provider: `openai`, `gemini`, `ollama` (local), `ollama-cloud`, or a 
+  custom OpenAI-compatible server URL (e.g., `http://localhost:8000` or 
+  `192.168.10.159:8080`).
+  
+  Note: `ollama-cloud` connects via your local Ollama client at 
+  `http://localhost:11434/api/chat` and requires an API key.
+
+- `-m, --model <MODEL>`
+  Model name for the service. Defaults depend on the selected service:
+  - OpenAI: `gpt-4o-mini`
+  - Gemini: `gemini-2.0-flash`
+  - Ollama: `deepseek-v3.1:671b-cloud` (pass `-m gemma3` or similar for a local model)
+  - Ollama Cloud: `gpt-4o-mini` (pass `-m`, e.g. `kimi-k2:1t-cloud`, to pick a cloud model)
+  - Custom servers: `gpt-4o-mini`
+
+- `-p, --prompt <PROMPT>` (default: `Suggest {your_name}'s next reply.`)
+  Custom instruction prompt. Use `{your_name}` placeholder to reference the
+  user's name dynamically. Example:
+  `"Suggest {your_name}'s next reply in a joking manner."`
+
+- `-t, --temperature <VALUE>`
+  Sampling temperature for response randomness (e.g., `0.2` for deterministic,
+  `0.9` for creative). Higher values = more random responses.
+
+- `-M, --max-tokens <NUMBER>`
+  Maximum number of output tokens to generate. Limits response length.
+
+- `-T, --timeout <SECONDS>` (default: `10`)
+  Network timeout in seconds. Increase for slow connections or complex requests.
+
+- `-k, --api-key <KEY>` **(required for OpenAI, Gemini, Ollama Cloud)**
+  API key for authenticated services. Required when using OpenAI, Google Gemini,
+  or Ollama Cloud. Not needed for local Ollama or custom servers.
+  Example: `-k "sk-1234567890abcdef"` for OpenAI.
+
+- `--api-url <URL>`
+  Custom API endpoint URL (overrides default for the service). Useful for
+  custom Ollama Cloud endpoints or other compatible services.
+  Example: `--api-url "http://localhost:8000/v1/chat/completions"`
+
+- `-v, --verbose`
+  Print request payload and raw responses to stderr for debugging purposes.
+
+- `-h, --help`
+  Display help message and exit.
+
+
+Chat File Format
+----------------
+
+The compose utility expects a chat file with a specific format:
+
+```
+Alice: Hello, how are you?
+Bob: I'm doing great, thanks for asking!
+Alice: That's wonderful to hear!
+Alice:
+```
+
+Rules:
+- Each line must follow the format: `Name: message content`
+- The last line must contain only your name followed by a colon (e.g., `Alice:`)
+- Empty lines are ignored
+- The utility will suggest what your character (the last name) should reply next
 
 
 Basic Testing
 -------------
+
 An [example chat history file](/doc/example-history.txt) is provided for
-testing chat completion standalone. Example:
+testing chat completion standalone.
+
+Basic usage with defaults (OpenAI, gpt-4o-mini); add `-k "sk-..."` to supply the API key:
 
     ./src/compose -c doc/example-history.txt
 
 
-Testing Services / Models
--------------------------
-The utility `compose` may be used with OpenAI-compatible services.
+Service Usage Examples
+----------------------
 
-Example usage with default service (OpenAI) and default model (gpt-4o-mini):
+**OpenAI with default model:**
 
-    ./src/compose -c doc/example-history.txt
+    ./src/compose -s openai -k "sk-your-api-key" -c doc/example-history.txt
+
+**OpenAI with custom model and longer timeout:**
+
+    ./src/compose -s openai -k "sk-your-api-key" -m gpt-4-turbo -T 60 -c doc/example-history.txt
 
-Example usage with OpenAI:
+**Google Gemini:**
 
-    ./src/compose -s openai -c doc/example-history.txt
+    ./src/compose -s gemini -k "AIza-your-api-key" -c doc/example-history.txt
 
-Example usage with OpenAI and custom model and longer timeout of 60 secs:
+**Google Gemini with custom model:**
 
-    ./src/compose -s openai -m gpt-5-nano -T 60 -c doc/example-history.txt
+    ./src/compose -s gemini -k "AIza-your-api-key" -m gemini-2.5-flash -c doc/example-history.txt
 
-Example usage with Google Gemini:
+**Ollama (local, no auth required):**
 
-    ./src/compose -s gemini -c doc/example-history.txt
+    ./src/compose -s ollama -c doc/example-history.txt
 
-Example usage with Google Gemini and custom model:
+**Ollama with custom model:**
 
-    ./src/compose -s gemini -m gemini-2.5-flash -c doc/example-history.txt
+```bash
+./src/compose -s ollama -m gemma3 -c doc/example-history.txt
+```
 
-Example usage with llama.cpp:
+**Ollama Cloud (with API key via local client):**
+
+    ./src/compose -s ollama-cloud -k "your-ollama-cloud-api-key" -m kimi-k2:1t-cloud -c doc/example-history.txt
+
+**Ollama Cloud with custom timeout:**
+
+    ./src/compose -s ollama-cloud -k "your-ollama-cloud-api-key" -m kimi-k2:1t-cloud -T 60 -c doc/example-history.txt
+
+**Custom OpenAI-compatible server (via URL):**
 
     ./src/compose -s "http://192.168.10.159:8080" -c doc/example-history.txt
 
-Example starting llama.cpp server:
+**Custom llama.cpp server:**
+
+    llama-server --port 8080 -m ./models/llama-2-13b-chat.Q4_K_M.gguf &
+    ./src/compose -s "http://localhost:8080" -c doc/example-history.txt
+
+**Advanced example with custom prompt and sampling:**
 
-    llama-server --port 8080 -m ./models/llama-2-13b-chat.Q4_K_M.gguf
+    ./src/compose -s openai -k "sk-your-api-key" -m gpt-4o -p "Suggest {your_name}'s next reply as if they were a pirate." \
+      -t 0.8 -M 150 -c doc/example-history.txt
 
+**Verbose mode for debugging:**
 
-Configuring Custom Service / Model
-----------------------------------
-Edit `ui.conf` to match the desired compose path and usage.
+    ./src/compose -s gemini -k "AIza-your-api-key" -c doc/example-history.txt -v
 
-Determine the path of `compose` based on nchat install path:
+
+Configuring for Auto-Compose in nchat
+--------------------------------------
+
+Edit the nchat configuration file `ui.conf` to set the auto-compose command.
+
+First, determine the path of `compose` based on your nchat installation:
 
     realpath $(dirname $(which nchat))/../libexec/nchat/compose
 
-Example usage with Google Gemini and custom model:
+Then configure `ui.conf` with one of the following examples:
+
+**Using Google Gemini with custom model:**
+
+    auto_compose_command=/usr/local/libexec/nchat/compose -s gemini -k "AIza-your-key" -m gemini-2.0-flash -c '%1'
+
+**Using OpenAI with custom model and longer timeout:**
+
+    auto_compose_command=/usr/local/libexec/nchat/compose -s openai -k "sk-your-key" -m gpt-4-turbo -T 60 -c '%1'
+
+**Using custom prompt with token limit:**
+
+    auto_compose_command=/usr/local/libexec/nchat/compose -k "sk-your-key" -p "Suggest {your_name}'s next reply in a professional tone." -M 100 -c '%1'
+
+**Using local Ollama (no auth needed):**
+
+```bash
+auto_compose_command=/usr/local/libexec/nchat/compose -s ollama -m gemma3 -c '%1'
+```
 
-    auto_compose_command=/usr/local/libexec/nchat/compose -s gemini -m gemini-2.0-flash -c '%1'
+**Using Ollama Cloud (with API key):**
 
-Example usage with OpenAI and custom model and longer timeout of 60 secs:
+    auto_compose_command=/usr/local/libexec/nchat/compose -s ollama-cloud -k "your-ollama-cloud-api-key" -m kimi-k2:1t-cloud -c '%1'
 
-    auto_compose_command=/usr/local/libexec/nchat/compose -s openai -m gpt-5-nano -T 60 -c '%1'
+**Using custom OpenAI-compatible server:**
 
-Example usage with custom prompt and max token limit of 100:
+    auto_compose_command=/usr/local/libexec/nchat/compose -s "http://192.168.1.100:8000" -c '%1'
 
-    auto_compose_command=/usr/local/libexec/nchat/compose -p "Suggest {your_name}'s next reply in a joking manner." -M 100 -c '%1'
+Note: The `%1` placeholder will be replaced with the chat file path when nchat
+invokes the auto-compose command.
 
diff --git a/src/compose b/src/compose
index 9aaed380..48b3e17c 100755
--- a/src/compose
+++ b/src/compose
@@ -12,15 +12,20 @@ import argparse
 import urllib.request
 import urllib.error
 
-def get_api_key(api_key_env: str|None, service: str) -> str|None:
-    """Return API key from `api_key_env`. If None, return None (no auth)."""
-    if api_key_env is None:
+def get_api_key(requires_auth: bool, service: str, cli_api_key: str|None = None) -> str|None:
+    """Return API key from cli_api_key, or None if no auth needed.
+    
+    If authentication is required but no key is provided, exit with error.
+    """
+    if not requires_auth:
         return None
-    api_key = os.getenv(api_key_env)
-    if not api_key:
-        print(f"Error: Please set the {api_key_env} environment variable for the '{service}' service.", file=sys.stderr)
-        sys.exit(1)
-    return api_key
+    
+    if cli_api_key:
+        return cli_api_key
+    
+    # Auth required but no key provided
+    print(f"Error: API key required for '{service}' service. Use -k/--api-key to provide it.", file=sys.stderr)
+    sys.exit(1)
 
 def read_chat_file(path: str) -> list[str]:
     try:
@@ -66,36 +71,61 @@ def parse_chat(lines: list[str]) -> tuple[str, list[dict]]:
 
     return your_name, messages
 
-def build_payload(model: str, your_name: str, chat_messages: list[dict], temperature: float|None, prompt: str | None, max_tokens: int | None) -> dict:
+def build_payload(model: str, your_name: str, chat_messages: list[dict], temperature: float|None, prompt: str | None, max_tokens: int | None, is_ollama_native: bool = False) -> dict:
     prompt = prompt if prompt else "Suggest {your_name}'s next reply."
     prompt = prompt.replace("{your_name}", your_name)
 
-    system_msg = {
-        "role": "system",
-        "content": f"You are {your_name} in a chat. " + prompt
-    }
-    final_instruction = {
-        "role": "user",
-        "content": prompt
-    }
-    payload = {
-        "model": model,
-        "messages": [system_msg] + chat_messages + [final_instruction]
-    }
-
-    if temperature is not None:
-        payload["temperature"] = temperature
-
+    if is_ollama_native:
+        # Ollama native format (/api/chat)
+        # Convert chat_messages to Ollama format
+        messages = []
+        messages.append({"role": "system", "content": f"You are {your_name} in a chat. " + prompt})
+        for msg in chat_messages:
+            messages.append(msg)
+        messages.append({"role": "user", "content": prompt})
+        
+        payload = {
+            "model": model,
+            "messages": messages,
+            "stream": False,
+        }
+        if temperature is not None:
+            payload["options"] = {"temperature": temperature}
+    else:
+        # OpenAI format
+        system_msg = {
+            "role": "system",
+            "content": f"You are {your_name} in a chat. " + prompt
+        }
+        final_instruction = {
+            "role": "user",
+            "content": prompt
+        }
+        payload = {
+            "model": model,
+            "messages": [system_msg] + chat_messages + [final_instruction]
+        }
+        if temperature is not None:
+            payload["temperature"] = temperature
+    
     if max_tokens is not None:
-        payload["max_tokens"] = max_tokens
+        if is_ollama_native:
+            if "options" not in payload:
+                payload["options"] = {}
+            payload["options"]["num_predict"] = max_tokens
+        else:
+            payload["max_tokens"] = max_tokens
 
     return payload
 
-def send_request(payload: dict, api_url: str, api_key: str, verbose: bool, timeout: int) -> str:
+def send_request(payload: dict, api_url: str, api_key: str|None, verbose: bool, timeout: int, is_ollama_native: bool = False) -> str:
     headers = {
         "Content-Type": "application/json",
-        "Authorization": f"Bearer {api_key}",
     }
+    # Only add Authorization header if API key is provided
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
+    
     if verbose:
         print("=== Request ===", file=sys.stderr)
         print(json.dumps(payload, ensure_ascii=False, indent=2), file=sys.stderr)
@@ -126,7 +156,12 @@ def send_request(payload: dict, api_url: str, api_key: str, verbose: bool, timeo
         sys.exit(1)
 
     try:
-        return result["choices"][0]["message"]["content"]
+        if is_ollama_native:
+            # Ollama native format returns response in message.content
+            return result["message"]["content"]
+        else:
+            # OpenAI format
+            return result["choices"][0]["message"]["content"]
     except (KeyError, IndexError):
         if verbose:
             print("=== Raw Response ===", file=sys.stderr)
@@ -141,7 +176,7 @@ def main():
     parser.add_argument("-c", "--chat-completion", required=True,
                         help="Path to input chat file. Last line must be 'YourName:'.")
     parser.add_argument("-s", "--service", default="openai",
-                        help="Service: openai (default), gemini, ollama, or host[:port]/URL for OpenAI-compatible server.")
+                        help="Service: openai (default), gemini, ollama, ollama-cloud, or host[:port]/URL for OpenAI-compatible server.")
     parser.add_argument("-m", "--model", default=None,
                         help="Model name. Defaults depend on --service (openai: gpt-4o-mini, gemini: gemini-2.0-flash).")
     parser.add_argument("-M", "--max-tokens", type=int,
@@ -154,40 +189,55 @@ def main():
                         help="Print request payload to stderr.")
     parser.add_argument("-T", "--timeout", type=int, default=10,
                         help="Network timeout in seconds (default: 10).")
+    parser.add_argument("-k", "--api-key", default=None,
+                        help="API key for authenticated services (OpenAI, Gemini, Ollama Cloud).")
+    parser.add_argument("--api-url", default=None,
+                        help="Custom API endpoint URL (overrides default for the service). Useful for custom Ollama Cloud endpoints or compatible services.")
     args = parser.parse_args()
 
     if args.timeout <= 0:
         print("Error: -T / --timeout must be > 0 seconds.", file=sys.stderr)
         sys.exit(1)
 
-    # Resolve service -> api_url, api_key_env, and default model
+    # Resolve service -> api_url, requires_auth, and default model
     service = args.service.strip().lower()  # Normalize service name
     if service == "openai":
         api_url = "https://api.openai.com/v1/chat/completions"
-        api_key_env = "OPENAI_API_KEY"
+        requires_auth = True
         service_default_model = "gpt-4o-mini"
     elif service == "gemini":
         api_url = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions"
-        api_key_env = "GEMINI_API_KEY"
+        requires_auth = True
         service_default_model = "gemini-2.0-flash"
     elif service == "ollama":
         api_url = "http://localhost:11434/v1/chat/completions"
-        api_key_env = None
+        requires_auth = False  # No auth required for local Ollama
         service_default_model = "deepseek-v3.1:671b-cloud"
+    elif service == "ollama-cloud":
+        api_url = "http://localhost:11434/api/chat"  # Ollama Cloud via local client
+        requires_auth = True  # API key required for Ollama Cloud
+        service_default_model = "gpt-4o-mini"
     else:
         base = service if service.startswith(("http://", "https://")) else f"http://{service}"
         api_url = base.rstrip("/") + "/v1/chat/completions"
-        api_key_env = None  # no auth for custom hosts
+        requires_auth = False  # no auth for custom hosts by default
         service_default_model = "gpt-4o-mini"
 
-    # Ensure the correct API key is used for the selected service
-    api_key = get_api_key(api_key_env, service)
+    # Override api_url if custom endpoint is provided
+    if args.api_url:
+        api_url = args.api_url
+
+    # Get API key if required
+    api_key = get_api_key(requires_auth, service, args.api_key)
     model = args.model if args.model else service_default_model
 
+    # Determine if using Ollama native format
+    is_ollama_native = (service == "ollama-cloud")
+
     lines = read_chat_file(args.chat_completion)
     your_name, chat_messages = parse_chat(lines)
-    payload = build_payload(model, your_name, chat_messages, args.temperature, args.prompt, args.max_tokens)
-    reply = send_request(payload, api_url, api_key, verbose=args.verbose, timeout=args.timeout)
+    payload = build_payload(model, your_name, chat_messages, args.temperature, args.prompt, args.max_tokens, is_ollama_native)
+    reply = send_request(payload, api_url, api_key, verbose=args.verbose, timeout=args.timeout, is_ollama_native=is_ollama_native)
     reply_without_name = reply.split(":", 1)[1].strip() if ":" in reply else reply
     print(reply_without_name)