Hmbown · punkcanyang · Jun 5, 2026 · Jun 5, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/config.example.toml b/config.example.toml
@@ -144,6 +144,7 @@ memory_path = "~/.codewhale/memory.md"
 allow_shell = true
 approval_policy = "on-request" # on-request | untrusted | never
 sandbox_mode = "workspace-write" # read-only | workspace-write | danger-full-access | external-sandbox
+# prompt_suggestion = true  # opt-in: show ghost-text follow-up question in composer after each turn
 
 # Typed permission rules live in a sibling `permissions.toml` file, not in
 # config.toml. This schema slice is ask-only and is parsed for follow-up

diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs
@@ -1550,6 +1550,9 @@ pub struct Config {
     /// missing optional file doesn't fail the launch.
     pub instructions: Option<Vec<String>>,
     pub allow_shell: Option<bool>,
+    /// Opt-in ghost-text follow-up prompt suggestion after each completed turn.
+    /// Default: false — the user must explicitly set this to true to enable.
+    pub prompt_suggestion: Option<bool>,
     pub approval_policy: Option<String>,
     pub sandbox_mode: Option<String>,
     pub yolo: Option<bool>,
@@ -2707,6 +2710,11 @@ impl Config {
         self.allow_shell.unwrap_or(false)
     }
 
+    /// Reports whether the ghost-text prompt suggestion feature is switched on.
+    ///
+    /// The setting is opt-in: an unset `prompt_suggestion` reads as `false`.
+    pub fn prompt_suggestion_enabled(&self) -> bool {
+        self.prompt_suggestion.unwrap_or_default()
+    }
+
     /// Return the maximum number of concurrent sub-agents.
     /// Checks `[subagents] max_concurrent` first, then top-level `max_subagents`,
     /// then falls back to `DEFAULT_MAX_SUBAGENTS`.
@@ -4253,6 +4261,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
         // both — they list `~/global.md` inside the project array.
         instructions: override_cfg.instructions.or(base.instructions),
         allow_shell: override_cfg.allow_shell.or(base.allow_shell),
+        prompt_suggestion: override_cfg.prompt_suggestion.or(base.prompt_suggestion),
         yolo: override_cfg.yolo.or(base.yolo),
         approval_policy: override_cfg.approval_policy.or(base.approval_policy),
         sandbox_mode: override_cfg.sandbox_mode.or(base.sandbox_mode),
@@ -5406,6 +5415,28 @@ mod tests {
         );
     }
 
+    /// A default `Config` leaves the field unset, and the accessor reads that as "off".
+    #[test]
+    fn prompt_suggestion_defaults_to_false() {
+        let defaults = Config::default();
+        assert!(
+            defaults.prompt_suggestion.is_none(),
+            "default Config must not opt in"
+        );
+        assert!(
+            !defaults.prompt_suggestion_enabled(),
+            "prompt_suggestion must be opt-in (default off)"
+        );
+    }
+
+    /// Setting the field to `Some(true)` turns the feature on.
+    #[test]
+    fn prompt_suggestion_enabled_when_set_true() {
+        let opted_in = Config {
+            prompt_suggestion: Some(true),
+            ..Config::default()
+        };
+        assert!(opted_in.prompt_suggestion_enabled());
+    }
+
     #[test]
     fn warns_when_allow_shell_nested_under_general_section() {
         // #2589: the reporter's config nested top-level keys under sections that

diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs
@@ -1182,6 +1182,13 @@ pub struct App {
     pub next_history_revision: u64,
     pub api_messages: Vec<Message>,
     pub is_loading: bool,
+    /// Ghost-text follow-up suggestion shown in the composer when empty.
+    /// Generated asynchronously after each completed turn; cleared on new input.
+    pub prompt_suggestion: Option<String>,
+    /// Monotonic turn counter for stale-suggestion protection. Incremented on
+    /// each TurnStarted; background suggestion tasks capture the token and the
+    /// event loop discards any delivered result whose token no longer matches.
+    pub prompt_suggestion_gen: std::sync::atomic::AtomicU64,
     /// Degraded connectivity mode; new user inputs are queued for later retry.
     pub offline_mode: bool,
     /// Whether an `EngineEvent::Error` has already been posted for the
@@ -1521,6 +1528,8 @@ pub struct App {
     /// DeepSeek account balance, refreshed once per turn completion.
     /// Shared cell updated by background fetch tasks; read lock in the UI thread.
     pub balance_cell: std::sync::Arc<std::sync::Mutex<Option<crate::pricing::BalanceInfo>>>,
+    /// Shared cell for async delivery of `(generation token, suggestion)` from the background task.
+    pub prompt_suggestion_cell: std::sync::Arc<std::sync::Mutex<Option<(u64, String)>>>,
     /// Tracks whether the initial balance fetch has been attempted for this session.
     pub balance_initiated: bool,
     /// Timestamp of the last balance fetch, used to debounce rapid requests.
@@ -1991,6 +2000,8 @@ impl App {
             next_history_revision: 1,
             api_messages: Vec::new(),
             is_loading: false,
+            prompt_suggestion: None,
+            prompt_suggestion_gen: std::sync::atomic::AtomicU64::new(0),
             offline_mode: false,
             turn_error_posted: false,
             status_message: None,
@@ -2145,6 +2156,7 @@ impl App {
             turn_last_activity_at: None,
             cumulative_turn_duration: std::time::Duration::ZERO,
             balance_cell: std::sync::Arc::new(std::sync::Mutex::new(None)),
+            prompt_suggestion_cell: std::sync::Arc::new(std::sync::Mutex::new(None)),
             balance_initiated: false,
             last_balance_fetch: None,
             runtime_turn_id: None,

diff --git a/crates/tui/src/tui/mod.rs b/crates/tui/src/tui/mod.rs
@@ -51,6 +51,7 @@ pub mod paste;
 pub mod paste_burst;
 pub mod persistence_actor;
 pub mod plan_prompt;
+pub mod prompt_suggestion;
 pub mod provider_picker;
 pub mod scrolling;
 pub mod selection;

diff --git a/crates/tui/src/tui/prompt_suggestion.rs b/crates/tui/src/tui/prompt_suggestion.rs
@@ -0,0 +1,125 @@
+//! Ghost-text follow-up prompt suggestion.
+//!
+//! After each completed turn, a lightweight API call generates ONE short
+//! follow-up question the user might want to ask next. The suggestion is
+//! rendered as dimmed ghost text in the composer when the input is empty.
+
+use std::sync::OnceLock;
+
+use reqwest::header::{AUTHORIZATION, CONTENT_TYPE};
+use serde_json::Value;
+use tracing::debug;
+
+/// Shared HTTP client for suggestion requests.
+///
+/// Built lazily on first use and reused afterwards, so each request does not
+/// create a new connection pool.
+fn suggestion_client() -> &'static reqwest::Client {
+    static SHARED_CLIENT: OnceLock<reqwest::Client> = OnceLock::new();
+    SHARED_CLIENT.get_or_init(reqwest::Client::new)
+}
+
+/// Generate a follow-up prompt suggestion based on recent messages.
+///
+/// Posts `recent_messages` (a plain-text conversation summary) to
+/// `{base_url}/chat/completions` with a system prompt that asks for a single
+/// short follow-up question, sending `api_key` as a bearer token.
+///
+/// Returns `None` when the summary is blank, the request fails or exceeds the
+/// 10-second timeout, the server answers with a non-success status, the body
+/// does not have the expected JSON shape, or the cleaned-up text is empty or
+/// longer than 200 characters. Failures are only logged at debug level —
+/// callers treat the suggestion as best-effort.
+pub async fn generate_suggestion(
+    api_key: &str,
+    base_url: &str,
+    model: &str,
+    recent_messages: &str,
+) -> Option<String> {
+    // Nothing to base a suggestion on; skip the network round-trip.
+    if recent_messages.trim().is_empty() {
+        return None;
+    }
+
+    let body = serde_json::json!({
+        "model": model,
+        "messages": [
+            {
+                "role": "system",
+                "content": "\
+    You are a helpful assistant. Based on the recent conversation context, generate \
+    ONE short follow-up question (under 60 characters) the user might want to ask \
+    next. Reply with ONLY the question text, nothing else — no quotes, no explanations, \
+    no prefixes."
+            },
+            {
+                "role": "user",
+                "content": format!(
+                    "Recent conversation:\n{recent_messages}\n\n\
+                     Generate ONE short follow-up question the user might ask next:"
+                )
+            }
+        ],
+        "max_tokens": 64,
+        "temperature": 0.3,
+        "stream": false
+    });
+
+    let url = format!("{}/chat/completions", base_url.trim_end_matches('/'));
+    debug!(%url, %model, "generating prompt suggestion");
+    let response = suggestion_client()
+        .post(&url)
+        .header(AUTHORIZATION, format!("Bearer {api_key}"))
+        .header(CONTENT_TYPE, "application/json")
+        .timeout(std::time::Duration::from_secs(10))
+        .json(&body)
+        .send()
+        .await
+        // Treat 4xx/5xx as failures instead of parsing an error payload.
+        .and_then(reqwest::Response::error_for_status);
+    let response = match response {
+        Ok(r) => r,
+        Err(err) => {
+            debug!(error = %err, "prompt suggestion request failed");
+            return None;
+        }
+    };
+
+    let value: Value = match response.json().await {
+        Ok(v) => v,
+        Err(err) => {
+            debug!(error = %err, "prompt suggestion response was not valid JSON");
+            return None;
+        }
+    };
+
+    // Strip surrounding whitespace and quotes the model may add despite the
+    // instructions, then re-trim whatever whitespace sat inside the quotes.
+    // The length cap counts characters, not bytes, so non-ASCII suggestions
+    // are not penalised.
+    let suggestion = value["choices"][0]["message"]["content"]
+        .as_str()
+        .map(|s| s.trim().trim_matches('"').trim())
+        .filter(|s| !s.is_empty() && s.chars().count() <= 200)?
+        .to_string();
+
+    debug!(text = %suggestion, "prompt suggestion generated");
+    Some(suggestion)
+}
+
+/// Summarize one message as `"<Role>: <first non-blank text line>"`.
+///
+/// Only `"user"` and `"assistant"` roles are summarized; any other role, or a
+/// message whose text blocks contain no non-blank line, yields `None`.
+/// Non-text content blocks are ignored. Text blocks are joined with a single
+/// space before the line is picked, and the line is capped at 120 characters
+/// with a trailing `…` when it was cut.
+fn message_summary(m: &crate::models::Message) -> Option<String> {
+    const MAX_SUMMARY_CHARS: usize = 120;
+
+    let role = match m.role.as_str() {
+        "user" => "User",
+        "assistant" => "Assistant",
+        _ => return None,
+    };
+    let text = m
+        .content
+        .iter()
+        .filter_map(|block| match block {
+            crate::models::ContentBlock::Text { text, .. } => Some(text.as_str()),
+            _ => None,
+        })
+        .collect::<Vec<_>>()
+        .join(" ");
+    // Skip leading blank lines so a message that opens with a newline still
+    // contributes its first real line instead of being dropped.
+    let first_line = text.lines().map(str::trim).find(|line| !line.is_empty())?;
+
+    // Take up to the cap, then check whether anything was left over; this
+    // avoids counting the characters of the whole line a second time.
+    let mut chars = first_line.chars();
+    let mut truncated: String = chars.by_ref().take(MAX_SUMMARY_CHARS).collect();
+    if chars.next().is_some() {
+        truncated.push('…');
+    }
+    Some(format!("{role}: {truncated}"))
+}
+
+/// Build a one-line-per-message summary of recent conversation context.
+///
+/// Keeps the last `limit` messages for which `message_summary` yields a line,
+/// in chronological order, joined with newlines. Messages that yield nothing
+/// (e.g. tool-only messages) are skipped and do not count against `limit`, so
+/// a tool-heavy tail of the conversation does not crowd out the text context.
+/// Returns an empty string when no message can be summarized or `limit` is 0.
+pub fn summarize_recent_messages(messages: &[crate::models::Message], limit: usize) -> String {
+    let mut lines: Vec<String> = messages
+        .iter()
+        .rev()
+        .filter_map(message_summary)
+        .take(limit)
+        .collect();
+    // Collected newest-first; restore chronological order.
+    lines.reverse();
+    lines.join("\n")
+}
diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
@@ -1115,6 +1115,7 @@ async fn run_event_loop(
     // codex's frame coalescing that maps cleanly onto our poll-based loop.
     let mut frame_rate_limiter = crate::tui::frame_rate_limiter::FrameRateLimiter::default();
     let mut web_config_session: Option<WebConfigSession> = None;
+    let mut prev_input_snapshot = String::new();
     let mut terminal_paused_at: Option<Instant> = None;
     let mut force_terminal_repaint = false;
     let mut draws_since_last_full_repaint: u64 = 0;
@@ -1265,6 +1266,24 @@ async fn run_event_loop(
             app.needs_redraw = true;
         }
 
+        // Clear suggestion when the user modifies the input.
+        if app.input != prev_input_snapshot {
+            app.prompt_suggestion = None;
+            prev_input_snapshot = app.input.clone();
+        }
+
+        // Poll prompt suggestion cell from background generation task.
+        // Discard stale results whose generation token no longer matches.
+        if let Ok(mut guard) = app.prompt_suggestion_cell.try_lock()
+            && let Some((gen_token, suggestion)) = guard.take()
+            && gen_token
+                == app
+                    .prompt_suggestion_gen
+                    .load(std::sync::atomic::Ordering::Relaxed)
+        {
+            app.prompt_suggestion = Some(suggestion);
+        }
+
         // First, poll for engine events (non-blocking)
         let mut received_engine_event = false;
         let mut transcript_batch_updated = false;
@@ -1618,6 +1637,9 @@ async fn run_event_loop(
                         app.is_loading = true;
                         app.offline_mode = false;
                         app.turn_error_posted = false;
+                        app.prompt_suggestion = None;
+                        app.prompt_suggestion_gen
+                            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                         app.dispatch_started_at = None;
                         current_streaming_text.clear();
                         app.streaming_state.reset();
@@ -1819,6 +1841,38 @@ async fn run_event_loop(
                             }
                         }
 
+                        // Generate ghost-text follow-up suggestion asynchronously.
+                        if status == crate::core::events::TurnOutcomeStatus::Completed
+                            && config.prompt_suggestion_enabled()
+                            && app.api_messages.len() >= 2
+                        {
+                            let suggestion_cell = app.prompt_suggestion_cell.clone();
+                            let api_key = config.deepseek_api_key().unwrap_or_default();
+                            let base_url = config.deepseek_base_url();
+                            let model = config.default_model();
+                            let messages: Vec<crate::models::Message> = app.api_messages.clone();
+                            let gen_token = app
+                                .prompt_suggestion_gen
+                                .load(std::sync::atomic::Ordering::Relaxed);
+                            if !api_key.is_empty() {
+                                tokio::spawn(async move {
+                                    let summary =
+                                        crate::tui::prompt_suggestion::summarize_recent_messages(
+                                            &messages, 8,
+                                        );
+                                    if let Some(suggestion) =
+                                        crate::tui::prompt_suggestion::generate_suggestion(
+                                            &api_key, &base_url, &model, &summary,
+                                        )
+                                        .await
+                                        && let Ok(mut guard) = suggestion_cell.lock()
+                                    {
+                                        *guard = Some((gen_token, suggestion));
+                                    }
+                                });
+                            }
+                        }
+
                         // Generate post-turn receipt for completed turns.
                         // Also push a persistent status toast so users always
                         // see the outcome in the footer (not just the 8-second
@@ -3591,6 +3645,14 @@ async fn run_event_loop(
                     if app.is_loading && queue_current_draft_for_next_turn(app) {
                         continue;
                     }
+                    if app.input.is_empty()
+                        && let Some(suggestion) = app.prompt_suggestion.take()
+                    {
+                        app.input = suggestion;
+                        app.cursor_position = app.input.chars().count();
+                        app.needs_redraw = true;
+                        continue;
+                    }
                     let prior_model = app.model.clone();
                     let prior_mode = app.mode;
                     app.cycle_mode();