diff --git a/.github/workflows/release-mac.yml b/.github/workflows/release-mac.yml index 18d4ae9..4cc84f8 100644 --- a/.github/workflows/release-mac.yml +++ b/.github/workflows/release-mac.yml @@ -1,134 +1,193 @@ -# Disabled until Apple signing/notarization secrets are added. -# xcrun notarytool --wait will hang the runner with no diagnostics if secrets are missing. -# Re-enable by uncommenting once the following repo secrets exist: -# APPLE_ID, APPLE_APP_PASSWORD, APPLE_TEAM_ID, -# MACOS_CERTIFICATE_P12_BASE64, MACOS_CERTIFICATE_PASSWORD, MACOS_KEYCHAIN_PASSWORD -# -# name: release-mac -# -# on: -# push: -# branches: [main] -# workflow_dispatch: -# -# permissions: -# contents: write -# -# concurrency: -# group: release-mac -# cancel-in-progress: false -# -# jobs: -# build-and-release: -# name: Build, sign, notarize, release LoopMac -# runs-on: macos-14 -# env: -# APPLE_ID: ${{ secrets.APPLE_ID }} -# APPLE_APP_PASSWORD: ${{ secrets.APPLE_APP_PASSWORD }} -# APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} -# SCHEME: LoopMac -# APP_NAME: Loop -# BUILD_DIR: build -# ARCHIVE_PATH: build/LoopMac.xcarchive -# EXPORT_PATH: build/export -# steps: -# - name: Checkout -# uses: actions/checkout@v4 -# -# - name: Select Xcode -# run: sudo xcode-select -s /Applications/Xcode_15.4.app -# -# - name: Write Secrets.xcconfig from APPLE_TEAM_ID -# run: | -# printf 'DEVELOPMENT_TEAM = %s\n' "$APPLE_TEAM_ID" > Secrets.xcconfig -# -# - name: Import Developer ID signing certificate -# uses: apple-actions/import-codesign-certs@v3 -# with: -# p12-file-base64: ${{ secrets.MACOS_CERTIFICATE_P12_BASE64 }} -# p12-password: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }} -# keychain-password: ${{ secrets.MACOS_KEYCHAIN_PASSWORD }} -# -# - name: Stamp build number from run number -# run: | -# cd Loop.xcodeproj/.. 
-# xcrun agvtool new-version -all "${{ github.run_number }}" -# -# - name: Generate ExportOptions.plist -# run: | -# cat > ExportOptions.plist < -# -# -# -# methoddeveloper-id -# teamID${APPLE_TEAM_ID} -# signingStylemanual -# signingCertificateDeveloper ID Application -# -# -# EOF -# -# - name: Archive -# run: | -# xcodebuild \ -# -project Loop.xcodeproj \ -# -scheme "$SCHEME" \ -# -configuration Release \ -# -archivePath "$ARCHIVE_PATH" \ -# -destination 'generic/platform=macOS' \ -# archive -# -# - name: Export signed .app -# run: | -# xcodebuild -exportArchive \ -# -archivePath "$ARCHIVE_PATH" \ -# -exportOptionsPlist ExportOptions.plist \ -# -exportPath "$EXPORT_PATH" -# -# - name: Notarize .app -# run: | -# APP_PATH="$EXPORT_PATH/$APP_NAME.app" -# ZIP_PATH="$BUILD_DIR/$APP_NAME.zip" -# ditto -c -k --keepParent "$APP_PATH" "$ZIP_PATH" -# xcrun notarytool submit "$ZIP_PATH" \ -# --apple-id "$APPLE_ID" \ -# --password "$APPLE_APP_PASSWORD" \ -# --team-id "$APPLE_TEAM_ID" \ -# --wait -# xcrun stapler staple "$APP_PATH" -# -# - name: Install create-dmg -# run: brew install create-dmg -# -# - name: Build DMG -# run: | -# DMG_PATH="$BUILD_DIR/$APP_NAME-${{ github.run_number }}.dmg" -# create-dmg \ -# --volname "$APP_NAME" \ -# --window-size 600 400 \ -# --icon "$APP_NAME.app" 175 190 \ -# --app-drop-link 425 190 \ -# --codesign "Developer ID Application" \ -# "$DMG_PATH" \ -# "$EXPORT_PATH/$APP_NAME.app" -# echo "DMG_PATH=$DMG_PATH" >> "$GITHUB_ENV" -# -# - name: Notarize DMG -# run: | -# xcrun notarytool submit "$DMG_PATH" \ -# --apple-id "$APPLE_ID" \ -# --password "$APPLE_APP_PASSWORD" \ -# --team-id "$APPLE_TEAM_ID" \ -# --wait -# xcrun stapler staple "$DMG_PATH" -# -# - name: Create GitHub Release -# env: -# GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} -# run: | -# TAG="v1.0-build${{ github.run_number }}" -# gh release create "$TAG" \ -# "$DMG_PATH" \ -# --title "Loop 1.0 (build ${{ github.run_number }})" \ -# --notes "Auto-built from ${{ github.sha }}" +# Builds, 
signs, notarizes, and publishes the LoopMac app + a Sparkle auto-update feed. +# +# Required repo secrets: +# APPLE_ID, APPLE_APP_PASSWORD, APPLE_TEAM_ID — notarization (notarytool) +# MACOS_CERTIFICATE_P12_BASE64 — Developer ID Application cert (base64 .p12) +# MACOS_CERTIFICATE_PASSWORD — password for the .p12 +# MACOS_KEYCHAIN_PASSWORD — ephemeral CI keychain password +# SPARKLE_ED_PRIVATE_KEY — Sparkle EdDSA private key (export with +# `generate_keys -x private-key.pem`, paste file contents) +# +# Outputs (N = the workflow run number): +# - Per-build GitHub Release `v1.0-build<N>` with `Loop-<N>.dmg` (human download) +# and `Loop-<N>.zip` (the Sparkle update payload). +# - A fixed `appcast` release whose `appcast.xml` asset the app polls via SUFeedURL. +# The app's Info.plist SUFeedURL must point at: +# https://github.com/getathelas/LoopHarness/releases/download/appcast/appcast.xml + +name: release-mac + +on: + push: + branches: [main] + workflow_dispatch: + +permissions: + contents: write + +concurrency: + group: release-mac + cancel-in-progress: false + +jobs: + build-and-release: + name: Build, sign, notarize, release LoopMac + runs-on: macos-14 + env: + APPLE_ID: ${{ secrets.APPLE_ID }} + APPLE_APP_PASSWORD: ${{ secrets.APPLE_APP_PASSWORD }} + APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }} + SPARKLE_ED_PRIVATE_KEY: ${{ secrets.SPARKLE_ED_PRIVATE_KEY }} + SCHEME: Loop_MacOS + APP_NAME: Loop + # Actual product wrapper name (PRODUCT_NAME = LoopMac); distinct from the + # user-facing APP_NAME used for DMG/zip filenames and the DMG volume. 
+ APP_BUNDLE: LoopMac + SPARKLE_VERSION: "2.9.2" + BUILD_DIR: build + ARCHIVE_PATH: build/LoopMac.xcarchive + EXPORT_PATH: build/export + DOWNLOAD_REPO: getathelas/LoopHarness + steps: + - name: Checkout + uses: actions/checkout@v4 + + # Fail fast when a secret is missing: `notarytool --wait` otherwise hangs the + # runner with no diagnostics. + - name: Verify required secrets are present + run: | + for v in APPLE_ID APPLE_APP_PASSWORD APPLE_TEAM_ID SPARKLE_ED_PRIVATE_KEY; do + if [ -z "${!v}" ]; then + echo "::error::Missing required secret: $v" + exit 1 + fi + done + + # NOTE(review): Loop.xcodeproj declares preferredProjectObjectVersion = 77 and + # uses file-system-synchronized groups — confirm Xcode 15.4 can open that + # project format, or select a newer toolchain here. + - name: Select Xcode + run: sudo xcode-select -s /Applications/Xcode_15.4.app + + - name: Write Secrets.xcconfig from APPLE_TEAM_ID + run: | + printf 'DEVELOPMENT_TEAM = %s\n' "$APPLE_TEAM_ID" > Secrets.xcconfig + + - name: Import Developer ID signing certificate + uses: apple-actions/import-codesign-certs@v3 + with: + p12-file-base64: ${{ secrets.MACOS_CERTIFICATE_P12_BASE64 }} + p12-password: ${{ secrets.MACOS_CERTIFICATE_PASSWORD }} + keychain-password: ${{ secrets.MACOS_KEYCHAIN_PASSWORD }} + + - name: Stamp build number from run number + run: | + xcrun agvtool new-version -all "${{ github.run_number }}" + + - name: Generate ExportOptions.plist + run: | + # Unquoted heredoc delimiter so the shell expands ${APPLE_TEAM_ID}. + cat > ExportOptions.plist <<EOF + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> + <plist version="1.0"> + <dict> + <key>method</key><string>developer-id</string> + <key>teamID</key><string>${APPLE_TEAM_ID}</string> + <key>signingStyle</key><string>manual</string> + <key>signingCertificate</key><string>Developer ID Application</string> + </dict> + </plist> + EOF + + - name: Archive + run: | + xcodebuild \ + -project Loop.xcodeproj \ + -scheme "$SCHEME" \ + -configuration Release \ + -archivePath "$ARCHIVE_PATH" \ + -destination 'generic/platform=macOS' \ + archive + + - name: Export signed .app + run: | + xcodebuild -exportArchive \ + -archivePath "$ARCHIVE_PATH" \ + -exportOptionsPlist ExportOptions.plist \ + -exportPath "$EXPORT_PATH" + + - name: Notarize and staple .app + run: | + APP_PATH="$EXPORT_PATH/$APP_BUNDLE.app" + NOTARIZE_ZIP="$BUILD_DIR/notarize.zip" + # notarytool takes an archive, so zip the bundle for submission only; + # the ticket is stapled onto the .app itself afterwards. + ditto -c -k --keepParent "$APP_PATH" "$NOTARIZE_ZIP" + xcrun notarytool submit "$NOTARIZE_ZIP" \ + --apple-id "$APPLE_ID" \ + --password "$APPLE_APP_PASSWORD" \ + --team-id "$APPLE_TEAM_ID" \ + --wait + xcrun stapler staple "$APP_PATH" + + - name: Install create-dmg + run: brew install create-dmg + + - name: Build and notarize DMG + run: | + DMG_PATH="$BUILD_DIR/$APP_NAME-${{ github.run_number }}.dmg" + create-dmg \ + 
--volname "$APP_NAME" \ + --window-size 600 400 \ + --icon "$APP_BUNDLE.app" 175 190 \ + --app-drop-link 425 190 \ + --codesign "Developer ID Application" \ + "$DMG_PATH" \ + "$EXPORT_PATH/$APP_BUNDLE.app" + # Submit the DMG itself for notarization, then staple the ticket onto it. + xcrun notarytool submit "$DMG_PATH" \ + --apple-id "$APPLE_ID" \ + --password "$APPLE_APP_PASSWORD" \ + --team-id "$APPLE_TEAM_ID" \ + --wait + xcrun stapler staple "$DMG_PATH" + # Hand DMG_PATH to later steps through GITHUB_ENV. + echo "DMG_PATH=$DMG_PATH" >> "$GITHUB_ENV" + + - name: Package Sparkle update zip (stapled .app) + run: | + # Re-zip AFTER stapling so the Sparkle payload carries the notarization + # ticket (the notarize.zip above held the un-stapled app). + ZIP_PATH="$BUILD_DIR/$APP_NAME-${{ github.run_number }}.zip" + ditto -c -k --keepParent "$EXPORT_PATH/$APP_BUNDLE.app" "$ZIP_PATH" + echo "ZIP_PATH=$ZIP_PATH" >> "$GITHUB_ENV" + + - name: Download Sparkle CLI tools + run: | + # Fetch the Sparkle release tarball selected by SPARKLE_VERSION (job env). + # NOTE(review): the archive is not checksum-verified before its tools are + # run in the next step — consider pinning a SHA-256. + curl -fsSL -o sparkle.tar.xz \ + "https://github.com/sparkle-project/Sparkle/releases/download/${SPARKLE_VERSION}/Sparkle-${SPARKLE_VERSION}.tar.xz" + mkdir -p sparkle-tools + tar -xf sparkle.tar.xz -C sparkle-tools + # Publish the extracted bin/ directory as SPARKLE_BIN for later steps. + echo "SPARKLE_BIN=$PWD/sparkle-tools/bin" >> "$GITHUB_ENV" + + - name: Generate signed appcast.xml + run: | + TAG="v1.0-build${{ github.run_number }}" + # generate_appcast scans a directory of archives, reads each app's + # CFBundleShortVersionString/CFBundleVersion, computes the EdDSA + # signature + length, and writes appcast.xml. --download-url-prefix + # makes the enclosure URLs point at the per-build release assets. 
+ mkdir -p sparkle-feed + cp "$ZIP_PATH" sparkle-feed/ + # Pipe the EdDSA key to generate_appcast on stdin (`--ed-key-file -`) so the + # secret is never written to the runner's disk, even if this step fails. + printf '%s' "$SPARKLE_ED_PRIVATE_KEY" | "$SPARKLE_BIN/generate_appcast" \ + --ed-key-file - \ + --download-url-prefix "https://github.com/${DOWNLOAD_REPO}/releases/download/${TAG}/" \ + sparkle-feed/ + echo "APPCAST_PATH=$PWD/sparkle-feed/appcast.xml" >> "$GITHUB_ENV" + + - name: Publish per-build release (DMG + Sparkle zip) + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + TAG="v1.0-build${{ github.run_number }}" + # Pin the tag to the commit that was actually built; without --target the + # tag is cut from the default branch, which may have moved on since. + gh release create "$TAG" \ + "$DMG_PATH" "$ZIP_PATH" \ + --target "${{ github.sha }}" \ + --title "Loop 1.0 (build ${{ github.run_number }})" \ + --notes "Auto-built from ${{ github.sha }}" + + - name: Publish appcast feed + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # The `appcast` release is a fixed, stable home for the feed so + # SUFeedURL never changes. Create it once; thereafter clobber the asset. + gh release view appcast >/dev/null 2>&1 || \ + gh release create appcast \ + --title "Sparkle update feed" \ + --notes "Auto-update appcast for LoopMac. Do not delete — SUFeedURL points here." 
\ + --latest=false + gh release upload appcast "$APPCAST_PATH" --clobber diff --git a/.gitignore b/.gitignore index 80ae7ce..92aa7bd 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ .env.* !.env.example Secrets.xcconfig +Secrets_managed.xcconfig *.secrets.xcconfig *.p12 *.mobileprovision @@ -25,6 +26,9 @@ xcuserdata/ *.dSYM.zip *.dSYM *.swiftpm +# Built app bundles / disk images bake in build-time secrets — never commit +*.app/ +*.dmg # ─── macOS ──────────────────────────────────────────────────────────── .DS_Store diff --git a/Loop.xcodeproj/project.pbxproj b/Loop.xcodeproj/project.pbxproj index 69d0c62..82f1db4 100644 --- a/Loop.xcodeproj/project.pbxproj +++ b/Loop.xcodeproj/project.pbxproj @@ -15,6 +15,7 @@ DA17C13000000000000000B5 /* NIOSSH in Frameworks */ = {isa = PBXBuildFile; productRef = DA17C13000000000000000A5 /* NIOSSH */; }; DA17C13000000000000000B6 /* NIOSSH in Frameworks */ = {isa = PBXBuildFile; productRef = DA17C13000000000000000A6 /* NIOSSH */; }; DA17C14000000000000000D4 /* SwiftTerm in Frameworks */ = {isa = PBXBuildFile; productRef = DA17C14000000000000000C4 /* SwiftTerm */; }; + DA17C15000000000000000F5 /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = DA17C15000000000000000E5 /* Sparkle */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -75,6 +76,8 @@ isa = PBXFileSystemSynchronizedBuildFileExceptionSet; membershipExceptions = ( AppDelegate.swift, + AppIntents/AskLoopIntent.swift, + AppIntents/CaptureToLoopIntent.swift, Assets.xcassets, HelperViews/AvatarView.swift, HelperViews/ShimmerLabel.swift, @@ -82,10 +85,13 @@ Info.plist, LifeView/LifeView.swift, MainVC.swift, + Markdown/MarkdownShareToolbar.swift, MarkdownEditorViewController.swift, MessageBox.swift, MessagingCell.swift, MessagingVC.swift, + MusicMiniPlayer/MusicMiniPlayerView.swift, + MusicMiniPlayer/TopBannerScrollView.swift, Onboarding/ActionButtonReminderBarView.swift, Onboarding/OnboardingCardView.swift, 
Onboarding/OnboardingViewController.swift, @@ -122,16 +128,21 @@ isa = PBXFileSystemSynchronizedBuildFileExceptionSet; membershipExceptions = ( AppDelegate.swift, + AppIntents/AskLoopIntent.swift, + AppIntents/CaptureToLoopIntent.swift, HelperViews/AvatarView.swift, HelperViews/ShimmerLabel.swift, ImageViewerVC.swift, Info.plist, LifeView/LifeView.swift, MainVC.swift, + Markdown/MarkdownShareToolbar.swift, MarkdownEditorViewController.swift, MessageBox.swift, MessagingCell.swift, MessagingVC.swift, + MusicMiniPlayer/MusicMiniPlayerView.swift, + MusicMiniPlayer/TopBannerScrollView.swift, Onboarding/ActionButtonReminderBarView.swift, Onboarding/OnboardingCardView.swift, Onboarding/OnboardingViewController.swift, @@ -218,6 +229,7 @@ files = ( DA17C12F00000000000000F3 /* SwiftGitX in Frameworks */, DA17C13000000000000000B5 /* NIOSSH in Frameworks */, + DA17C15000000000000000F5 /* Sparkle in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -328,6 +340,7 @@ packageProductDependencies = ( DA17C12F00000000000000F5 /* SwiftGitX */, DA17C13000000000000000A5 /* NIOSSH */, + DA17C15000000000000000E5 /* Sparkle */, ); productName = LoopMac; productReference = 6B00010100000000DEADBEEF /* LoopMac.app */; @@ -395,6 +408,7 @@ DA17C12F00000000000000F1 /* XCRemoteSwiftPackageReference "SwiftGitX" */, DA17C13000000000000000A1 /* XCRemoteSwiftPackageReference "swift-nio-ssh" */, DA17C14000000000000000C1 /* XCRemoteSwiftPackageReference "SwiftTerm" */, + DA17C15000000000000000E1 /* XCRemoteSwiftPackageReference "Sparkle" */, ); preferredProjectObjectVersion = 77; productRefGroup = 6A0903842CD69D9500E70D46 /* Products */; @@ -988,6 +1002,14 @@ minimumVersion = 1.13.0; }; }; + DA17C15000000000000000E1 /* XCRemoteSwiftPackageReference "Sparkle" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/sparkle-project/Sparkle.git"; + requirement = { + kind = upToNextMajorVersion; + minimumVersion = 2.6.0; + }; + }; /* End 
XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ @@ -1026,6 +1048,11 @@ package = DA17C14000000000000000C1 /* XCRemoteSwiftPackageReference "SwiftTerm" */; productName = SwiftTerm; }; + DA17C15000000000000000E5 /* Sparkle */ = { + isa = XCSwiftPackageProductDependency; + package = DA17C15000000000000000E1 /* XCRemoteSwiftPackageReference "Sparkle" */; + productName = Sparkle; + }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 6A09037B2CD69D9500E70D46 /* Project object */; diff --git a/Loop.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/Loop.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 07cb199..a6a5281 100644 --- a/Loop.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/Loop.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "1cf24112c43ca282f74e9bee33293c91f9fe480fdd5774517b4d6178244b0066", + "originHash" : "4e2114e4dc2fe281cdfb92b9988c47726084643d6084712c3f60ca812fc12d4b", "pins" : [ { "identity" : "libgit2", @@ -10,6 +10,15 @@ "version" : "1.9.2" } }, + { + "identity" : "sparkle", + "kind" : "remoteSourceControl", + "location" : "https://github.com/sparkle-project/Sparkle.git", + "state" : { + "revision" : "6276ba2b404829d139c45ff98427cf90e2efc59b", + "version" : "2.9.2" + } + }, { "identity" : "swift-argument-parser", "kind" : "remoteSourceControl", diff --git a/LoopIOS/AgentHarness/AgentHarness.swift b/LoopIOS/AgentHarness/AgentHarness.swift index 58e6189..b9ff82f 100644 --- a/LoopIOS/AgentHarness/AgentHarness.swift +++ b/LoopIOS/AgentHarness/AgentHarness.swift @@ -108,7 +108,9 @@ final class AgentHarness { ("Notion", "Read and write Notion pages and databases", NotionSkill.tools), ("Slack", "Read channels/DMs/mentions, search, and send messages with confirmation", SlackSkill.tools), ("Scheduler", "Schedule reminders and recurring background tasks", SchedulerSkill.tools), + 
("VM Agents", "Recurring agents that run on your SSH VM on a cron schedule and push results", VMCronSkill.tools), ("Web Search", "Search the web for up-to-date information", ExaSkill.tools), + ("Image Search", "Search the web for real photos and render them inline as a gallery", SerpImageSearchSkill.tools), ("URL Fetch", "Fetch and read a single web page (no API key)", URLFetchSkill.tools), ("Git", "Clone, pull, and check status of git repositories", GitSkill.tools), ("GitHub", "Review/merge/comment on PRs, open PRs and issues, browse repos and notifications", GitHubSkill.tools), @@ -116,6 +118,7 @@ final class AgentHarness { ("File System", "Browse and edit files in the workspace", FileSystemSkill.tools), ("Spec Builder", "Draft execution specs from a goal", SpecBuilderSkill.tools), ("Location", "Look up the device's current location", LocationSkill.tools + MapsSkill.tools), + ("Geocoding", "Convert an address or place name to lat/lon coordinates", GeocodingSkill.tools), ("Image", "Generate images from a text prompt", ImageSkill.tools), ("PDF", "Generate a clean, page-aware PDF from a markdown document", PDFSkill.tools), ("Obsidian", "Read and write the Obsidian vault", ObsidianSkill.tools), @@ -128,10 +131,20 @@ final class AgentHarness { ("Devin", "Dispatch coding tasks to Devin cloud agents (opens PRs, live transcript)", DevinSkill.tools), ("X (Twitter)", "Post tweets to X (Twitter) with confirmation", TwitterSkill.tools), ("SSH", "Execute shell commands on a remote host via SSH", SSHSkill.tools), + ("Google Drive", "Search, read, and create files in Google Drive", GoogleDriveSkill.tools), + ("Google Gmail", "Search, read, and send emails via Gmail", GoogleGmailSkill.tools), + ("Google Calendar", "List and create events on Google Calendar (REST API)", GoogleCalendarSkill.tools), + ("Feed Cards", "Generate visual feed cards (image or markdown poster)", CardSkill.tools), ] #if canImport(HealthKit) && os(iOS) catalog.append(("Apple Health", "Read-only access to 
steps, distance, workouts, heart rate, sleep, body mass", HealthSkill.tools)) #endif + #if os(iOS) || os(macOS) + catalog.append(("Stories", "Generate a 1080×1920 animated HTML story / infographic that renders as a tappable card", StorySkill.tools)) + #endif + #if os(iOS) + catalog.append(("Browse", "Drive a real WebKit browser on-device to render and navigate JS-heavy pages, with a live preview card + scrubbable replay", BrowseSkill.tools)) + #endif return catalog }() @@ -140,7 +153,9 @@ final class AgentHarness { NotionSkill.systemPromptFragment, SlackSkill.systemPromptFragment, SchedulerSkill.systemPromptFragment, + VMCronSkill.systemPromptFragment, ExaSkill.systemPromptFragment, + SerpImageSearchSkill.systemPromptFragment, URLFetchSkill.systemPromptFragment, GitSkill.systemPromptFragment, GitHubSkill.systemPromptFragment, @@ -149,6 +164,7 @@ final class AgentHarness { SpecBuilderSkill.systemPromptFragment, LocationSkill.systemPromptFragment, MapsSkill.systemPromptFragment, + GeocodingSkill.systemPromptFragment, ImageSkill.systemPromptFragment, PDFSkill.systemPromptFragment, ObsidianSkill.systemPromptFragment, @@ -162,10 +178,21 @@ final class AgentHarness { DevinSkill.systemPromptFragment, TwitterSkill.systemPromptFragment, SSHSkill.systemPromptFragment, + GoogleDriveSkill.systemPromptFragment, + GoogleGmailSkill.systemPromptFragment, + GoogleCalendarSkill.systemPromptFragment, + AgentMailSkill.systemPromptFragment, + CardSkill.systemPromptFragment, ] #if canImport(HealthKit) && os(iOS) fragments.append(HealthSkill.systemPromptFragment) #endif + #if os(iOS) || os(macOS) + fragments.append(StorySkill.systemPromptFragment) + #endif + #if os(iOS) + fragments.append(BrowseSkill.systemPromptFragment) + #endif self.toolsDoc = fragments.joined(separator: "\n\n") self.staticToolsDocLength = toolsDoc.count self.staticToolSchemasCount = toolSchemas.count @@ -321,6 +348,15 @@ final class AgentHarness { if !agents.isEmpty { sections.append("# AGENTS\n\(agents)") } if 
!heartbeat.isEmpty { sections.append("# HEARTBEAT\n\(heartbeat)") } if !toolsDoc.isEmpty { sections.append(toolsDoc) } + + // Layer 4 — anti-loop prompt injection. When the ToolCallGuard + // detects repeated tool calls, inject a strong system reminder + // telling the model to stop looping and use existing data. + if ToolCallGuard.shared.shouldInjectLoopReminder { + sections.append(ToolCallGuard.loopReminderPrompt) + ToolCallGuard.shared.consumeLoopReminder() + } + return sections.joined(separator: "\n\n") } @@ -394,6 +430,27 @@ final class AgentHarness { var rebuilt: [MessageStruct] = [MessageStruct(role: "system", content: composedSystem)] rebuilt.append(contentsOf: messages.filter { $0.role != "system" }) + // Per-turn vision fallback. If this request will send a raw image but + // the selected model can't see images (e.g. GLM 5.2 on Fireworks), + // route just this request to a vision-capable model — preferring the + // same provider, so GLM 5.2 falls back to Kimi K2.6 on the same key. + // Only the image-bearing turn is rerouted: by the next turn the image + // has been replaced with its text summary (VisionSummaryService), so + // the selected model picks the conversation back up on its own. + let selected = ModelSelectionStore.current + var routeProvider = selected.provider + var modelIDOverride: String? = nil + var modelStampOverride: String? = nil + if !selected.supportsVision, + Self.turnSendsRawImage(rebuilt), + let fallback = ModelSelection.visionCapableFallback(preferring: selected.provider), + fallback != selected { + routeProvider = fallback.provider + modelIDOverride = fallback.apiModelID + modelStampOverride = fallback.stampedMessageModel + print("AgentHarness: \(selected.displayName) can't see images; routing this image turn to \(fallback.displayName)") + } + // Hosted provider selected → talk straight to it with the user's own // key (Settings ▸ Keys). 
This deliberately bypasses the `Cloud` // backend: the open-source export ships `Cloud.url` as a placeholder, @@ -403,13 +460,13 @@ final class AgentHarness { // fallback is exactly what made a missing key look like "the agent // ignores my model settings". Apple is opt-in via Settings ▸ Model // (handled by the `.apple` branch above). - switch ModelSelectionStore.current.provider { + switch routeProvider { case .anthropic: - AnthropicChat.shared.chat(messages: rebuilt, tools: toolsToSend, onPartial: onPartial, completion: completion) + AnthropicChat.shared.chat(messages: rebuilt, tools: toolsToSend, modelIDOverride: modelIDOverride, modelStampOverride: modelStampOverride, onPartial: onPartial, completion: completion) case .openAI: - OpenAIChat.shared.chat(messages: rebuilt, tools: toolsToSend, onPartial: onPartial, completion: completion) + OpenAIChat.shared.chat(messages: rebuilt, tools: toolsToSend, modelIDOverride: modelIDOverride, modelStampOverride: modelStampOverride, onPartial: onPartial, completion: completion) case .fireworks: - FireworksChat.shared.chat(messages: rebuilt, tools: toolsToSend, onPartial: onPartial, completion: completion) + FireworksChat.shared.chat(messages: rebuilt, tools: toolsToSend, modelIDOverride: modelIDOverride, modelStampOverride: modelStampOverride, onPartial: onPartial, completion: completion) case .apple: // Unreachable — `.apple` returned via offlineRespond above. Kept // so the switch stays exhaustive if providers are added. @@ -417,6 +474,19 @@ final class AgentHarness { } } + /// True when this turn's context contains a ready image attachment that the + /// chat clients will send as a raw image block — i.e. it's the most recent + /// human turn, or it has no text description yet. Mirrors the downgrade + /// condition in `AnthropicChat.wirePayload` / `OpenAIChat.wireMessages`. 
+ private static func turnSendsRawImage(_ messages: [MessageStruct]) -> Bool { + let lastUserId = messages.last { $0.role == "user" }?.id + for m in messages { + guard let f = m.fileAttachment, f.kind == .image, f.status == .ready else { continue } + if m.id == lastUserId || f.visionSummary == nil { return true } + } + return false + } + // MARK: - Offline path // // Slimmed-down responder for when the device is offline. No tools, no diff --git a/LoopIOS/AgentHarness/SkillDispatcher.swift b/LoopIOS/AgentHarness/SkillDispatcher.swift index b5efbaf..ec6fc6b 100644 --- a/LoopIOS/AgentHarness/SkillDispatcher.swift +++ b/LoopIOS/AgentHarness/SkillDispatcher.swift @@ -42,16 +42,43 @@ final class SkillDispatcher { /// result message (role: "function") exactly once, on whichever queue the /// underlying skill resolves on (typically the main queue — same as the /// chat-UI path). + /// + /// The call runs through `ToolCallGuard` first. If the guard detects a + /// duplicate / looping call it short-circuits with a synthetic error and + /// the real skill never executes — saving API quota and preventing the + /// model from burning tokens re-fetching data it already has. func dispatch(_ call: FunctionCallStruct, completion: @escaping (MessageStruct) -> Void) { + // --- Anti-loop guard (layers 1–3) --- + let verdict = ToolCallGuard.shared.evaluate(call: call) + switch verdict { + case .block(var result, _): + if result.callId == nil { result.callId = call.callId } + if result.name == nil { result.name = call.name } + completion(result) + return + case .allow: + break + } + + // Wrap the downstream completion so we can record the result for + // the guard's result-diff layer before handing it back to the caller. 
+ let guardedCompletion: (MessageStruct) -> Void = { result in + ToolCallGuard.shared.recordResult( + call: call, + resultContent: result.content + ) + completion(result) + } + // Built-in skills first — direct references keep this branchless and // avoid the runtime-registration overhead for the common case. if NotionSkill.shared.handles(functionName: call.name) { - NotionSkill.shared.handle(functionCall: call, completion: completion); return + NotionSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if SlackSkill.shared.handles(functionName: call.name) { - SlackSkill.shared.handle(functionCall: call, completion: completion); return + SlackSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if SchedulerSkill.shared.handles(functionName: call.name) { // Scheduling from inside a scheduled-task run is disallowed at @@ -66,77 +93,119 @@ final class SkillDispatcher { )) return } - SchedulerSkill.shared.handle(functionCall: call, completion: completion); return + SchedulerSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if ExaSkill.shared.handles(functionName: call.name) { - ExaSkill.shared.handle(functionCall: call, completion: completion); return + ExaSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if SerpImageSearchSkill.shared.handles(functionName: call.name) { + SerpImageSearchSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if URLFetchSkill.shared.handles(functionName: call.name) { - URLFetchSkill.shared.handle(functionCall: call, completion: completion); return + URLFetchSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if GitSkill.shared.handles(functionName: call.name) { - GitSkill.shared.handle(functionCall: call, completion: completion); return + GitSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if GitHubSkill.shared.handles(functionName: 
call.name) { - GitHubSkill.shared.handle(functionCall: call, completion: completion); return + GitHubSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if SelfImprovementSkill.shared.handles(functionName: call.name) { - SelfImprovementSkill.shared.handle(functionCall: call, completion: completion); return + SelfImprovementSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if FileSystemSkill.shared.handles(functionName: call.name) { - FileSystemSkill.shared.handle(functionCall: call, completion: completion); return + FileSystemSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if SpecBuilderSkill.shared.handles(functionName: call.name) { - SpecBuilderSkill.shared.handle(functionCall: call, completion: completion); return + SpecBuilderSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if LocationSkill.shared.handles(functionName: call.name) { - LocationSkill.shared.handle(functionCall: call, completion: completion); return + LocationSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if MapsSkill.shared.handles(functionName: call.name) { - MapsSkill.shared.handle(functionCall: call, completion: completion); return + MapsSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if GeocodingSkill.shared.handles(functionName: call.name) { + GeocodingSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if ImageSkill.shared.handles(functionName: call.name) { - ImageSkill.shared.handle(functionCall: call, completion: completion); return + ImageSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if PDFSkill.shared.handles(functionName: call.name) { - PDFSkill.shared.handle(functionCall: call, completion: completion); return + PDFSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if 
ObsidianSkill.shared.handles(functionName: call.name) { - ObsidianSkill.shared.handle(functionCall: call, completion: completion); return + ObsidianSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if CalendarSkill.shared.handles(functionName: call.name) { - CalendarSkill.shared.handle(functionCall: call, completion: completion); return + CalendarSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if MusicSkill.shared.handles(functionName: call.name) { - MusicSkill.shared.handle(functionCall: call, completion: completion); return + MusicSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if SkillBuilderSkill.shared.handles(functionName: call.name) { - SkillBuilderSkill.shared.handle(functionCall: call, completion: completion); return + SkillBuilderSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if IntegrationSkill.shared.handles(functionName: call.name) { - IntegrationSkill.shared.handle(functionCall: call, completion: completion); return + IntegrationSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if NavigationSkill.shared.handles(functionName: call.name) { - NavigationSkill.shared.handle(functionCall: call, completion: completion); return + NavigationSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if CursorSkill.shared.handles(functionName: call.name) { - CursorSkill.shared.handle(functionCall: call, completion: completion); return + CursorSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if DevinSkill.shared.handles(functionName: call.name) { - DevinSkill.shared.handle(functionCall: call, completion: completion); return + DevinSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if TwitterSkill.shared.handles(functionName: call.name) { - TwitterSkill.shared.handle(functionCall: call, completion: completion); return + 
TwitterSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if SSHSkill.shared.handles(functionName: call.name) { - SSHSkill.shared.handle(functionCall: call, completion: completion); return + SSHSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if VMCronSkill.shared.handles(functionName: call.name) { + // Like SchedulerSkill, scheduling from inside a headless scheduled + // run is disallowed so a task can't spawn more scheduled work. + if BackgroundScheduler.shared.isRunningHeadless { + completion(MessageStruct( + role: "function", + content: "{\"status\":\"blocked\",\"message\":\"VM-agent tools are unavailable inside a scheduled task run.\"}", + name: call.name + )) + return + } + VMCronSkill.shared.handle(functionCall: call, completion: guardedCompletion); return } if MuniRealtimeSkill.shared.handles(functionName: call.name) { - MuniRealtimeSkill.shared.handle(functionCall: call, completion: completion); return + MuniRealtimeSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if CardSkill.shared.handles(functionName: call.name) { + CardSkill.shared.handle(functionCall: call, completion: completion); return + } + if GoogleDriveSkill.shared.handles(functionName: call.name) { + GoogleDriveSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if GoogleGmailSkill.shared.handles(functionName: call.name) { + GoogleGmailSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if GoogleCalendarSkill.shared.handles(functionName: call.name) { + GoogleCalendarSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if AgentMailSkill.shared.handles(functionName: call.name) { + AgentMailSkill.shared.handle(functionCall: call, completion: completion); return } #if canImport(HealthKit) && os(iOS) if HealthSkill.shared.handles(functionName: call.name) { - HealthSkill.shared.handle(functionCall: 
call, completion: completion); return + HealthSkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + #endif + #if os(iOS) + if StorySkill.shared.handles(functionName: call.name) { + StorySkill.shared.handle(functionCall: call, completion: guardedCompletion); return + } + if BrowseSkill.shared.handles(functionName: call.name) { + BrowseSkill.shared.handle(functionCall: call, completion: completion); return } #endif @@ -146,14 +215,14 @@ final class SkillDispatcher { let snapshot = registered lock.unlock() for entry in snapshot where entry.handles(call.name) { - entry.handle(call, completion) + entry.handle(call, guardedCompletion) return } // Dynamic (user-authored JS) skills last — they're hot-loaded so the // registry is the source of truth for what's currently available. if DynamicSkillRegistry.shared.handles(functionName: call.name) { - DynamicSkillRegistry.shared.handle(functionCall: call, completion: completion) + DynamicSkillRegistry.shared.handle(functionCall: call, completion: guardedCompletion) return } @@ -161,7 +230,7 @@ final class SkillDispatcher { // namespaced `__` so they can't collide with anything // checked above. if MCPRegistry.shared.handles(functionName: call.name) { - MCPRegistry.shared.handle(functionCall: call, completion: completion) + MCPRegistry.shared.handle(functionCall: call, completion: guardedCompletion) return } diff --git a/LoopIOS/AgentHarness/ToolCallGuard.swift b/LoopIOS/AgentHarness/ToolCallGuard.swift new file mode 100644 index 0000000..66abc53 --- /dev/null +++ b/LoopIOS/AgentHarness/ToolCallGuard.swift @@ -0,0 +1,258 @@ +// +// ToolCallGuard.swift +// Loop +// +// Anti-loop / duplicate tool-call guard. Sits between the model's tool +// requests and the actual skill dispatch, catching models (Kimi K2 and +// others) that emit the same call in a tight loop and burning tokens + +// API quota for no reason. +// +// Four layers of protection (all configurable): +// +// 1. 
**Dedup guard** — tracks the last N calls (name + normalized args). +// If a duplicate is detected, returns a synthetic error immediately +// without executing the tool. +// +// 2. **Repeat counter** — hard-caps identical calls at `maxRepeats`. On +// the Nth repeat the turn is aborted with a warning surfaced to the +// user. +// +// 3. **Result diff check** — if a tool returned the exact same payload +// twice in a row for the same (name, args) signature, short-circuits +// the next identical call with a system-level reminder. +// +// 4. **Prompt injection** — when any layer trips, sets a flag that the +// harness reads on the next `buildSystemPrompt` call to inject a +// strong "stop repeating" system reminder into the conversation +// context. +// +// Thread-safe: all mutable state is protected by `NSLock`. The guard is +// singleton per process — all execution paths (MessagingVC, SubAgentRuntime, +// BackgroundScheduler) share the same history so a loop that spans paths +// is still caught. +// + +import Foundation + +final class ToolCallGuard { + + static let shared = ToolCallGuard() + + // MARK: - Configuration + + struct Config { + /// How many recent calls to remember for dedup detection. + var historyWindow: Int = 5 + /// Maximum times the same (name, args) can repeat before hard-abort. + var maxRepeats: Int = 3 + /// Enable/disable each layer independently. + var deduplicationEnabled: Bool = true + var repeatCounterEnabled: Bool = true + var resultDiffEnabled: Bool = true + var promptInjectionEnabled: Bool = true + } + + var config = Config() + + // MARK: - Outcome + + /// What the guard decided about a given call. + enum Verdict { + /// Call is allowed — proceed to the real skill dispatch. + case allow + /// Call is blocked — return the associated synthetic `MessageStruct` + /// to the model instead of executing the tool. The `loopDetected` + /// flag indicates whether prompt injection should fire. 
+ case block(result: MessageStruct, loopDetected: Bool) + } + + // MARK: - Internal state + + /// Fingerprint of a single tool call: name + deterministic JSON of args. + private struct CallSignature: Hashable { + let name: String + let normalizedArgs: String + } + + private struct HistoryEntry { + let signature: CallSignature + let resultContent: String? + } + + /// Ring buffer of recent calls, newest at the end. + private var history: [HistoryEntry] = [] + + /// Running repeat count per signature within the current conversation + /// turn sequence. Reset when `resetForNewTurn()` is called (e.g. on a new + /// user message). + private var repeatCounts: [CallSignature: Int] = [:] + + /// Last result content per signature, for the result-diff check. + private var lastResults: [CallSignature: String] = [:] + + /// When true, the next `buildSystemPrompt` should inject the anti-loop + /// reminder. Cleared after injection. + private(set) var shouldInjectLoopReminder: Bool = false + + /// The anti-loop system message injected when a loop is detected. + static let loopReminderPrompt: String = """ + ⚠️ LOOP DETECTED — STOP REPEATING TOOL CALLS. + + You have called the same tool with the same arguments multiple times \ + and received the same result. This is wasteful and unproductive. + + INSTRUCTIONS: + 1. Do NOT call that tool again with the same arguments. + 2. Use the data you already have to answer the user's question. + 3. If the data is insufficient, try a DIFFERENT tool or DIFFERENT arguments. + 4. If no alternative exists, tell the user what you found and ask for guidance. + + Continuing to loop will cause your turn to be terminated. + """ + + private let lock = NSLock() + + private init() {} + + // MARK: - Public API + + /// Evaluate a tool call before dispatch. Returns `.allow` if the call + /// should proceed, or `.block(result:loopDetected:)` with a synthetic + /// error message if the guard intervenes.
+    func evaluate(call: FunctionCallStruct) -> Verdict {
+        lock.lock()
+        defer { lock.unlock() }
+
+        let sig = signature(for: call)
+
+        // Bump the repeat counter for this signature. The bump happens before
+        // any layer runs, so calls that end up blocked are counted too and a
+        // model that keeps retrying still reaches the Layer 2 hard cap.
+        let count = (repeatCounts[sig] ?? 0) + 1
+        repeatCounts[sig] = count
+
+        // --- Layer 2: Repeat counter (hard cap) ---
+        // Checked first so the hard-abort message wins over the softer
+        // Layer 1 / Layer 3 messages once the cap is exceeded.
+        if config.repeatCounterEnabled && count > config.maxRepeats {
+            let message = """
+                LOOP ABORTED: You have called \(call.name) with these exact \
+                arguments \(count) times. Your turn is being terminated. Stop \
+                repeating and use the data you already have.
+                """
+            return blockVerdict(for: call, message: message)
+        }
+
+        // --- Layer 1: Dedup guard (recent window) ---
+        if config.deduplicationEnabled && count > 1 {
+            // The call appeared before in the current sequence. Check
+            // whether it's within the sliding window.
+            let windowStart = max(0, history.count - config.historyWindow)
+            let seenRecently = history[windowStart...].contains { $0.signature == sig }
+            if seenRecently {
+                let message = """
+                    You already called \(call.name) with these arguments. Stop \
+                    looping. Use the result you already received.
+                    """
+                return blockVerdict(for: call, message: message)
+            }
+        }
+
+        // --- Layer 3: Result diff check ---
+        // This runs before dispatch, so it cannot compare a new result with
+        // the old one. It preemptively blocks any repeat (2nd+ call) of a
+        // signature that already has a recorded result, echoing a short
+        // preview of that result back to the model.
+        if config.resultDiffEnabled, count > 1, let lastResult = lastResults[sig] {
+            let message = """
+                You are repeating tool calls and getting the same result. Stop. \
+                Use what you already have. Previous result was: \
+                \(String(lastResult.prefix(200)))
+                """
+            return blockVerdict(for: call, message: message)
+        }
+
+        // Allowed — record in history.
+        history.append(HistoryEntry(signature: sig, resultContent: nil))
+        trimHistory()
+
+        return .allow
+    }
+
+    /// Record the result of a tool call that was allowed through. Used by
+    /// the result-diff layer on subsequent calls.
+    func recordResult(call: FunctionCallStruct, resultContent: String) {
+        lock.lock()
+        defer { lock.unlock() }
+        let sig = signature(for: call)
+        lastResults[sig] = resultContent
+    }
+
+    /// Clear the loop-reminder flag after it has been consumed by the
+    /// system prompt builder.
+    func consumeLoopReminder() {
+        lock.lock()
+        defer { lock.unlock() }
+        shouldInjectLoopReminder = false
+    }
+
+    /// Reset all tracking state. Call this when a new user message arrives
+    /// so prior tool-call patterns from a different turn don't bleed into
+    /// the new one. The per-conversation reset is important because the
+    /// model might legitimately re-use a tool in a different conversational
+    /// context.
+    func resetForNewTurn() {
+        lock.lock()
+        defer { lock.unlock() }
+        history.removeAll()
+        repeatCounts.removeAll()
+        lastResults.removeAll()
+        shouldInjectLoopReminder = false
+    }
+
+    // MARK: - Helpers
+
+    private func signature(for call: FunctionCallStruct) -> CallSignature {
+        CallSignature(
+            name: call.name,
+            normalizedArgs: normalizeArgs(call.arguments)
+        )
+    }
+
+    /// Build the `.block` verdict returned when a layer trips: arms the
+    /// loop-reminder flag (when prompt injection is enabled) and wraps
+    /// `message` in a synthetic function-role reply for `call`.
+    /// Only called from `evaluate`, which already holds `lock`.
+    private func blockVerdict(for call: FunctionCallStruct, message: String) -> Verdict {
+        shouldInjectLoopReminder = config.promptInjectionEnabled
+        let msg = MessageStruct(
+            role: "function",
+            content: errorPayload(message),
+            name: call.name
+        )
+        return .block(result: msg, loopDetected: true)
+    }
+
+    /// Wrap `message` in the `{"status":"error","error":…}` envelope used
+    /// for blocked calls. The message is JSON-escaped so a tool name or an
+    /// echoed tool result containing quotes, backslashes, or newlines
+    /// cannot produce a malformed payload.
+    private func errorPayload(_ message: String) -> String {
+        // Encode the string as the sole element of an array and strip the
+        // surrounding brackets, which leaves just the quoted, escaped string.
+        var quoted = "\"\""
+        if let data = try? JSONSerialization.data(withJSONObject: [message]),
+           let encoded = String(data: data, encoding: .utf8),
+           encoded.hasPrefix("["), encoded.hasSuffix("]") {
+            quoted = String(encoded.dropFirst().dropLast())
+        }
+        return "{\"status\":\"error\",\"error\":\(quoted)}"
+    }
+
+    /// Deterministic JSON serialization of the arguments dictionary so that
+    /// `{a:1, b:2}` and `{b:2, a:1}` produce the same string.
+ private func normalizeArgs(_ args: [String: Any]) -> String { + guard !args.isEmpty else { return "{}" } + // JSONSerialization with .sortedKeys gives us a canonical form. + if let data = try? JSONSerialization.data( + withJSONObject: args, + options: [.sortedKeys] + ), let str = String(data: data, encoding: .utf8) { + return str + } + // Fallback: sort keys manually and concatenate. + return args.keys.sorted().map { "\($0)=\(args[$0] ?? "nil")" }.joined(separator: "&") + } + + private func trimHistory() { + let maxEntries = config.historyWindow * 2 + if history.count > maxEntries { + history.removeFirst(history.count - maxEntries) + } + } +} diff --git a/LoopIOS/AgentHarness/ToolRouter.swift b/LoopIOS/AgentHarness/ToolRouter.swift index ae833f3..40f965c 100644 --- a/LoopIOS/AgentHarness/ToolRouter.swift +++ b/LoopIOS/AgentHarness/ToolRouter.swift @@ -91,6 +91,7 @@ struct ToolRouter { "website", "browse", "fetch", ], toolNames: [ "exa_search", "exa_get_contents", "exa_list_websets", "fetch_url", + "image_search", ]), SkillGroup(name: "health", keywords: [ "health", "steps", "workout", "heart rate", "sleep", @@ -115,8 +116,14 @@ struct ToolRouter { ]), SkillGroup(name: "image", keywords: [ "image", "picture", "photo", "generate image", "draw", - "illustration", "dalle", "dall-e", + "illustration", "dalle", "dall-e", "find image", "find me image", + "show me", "real photo", "pics", "pictures of", "photos of", ], toolNames: [ + // Both the web image search and the AI generator are surfaced for + // any image-ish request. The system prompt steers the model to + // prefer image_search for real/existing subjects (cheap) and only + // use generate_image for invented art (expensive). 
+ "image_search", "generate_image", ]), SkillGroup(name: "pdf", keywords: [ @@ -124,6 +131,20 @@ struct ToolRouter { ], toolNames: [ "generate_pdf", ]), + SkillGroup(name: "story", keywords: [ + "story", "stories", "recap", "infographic", "summary card", + "visual summary", "wrapped", "highlight reel", + ], toolNames: [ + "generate_story", + ]), + SkillGroup(name: "browse", keywords: [ + "browse", "open the", "go to", "visit", "navigate to", "website", + "web page", "webpage", "homepage", "home page", "render", + "what's on", "whats on", "check the", "look at the page", + "load the", "the site", "click through", "screenshot the", + ], toolNames: [ + "browse", + ]), SkillGroup(name: "twitter", keywords: [ "tweet", "twitter", "x.com", "post tweet", ], toolNames: [ @@ -149,6 +170,18 @@ struct ToolRouter { ], toolNames: [ "muni_arrivals", ]), + SkillGroup(name: "email", keywords: [ + "email", "e-mail", "mail", "send it to", "send this to", + "inbox", "gmail", "agentmail", "send me", + ], toolNames: [ + "agent_mail", "google_gmail", + ]), + SkillGroup(name: "card", keywords: [ + "card", "feed", "generate a card", "summary card", + "recap", "visual card", "poster", + ], toolNames: [ + "generate_card", + ]), ] /// Tools always included regardless of message content. These are needed @@ -166,6 +199,8 @@ struct ToolRouter { // Scheduler "schedule_task", "schedule_cron", "list_tasks", "list_crons", "delete_task", "delete_cron", "run_task_now", + // VM cron agents (recurring jobs on the SSH VM) + "schedule_vm_agent", "list_vm_agents", "delete_vm_agent", // Integration "list_integrations", "connect_integration", "set_api_key", "list_api_keys", "open_integration_settings", @@ -175,6 +210,11 @@ struct ToolRouter { "save_skill", "list_skills", "delete_skill", // Spec builder "publish_spec_to_notion", + // Browse — the internal per-step action tool used by BrowseSession's + // nested model loop. 
Kept here so tool-routing on those nested calls + // never strips it (the page-state observations would otherwise match + // an unrelated skill group and filter browse_action out). + "browse_action", ] // MARK: - Selection diff --git a/LoopIOS/AgentView/AgentLargeVC.swift b/LoopIOS/AgentView/AgentLargeVC.swift index f75e1f8..9d0812f 100644 --- a/LoopIOS/AgentView/AgentLargeVC.swift +++ b/LoopIOS/AgentView/AgentLargeVC.swift @@ -81,6 +81,13 @@ final class AgentLargeVC: UIViewController { } @objc private func handlePan(_ pan: UIPanGestureRecognizer) { + // Transform the CONTENT view (`agentView`), not the controller's root + // `view`, and measure translation against the root (which never moves, + // so it's a stable reference). Transforming the constrained child-VC + // root view directly doesn't render during the drag — the move only + // appears on release — which is exactly the "animates on release, not + // during the drag" bug. Sliding the pinned content subview instead + // renders live, mirroring the working ImageViewerVC dismiss. let translation = pan.translation(in: view) switch pan.state { case .changed: @@ -89,11 +96,11 @@ final class AgentLargeVC: UIViewController { let dy = max(0, translation.y) // Rubber-band — feels increasingly resistant the farther you drag. let damped = dy < dismissThreshold ? dy : dismissThreshold + (dy - dismissThreshold) * 0.4 - view.transform = CGAffineTransform(translationX: 0, y: damped) + agentView.transform = CGAffineTransform(translationX: 0, y: damped) // Fade out as the user drags so the dismiss feels imminent before // the threshold trips. 
let progress = min(1, dy / 300) - view.alpha = 1 - 0.4 * progress + agentView.alpha = 1 - 0.4 * progress case .ended, .cancelled: if translation.y > dismissThreshold { if let onDismiss = onDismiss { @@ -107,8 +114,8 @@ final class AgentLargeVC: UIViewController { usingSpringWithDamping: 0.75, initialSpringVelocity: 0.4, options: [.allowUserInteraction]) { - self.view.transform = .identity - self.view.alpha = 1 + self.agentView.transform = .identity + self.agentView.alpha = 1 } } default: diff --git a/LoopIOS/AgentView/AgentLargeView.swift b/LoopIOS/AgentView/AgentLargeView.swift index 8453cab..8eae92f 100644 --- a/LoopIOS/AgentView/AgentLargeView.swift +++ b/LoopIOS/AgentView/AgentLargeView.swift @@ -336,6 +336,24 @@ final class AgentLargeView: UIView { tickerMask.frame = tickerContainer.bounds } + /// Toggle everything except the backdrop and the orb. Used by the present + /// transition to hold the labels/pill/ticker back until the orb has flown + /// into place — otherwise the chrome lands at full opacity before the orb + /// arrives, which reads as the labels "showing up early." The orb itself + /// is driven separately by `AvatarPopAnimator`. + func setChromeHidden(_ hidden: Bool, animated: Bool) { + let chrome: [UIView] = [ + statusLabel, dismissHint, muteButton, + voicePill, tickerContainer, subAgentScroll, + ] + let apply = { chrome.forEach { $0.alpha = hidden ? 0 : 1 } } + if animated { + UIView.animate(withDuration: 0.25, animations: apply) + } else { + apply() + } + } + // MARK: - Voice pill /// Pump the current voice coordinator state + held flag into the pill's diff --git a/LoopIOS/AppDelegate.swift b/LoopIOS/AppDelegate.swift index 8dc64a9..a2a9afc 100644 --- a/LoopIOS/AppDelegate.swift +++ b/LoopIOS/AppDelegate.swift @@ -47,6 +47,10 @@ class AppDelegate: UIResponder, UIApplicationDelegate { // it likewise must run before this method returns. 
OpenClawMessagePoller.shared.bootstrap() + // VM cron poller — backstop catch-up for recurring VM agents whose + // completion push was missed. + VMCronPoller.shared.bootstrap() + // APNs registration hooks (inert by default): only registers if the user // has already authorized notifications, so no new prompt. The VM-side // sender that would push on agent completion is not built yet. @@ -106,7 +110,20 @@ extension AppDelegate: UNUserNotificationCenterDelegate { // Bookkeeping — bounded jobs decrement, unbounded re-arm. BackgroundScheduler.shared.notificationDidFire(jobId: jobId) } - // Runner turn/job notifications — no bookkeeping needed, just display. + + // A runner completion push that arrives while the app is foregrounded: + // reconcile the reply straight into its conversation and suppress the + // banner (it's now visible in the chat). Falls back to a banner if there's + // nothing to apply (e.g. couldn't reach the runner to fetch the turn). + if (userInfo["type"] as? String) == "runner_turn" { + Task { + let (_, reconciled) = await self.reconcileRunnerTurn(userInfo) + completionHandler(reconciled ? [] : [.banner, .sound, .list]) + } + return + } + + // Runner job notifications + everything else — just display. completionHandler([.banner, .sound, .list]) } @@ -131,8 +148,19 @@ extension AppDelegate: UNUserNotificationCenterDelegate { return } - // Runner notifications — open the app; the turn id is in userInfo - // for future deep-linking. For now just foreground the app. + // Runner turn completion — reconcile the reply into its conversation and + // open it. Works cold (the app may have been killed before the push). + if (userInfo["type"] as? String) == "runner_turn" { + Task { + let (convId, _) = await self.reconcileRunnerTurn(userInfo) + if let convId { + await MainActor.run { self.openPrefetchedConversation(id: convId) } + } + } + return + } + + // Other runner notifications (jobs) — just foreground. 
if LoopRunnerPoller.isRunnerNotification(userInfo) { return } @@ -156,13 +184,43 @@ extension AppDelegate: UNUserNotificationCenterDelegate { } } + // MARK: - Runner turn reconciliation + + /// Resolve a completed runner turn into its conversation, exactly once. + /// Reads the reply text from the push payload's `text` field (no network + /// fetch) and writes it via the deduped `RunnerTurnApplier`. Returns the + /// conversation id (for opening on tap) and whether the reply is now in the + /// chat (so a foreground push can suppress its redundant banner). + private func reconcileRunnerTurn(_ userInfo: [AnyHashable: Any]) async -> (conversationId: String?, reconciled: Bool) { + guard let turnId = userInfo["turn_id"] as? String else { return (nil, false) } + let convId = (userInfo["conversation_id"] as? String) ?? RunnerTurnApplier.conversationId(forTurn: turnId) + + if RunnerTurnApplier.isApplied(turnId: turnId) { + return (convId, true) + } + // The one-shot delivers the reply text in the push payload (flattened to + // top-level userInfo by the backend) — no fetch needed. + guard let text = userInfo["text"] as? String, !text.isEmpty else { + return (convId, false) + } + let applied = RunnerTurnApplier.applyRunnerTurn(turnId: turnId, conversationId: convId, text: text) + return (convId, applied || RunnerTurnApplier.isApplied(turnId: turnId)) + } + // MARK: - Tap routing helpers private func openPrefetchedConversation(id: String) { + // Cold-start tap (app was killed when the cron/runner push is tapped): + // MessagingVC isn't in the window yet and/or the store hasn't hydrated, + // so both lookups below return nil. Stash the request so MessagingVC + // honors it once it's ready (viewDidLoad / store-ready drain).
guard let messagingVC = Self.findMessagingVC(), - let conv = SimpleConversationManager.shared.getConversation(by: id) else { return } + let conv = SimpleConversationManager.shared.getConversation(by: id) else { + PendingConversationOpen.shared.set(id) + return + } DispatchQueue.main.async { - messagingVC.loadConversation(conv) + messagingVC.openConversationFromNotification(conv) } } diff --git a/LoopIOS/AppFlags.swift b/LoopIOS/AppFlags.swift new file mode 100644 index 0000000..e18a08a --- /dev/null +++ b/LoopIOS/AppFlags.swift @@ -0,0 +1,40 @@ +// +// AppFlags.swift +// Loop +// +// Build-time feature flags sourced from Secrets.xcconfig via Info.plist +// (`$(LOOP_FLAG)` substitution), mirroring how KeyStore reads bundled API +// keys. Central, read-only home for flags that change app behavior — add a +// static here and read `AppFlags.` anywhere. (LoopIOS/ is compiled into +// the iOS, Mac, and Vision targets, so these are available on all platforms.) +// + +import Foundation + +enum AppFlags { + + /// Raw `LOOP_FLAG` value from the build config, or nil when unset. Empty or + /// an unexpanded `$(LOOP_FLAG)` placeholder (xcconfig key left blank) count + /// as unset. Most callers want `isManaged`. + static var loopFlag: String? { string(for: "LOOP_FLAG") } + + /// True when the app is in *managed* mode: the execution backend is fixed + /// by the build, so backend-switching UI (e.g. the navbar indicator) shows + /// "Managed" and isn't user-changeable. Set whenever `LOOP_FLAG` is present + /// in Secrets.xcconfig; absent → the app behaves normally. + static var isManaged: Bool { loopFlag != nil } + + /// Label shown in place of the backend name when managed. + static let managedLabel = "Managed" + + // MARK: - Info.plist reader + + /// A non-empty Info.plist string, treating an unexpanded `$(VAR)` + /// placeholder as missing — same rule KeyStore uses for API keys. + private static func string(for key: String) -> String? 
{ + guard let raw = Bundle.main.object(forInfoDictionaryKey: key) as? String else { return nil } + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty || trimmed.hasPrefix("$(") { return nil } + return trimmed + } +} diff --git a/LoopIOS/AppIntents/AskLoopIntent.swift b/LoopIOS/AppIntents/AskLoopIntent.swift new file mode 100644 index 0000000..9ad5dfa --- /dev/null +++ b/LoopIOS/AppIntents/AskLoopIntent.swift @@ -0,0 +1,54 @@ +// +// AskLoopIntent.swift +// Loop +// +// App Intent that lets Siri / Shortcuts send a natural-language query to +// Loop. Opens the app and injects the query into the agent harness's chat. +// +// Usage: "Hey Siri, ask Loop what's on my calendar" +// + +import AppIntents +import UIKit + +@available(iOS 27.0, *) +struct AskLoopIntent: AppIntent { + + static var title: LocalizedStringResource = "Ask Loop" + static var description = IntentDescription( + "Send a question or request to Loop and get a response.", + categoryName: "Loop AI" + ) + + static var openAppWhenRun = true + + @Parameter(title: "Question", description: "The question or request to send to Loop.") + var query: String + + func perform() async throws -> some IntentResult & ProvidesDialog { + guard let sceneDelegate = await currentSceneDelegate() else { + return .result(dialog: "Loop isn't ready yet. Please open the app and try again.") + } + + // Post the query into the chat pipeline on the main thread. The + // notification carries the text; MessagingVC picks it up via + // `handleIntentMessage(_:)` and feeds it through `didSendMessageText`. + await MainActor.run { + NotificationCenter.default.post( + name: .loopIntentMessageReceived, + object: nil, + userInfo: ["message": query] + ) + } + + return .result(dialog: "Asking Loop: \(query)") + } + + @MainActor + private func currentSceneDelegate() -> SceneDelegate? { + UIApplication.shared.connectedScenes + .compactMap { $0 as? UIWindowScene } + .first? + .delegate as? 
SceneDelegate + } +} diff --git a/LoopIOS/AppIntents/CaptureToLoopIntent.swift b/LoopIOS/AppIntents/CaptureToLoopIntent.swift new file mode 100644 index 0000000..c9027f6 --- /dev/null +++ b/LoopIOS/AppIntents/CaptureToLoopIntent.swift @@ -0,0 +1,65 @@ +// +// CaptureToLoopIntent.swift +// Loop +// +// App Intent that captures the user's current on-screen content description +// and sends it to Loop as a new chat message. On iOS 27 Siri can read what's +// on screen and pass it through the intent's String parameter. +// +// Usage: "Hey Siri, send this to Loop" +// + +import AppIntents +import UIKit + +@available(iOS 27.0, *) +struct CaptureToLoopIntent: AppIntent { + + static var title: LocalizedStringResource = "Send to Loop" + static var description = IntentDescription( + "Capture what's on screen and send it to Loop as a new chat message.", + categoryName: "Loop AI" + ) + + static var openAppWhenRun = true + + @Parameter(title: "Content", description: "The on-screen content to send to Loop.") + var content: String? + + func perform() async throws -> some IntentResult & ProvidesDialog { + let text = content ?? "" + + guard !text.isEmpty else { + return .result(dialog: "There was no content to send to Loop.") + } + + guard let sceneDelegate = await currentSceneDelegate() else { + return .result(dialog: "Loop isn't ready yet. Please open the app and try again.") + } + + // Wrap the captured content so the agent knows it came from an + // on-screen capture, not free-form dictation. + let wrappedMessage = """ + [Captured from screen] + \(text) + """ + + await MainActor.run { + NotificationCenter.default.post( + name: .loopIntentMessageReceived, + object: nil, + userInfo: ["message": wrappedMessage] + ) + } + + return .result(dialog: "Sent to Loop.") + } + + @MainActor + private func currentSceneDelegate() -> SceneDelegate? { + UIApplication.shared.connectedScenes + .compactMap { $0 as? UIWindowScene } + .first? + .delegate as? 
SceneDelegate + } +} diff --git a/LoopIOS/AppIntents/IntentNotifications.swift b/LoopIOS/AppIntents/IntentNotifications.swift new file mode 100644 index 0000000..8aa1a0c --- /dev/null +++ b/LoopIOS/AppIntents/IntentNotifications.swift @@ -0,0 +1,17 @@ +// +// IntentNotifications.swift +// Loop +// +// Notification names shared between App Intents and the main chat UI. +// Intents post these; MessagingVC subscribes and injects the payload +// into the conversation. +// + +import Foundation + +extension Notification.Name { + /// Posted by AskLoopIntent / CaptureToLoopIntent / SearchLoopIntent when + /// they want to inject a user message into the active conversation. + /// `userInfo["message"]` carries the String payload. + static let loopIntentMessageReceived = Notification.Name("loopIntentMessageReceived") +} diff --git a/LoopIOS/AppIntents/LoopConversationEntity.swift b/LoopIOS/AppIntents/LoopConversationEntity.swift new file mode 100644 index 0000000..9d8f1d7 --- /dev/null +++ b/LoopIOS/AppIntents/LoopConversationEntity.swift @@ -0,0 +1,82 @@ +// +// LoopConversationEntity.swift +// Loop +// +// AppEntity + IndexedEntity representation of a Loop conversation so Siri +// and Spotlight can search across them. +// +// "What did Loop say about my Scout meeting?" — Siri matches the query +// against the indexed title and content description fields and surfaces +// matching conversations. 
+// + +import AppIntents +import CoreSpotlight +import Foundation + +// MARK: - Conversation Entity + +@available(iOS 27.0, *) +struct LoopConversationEntity: AppEntity, IndexedEntity { + + static var typeDisplayRepresentation: TypeDisplayRepresentation { + TypeDisplayRepresentation( + name: "Loop Conversation", + numericFormat: "\(placeholder: .int) conversations" + ) + } + + static var defaultQuery = LoopConversationQuery() + + var id: String + var title: String + var lastMessage: String + var updatedAt: Date + + var displayRepresentation: DisplayRepresentation { + DisplayRepresentation( + title: "\(title)", + subtitle: "\(lastMessage)" + ) + } + + /// Spotlight-indexable attributes so conversations surface in system search. + var attributeSet: CSSearchableItemAttributeSet? { + let attrs = CSSearchableItemAttributeSet() + attrs.displayName = title + attrs.contentDescription = lastMessage + return attrs + } +} + +// MARK: - Conversation Query + +@available(iOS 27.0, *) +struct LoopConversationQuery: EntityQuery { + + func entities(for identifiers: [String]) async throws -> [LoopConversationEntity] { + let manager = SimpleConversationManager.shared + return identifiers.compactMap { id in + guard let conv = manager.getConversation(by: id) else { return nil } + return LoopConversationEntity(from: conv) + } + } + + func suggestedEntities() async throws -> [LoopConversationEntity] { + let all = SimpleConversationManager.shared.getAllConversations() + let sorted = all.sorted { $0.updatedAt > $1.updatedAt } + return sorted.prefix(20).map { LoopConversationEntity(from: $0) } + } +} + +// MARK: - Convenience initializer + +@available(iOS 27.0, *) +extension LoopConversationEntity { + init(from conversation: SimpleConversation) { + self.id = conversation.id + self.title = conversation.title + self.lastMessage = conversation.messages.last?.content ?? 
"" + self.updatedAt = conversation.updatedAt + } +} diff --git a/LoopIOS/AppIntents/LoopNoteEntity.swift b/LoopIOS/AppIntents/LoopNoteEntity.swift new file mode 100644 index 0000000..e4779b8 --- /dev/null +++ b/LoopIOS/AppIntents/LoopNoteEntity.swift @@ -0,0 +1,115 @@ +// +// LoopNoteEntity.swift +// Loop +// +// AppEntity + IndexedEntity for Obsidian notes managed through Loop, making +// them searchable via Siri and Spotlight. +// +// "What did Loop say about my Scout meeting?" — Siri matches against +// the indexed note title and snippet. +// + +import AppIntents +import CoreSpotlight +import Foundation + +// MARK: - Note Entity + +@available(iOS 27.0, *) +struct LoopNoteEntity: AppEntity, IndexedEntity { + + static var typeDisplayRepresentation: TypeDisplayRepresentation { + TypeDisplayRepresentation( + name: "Loop Note", + numericFormat: "\(placeholder: .int) notes" + ) + } + + static var defaultQuery = LoopNoteQuery() + + /// Vault-relative path (doubles as the stable identifier). + var id: String + var title: String + var snippet: String + + var displayRepresentation: DisplayRepresentation { + DisplayRepresentation( + title: "\(title)", + subtitle: "\(snippet)" + ) + } + + /// Spotlight-indexable attributes. + var attributeSet: CSSearchableItemAttributeSet? { + let attrs = CSSearchableItemAttributeSet() + attrs.displayName = title + attrs.contentDescription = snippet + return attrs + } +} + +// MARK: - Note Query + +@available(iOS 27.0, *) +struct LoopNoteQuery: EntityQuery { + + func entities(for identifiers: [String]) async throws -> [LoopNoteEntity] { + guard ObsidianClient.isConfigured else { return [] } + + var results: [LoopNoteEntity] = [] + for path in identifiers { + if let entity = try? 
await fetchNote(path: path) { + results.append(entity) + } + } + return results + } + + func suggestedEntities() async throws -> [LoopNoteEntity] { + guard ObsidianClient.isConfigured else { return [] } + + return await withCheckedContinuation { continuation in + ObsidianClient.shared.today { json, error in + guard let todayPath = json?["path"] as? String else { + continuation.resume(returning: []) + return + } + ObsidianClient.shared.listFolder(path: todayPath) { listJSON, listError in + guard let files = listJSON?["files"] as? [[String: Any]] else { + continuation.resume(returning: []) + return + } + let entities = files.compactMap { file -> LoopNoteEntity? in + guard let name = file["name"] as? String, + let path = file["path"] as? String else { return nil } + return LoopNoteEntity( + id: path, + title: name.replacingOccurrences(of: ".md", with: ""), + snippet: "" + ) + } + continuation.resume(returning: entities) + } + } + } + } + + /// Fetch a single note by path from the Obsidian relay. + private func fetchNote(path: String) async throws -> LoopNoteEntity { + return try await withCheckedThrowingContinuation { continuation in + ObsidianClient.shared.readNote(path: path) { json, error in + if let error = error { + continuation.resume(throwing: error) + return + } + let content = (json?["content"] as? String) ?? "" + let title = path.components(separatedBy: "/").last? + .replacingOccurrences(of: ".md", with: "") ?? path + let snippet = String(content.prefix(200)) + continuation.resume(returning: LoopNoteEntity( + id: path, title: title, snippet: snippet + )) + } + } + } +} diff --git a/LoopIOS/AppIntents/LoopRememberIntent.swift b/LoopIOS/AppIntents/LoopRememberIntent.swift new file mode 100644 index 0000000..110eeac --- /dev/null +++ b/LoopIOS/AppIntents/LoopRememberIntent.swift @@ -0,0 +1,99 @@ +// +// LoopRememberIntent.swift +// Loop +// +// App Intent that appends a quick note to today's Obsidian daily note via +// the existing Obsidian skill / relay. 
Runs without opening the app so the +// user can capture a thought from any context. +// +// Usage: "Hey Siri, remember to buy dog food for Leo" +// + +import AppIntents +import Foundation + +@available(iOS 27.0, *) +struct LoopRememberIntent: AppIntent { + + static var title: LocalizedStringResource = "Remember with Loop" + static var description = IntentDescription( + "Save a quick note to today's Obsidian daily note via Loop.", + categoryName: "Loop AI" + ) + + /// This intent can run in the background — no need to open the app just + /// to append a line to a note. + static var openAppWhenRun = false + + @Parameter(title: "Note", description: "What you want Loop to remember.") + var note: String + + func perform() async throws -> some IntentResult & ProvidesDialog { + guard ObsidianClient.isConfigured else { + return .result(dialog: "Obsidian isn't connected yet. Open Loop → Settings → Integrations to set it up.") + } + + // Append the note to today's Obsidian daily note. The relay's + // `update_obsidian_note` with mode "append" would work, but we + // first need today's path. Use the convenience `createTodayNote` + // which auto-files into the right day folder; if a note with the + // same title already exists the relay returns the existing path, + // so we fall back to an append update. + let timestamp = LoopRememberIntent.shortTimestamp() + let line = "- \(timestamp) \(note)" + + let result: String = try await withCheckedThrowingContinuation { continuation in + // Try appending to the existing "Quick Notes" today-note first. + ObsidianClient.shared.today { todayJSON, todayError in + guard let todayPath = todayJSON?["path"] as? String else { + // Can't resolve today's folder — create a fresh note. + ObsidianClient.shared.createTodayNote( + title: "Quick Notes", + content: line + ) { json, error in + if let error = error { + continuation.resume(throwing: error) + } else { + let path = (json?["path"] as? String) ??
"today's folder" + continuation.resume(returning: path) + } + } + return + } + + let notePath = todayPath + "/Quick Notes.md" + ObsidianClient.shared.updateNote( + path: notePath, + content: "\n\(line)", + mode: "append" + ) { json, error in + if error != nil { + // Note doesn't exist yet — create it. + ObsidianClient.shared.createTodayNote( + title: "Quick Notes", + content: "# Quick Notes\n\n\(line)" + ) { createJSON, createError in + if let createError = createError { + continuation.resume(throwing: createError) + } else { + let path = (createJSON?["path"] as? String) ?? "today's folder" + continuation.resume(returning: path) + } + } + } else { + continuation.resume(returning: notePath) + } + } + } + } + + return .result(dialog: "Noted! Saved to \(result).") + } + + /// Returns a short 24-hour `HH:mm` timestamp for the bullet prefix. Pinned to the POSIX locale so the user's 12/24-hour setting cannot rewrite the fixed format. + private static func shortTimestamp() -> String { + let fmt = DateFormatter() + fmt.locale = Locale(identifier: "en_US_POSIX") + fmt.dateFormat = "HH:mm" + return fmt.string(from: Date()) + } +} diff --git a/LoopIOS/AppIntents/SearchLoopIntent.swift b/LoopIOS/AppIntents/SearchLoopIntent.swift new file mode 100644 index 0000000..f94150c --- /dev/null +++ b/LoopIOS/AppIntents/SearchLoopIntent.swift @@ -0,0 +1,52 @@ +// +// SearchLoopIntent.swift +// Loop +// +// Dedicated search intent that lets Siri search Loop's conversations and +// notes. Searches locally first, then falls through to the agent. +// +// Usage: "What did Loop say about my Scout meeting?" +// + +import AppIntents +import Foundation + +@available(iOS 27.0, *) +struct SearchLoopIntent: AppIntent { + + static var title: LocalizedStringResource = "Search Loop" + static var description = IntentDescription( + "Search Loop conversations and notes.", + categoryName: "Loop AI" + ) + + static var openAppWhenRun = true + + @Parameter(title: "Query", description: "What to search for in Loop.") + var query: String + + func perform() async throws -> some IntentResult & ProvidesDialog { + // Search conversations locally.
+ let conversations = SimpleConversationManager.shared.getAllConversations() + let lowerQuery = query.lowercased() + let matches = conversations.filter { conv in + conv.title.lowercased().contains(lowerQuery) || + conv.messages.contains { $0.content.lowercased().contains(lowerQuery) } + }.prefix(5) + + if matches.isEmpty { + // Fall through to the agent — open the app and ask Loop directly. + await MainActor.run { + NotificationCenter.default.post( + name: .loopIntentMessageReceived, + object: nil, + userInfo: ["message": "Search my conversations and notes for: \(query)"] + ) + } + return .result(dialog: "Searching Loop for \"\(query)\"…") + } + + let summaries = matches.map { "• \($0.title)" }.joined(separator: "\n") + return .result(dialog: "Found \(matches.count) conversation(s):\n\(summaries)") + } +} diff --git a/LoopIOS/Assets.xcassets/loop-runner-linux-amd64.dataset/Contents.json b/LoopIOS/Assets.xcassets/loop-runner-linux-amd64.dataset/Contents.json new file mode 100644 index 0000000..5239314 --- /dev/null +++ b/LoopIOS/Assets.xcassets/loop-runner-linux-amd64.dataset/Contents.json @@ -0,0 +1,12 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + }, + "data" : [ + { + "idiom" : "universal", + "filename" : "loop-runner-linux-amd64.gz" + } + ] +} diff --git a/LoopIOS/Assets.xcassets/loop-runner-linux-amd64.dataset/loop-runner-linux-amd64.gz b/LoopIOS/Assets.xcassets/loop-runner-linux-amd64.dataset/loop-runner-linux-amd64.gz new file mode 100644 index 0000000..5df8d04 Binary files /dev/null and b/LoopIOS/Assets.xcassets/loop-runner-linux-amd64.dataset/loop-runner-linux-amd64.gz differ diff --git a/LoopIOS/Assets.xcassets/loop-runner-linux-arm64.dataset/Contents.json b/LoopIOS/Assets.xcassets/loop-runner-linux-arm64.dataset/Contents.json new file mode 100644 index 0000000..64d54a7 --- /dev/null +++ b/LoopIOS/Assets.xcassets/loop-runner-linux-arm64.dataset/Contents.json @@ -0,0 +1,12 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + }, + "data" : 
[ + { + "idiom" : "universal", + "filename" : "loop-runner-linux-arm64.gz" + } + ] +} diff --git a/LoopIOS/Assets.xcassets/loop-runner-linux-arm64.dataset/loop-runner-linux-arm64.gz b/LoopIOS/Assets.xcassets/loop-runner-linux-arm64.dataset/loop-runner-linux-arm64.gz new file mode 100644 index 0000000..a47aedc Binary files /dev/null and b/LoopIOS/Assets.xcassets/loop-runner-linux-arm64.dataset/loop-runner-linux-arm64.gz differ diff --git a/LoopIOS/Browse/BrowseAttachment.swift b/LoopIOS/Browse/BrowseAttachment.swift new file mode 100644 index 0000000..c138d53 --- /dev/null +++ b/LoopIOS/Browse/BrowseAttachment.swift @@ -0,0 +1,130 @@ +// +// BrowseAttachment.swift +// Loop +// +// Defines the browse attachment type for chat messages. Mirrors the +// StoryAttachment / PDFAttachment pattern: starts at .navigating, updates +// through .reading as the on-device agent drives the page, and finishes at +// .done (with a summary + replay bundle) or .failed (with a reason). +// +// A browse session is STATELESS by design — no cookies, no login persistence. +// Each session writes a `.browsereplay`-style bundle to the workspace at +// workspace://browse// holding per-step screenshot + DOM-snapshot pairs. +// + +import Foundation + +/// One captured step of a browse session — the frame the agent saw at that +/// point in the trip. Persisted (minus the heavy PNG/HTML, which live as +/// sibling files) into the replay bundle's manifest.json so the player can +/// reconstruct a scrubbable timeline. +struct BrowseFrame: Codable, Equatable { + /// Seconds since the session started (monotonic ordering for the scrubber). + let ts: TimeInterval + /// The URL the page was showing when this frame was captured. + let url: String + /// Human-readable description of the action that produced this frame + /// (e.g. "navigate", "click .cta", "scroll down", "read"). + let action: String + /// Filename of the PNG screenshot in the bundle (sibling of manifest.json). 
+ let screenshot: String + /// Filename of the HTML DOM snapshot in the bundle. + let domSnapshot: String + /// Viewport the frame was captured at, e.g. "390x844". + let viewport: String +} + +/// The replay bundle manifest — the index file written at the root of a +/// `workspace://browse//` directory. +struct BrowseReplayManifest: Codable { + let replayId: String + let url: String + let instructions: String + var finalURL: String? + var summary: String? + var frames: [BrowseFrame] +} + +/// A live/replayable browse session attached to a chat message. The cell +/// renders it as a small preview card (latest screenshot + URL + status pill); +/// tapping opens the full-screen player — a read-only live view while the +/// agent drives, flipping to a scrubbable replay once the session ends. +struct BrowseAttachment: Codable { + enum Status: String, Codable, Equatable { + case navigating // loading / moving between pages + case reading // parsing DOM, deciding next action + case done // finished — summary + replay ready + case failed // errored — see failureReason + } + + /// Doubles as the `replay_id`. + let id: String + /// The URL the session was asked to open. + let url: String + /// The natural-language task the agent was given. + let instructions: String + + var status: Status + /// Short status detail shown under the pill (e.g. the current URL, or + /// "Reading hero section"). + var statusDetail: String? + /// Number of agent steps executed so far. + var stepCount: Int + /// Absolute file path to the most recent screenshot PNG — drives the card + /// thumbnail and the live-preview poster frame. + var latestThumbnailPath: String? + /// Absolute path to the replay bundle directory (workspace://browse//). + var replayDirPath: String? + + /// Final grounded summary of what the agent observed (filled on .done). + var summary: String? + /// The URL the session ended on. + var finalURL: String? + + var failureReason: String? + let conversationId: String? 
+ + init(id: String = UUID().uuidString, + url: String, + instructions: String, + status: Status = .navigating, + statusDetail: String? = nil, + stepCount: Int = 0, + latestThumbnailPath: String? = nil, + replayDirPath: String? = nil, + summary: String? = nil, + finalURL: String? = nil, + failureReason: String? = nil, + conversationId: String? = nil) { + self.id = id + self.url = url + self.instructions = instructions + self.status = status + self.statusDetail = statusDetail + self.stepCount = stepCount + self.latestThumbnailPath = latestThumbnailPath + self.replayDirPath = replayDirPath + self.summary = summary + self.finalURL = finalURL + self.failureReason = failureReason + self.conversationId = conversationId + } + + /// The user-facing status pill text — Navigating… / Reading… / Done. + var pillText: String { + switch status { + case .navigating: return "Navigating…" + case .reading: return "Reading…" + case .done: return "Done" + case .failed: return "Failed" + } + } + + /// Host display label for the URL (host only, no scheme/path) for the card. + var displayHost: String { + if let u = URL(string: url), let host = u.host { + return host.hasPrefix("www.") ? String(host.dropFirst(4)) : host + } + return url + } +} diff --git a/LoopIOS/Browse/BrowseGenerationService.swift b/LoopIOS/Browse/BrowseGenerationService.swift new file mode 100644 index 0000000..88b15d3 --- /dev/null +++ b/LoopIOS/Browse/BrowseGenerationService.swift @@ -0,0 +1,80 @@ +// +// BrowseGenerationService.swift +// Loop +// +// Service layer that coordinates browse sessions. Mirrors +// StoryGenerationService: the skill hands it a request, it spins up a +// BrowseSession (the WKWebView driver), drops a live preview card into the +// chat via the host, streams per-step updates, and resolves with the final +// grounded attachment when the agent finishes. 
+// +// It also keeps a registry of live sessions so the full-screen player can +// borrow the running web view (read-only mirror) while a session is in +// flight, then fall back to the on-disk replay bundle once it ends. +// + +#if os(iOS) + +import UIKit + +/// Implemented by MessagingVC to receive browse lifecycle events. The host +/// owns placing/refreshing the preview card on the assistant message. +protocol BrowseSkillHost: AnyObject { + func browseSkillDidStart(_ attachment: BrowseAttachment) + func browseSkillDidUpdate(_ attachment: BrowseAttachment) + func browseSkillDidFinish(_ attachment: BrowseAttachment) +} + +@MainActor +final class BrowseGenerationService: NSObject { + static let shared = BrowseGenerationService() + + weak var host: BrowseSkillHost? + + private var liveSessions: [String: BrowseSession] = [:] + + private override init() { super.init() } + + /// Live session for an attachment id, if one is still running (used by the + /// full-screen player to mirror what the agent currently sees). + func liveSession(for id: String) -> BrowseSession? { + return liveSessions[id] + } + + /// Run a browse session to completion. Drops the live card immediately, + /// streams updates, and returns the terminal session so the skill can + /// build its grounded tool result. + @discardableResult + func run(url: URL, + instructions: String, + maxSteps: Int, + viewport: CGSize, + conversationId: String?) 
async -> BrowseSession { + let session = BrowseSession( + attachmentId: UUID().uuidString, + url: url, + instructions: instructions, + maxSteps: maxSteps, + viewport: viewport, + conversationId: conversationId + ) + session.delegate = self + liveSessions[session.attachmentId] = session + + host?.browseSkillDidStart(session.attachment) + + let final = await session.run() + + host?.browseSkillDidFinish(final) + liveSessions[session.attachmentId] = nil + return session + } +} + +extension BrowseGenerationService: BrowseSessionDelegate { + func browseSession(_ session: BrowseSession, didUpdate attachment: BrowseAttachment) { + host?.browseSkillDidUpdate(attachment) + } +} + +#endif diff --git a/LoopIOS/Browse/BrowsePlayerVC.swift b/LoopIOS/Browse/BrowsePlayerVC.swift new file mode 100644 index 0000000..541110d --- /dev/null +++ b/LoopIOS/Browse/BrowsePlayerVC.swift @@ -0,0 +1,360 @@ +// +// BrowsePlayerVC.swift +// Loop +// +// Full-screen viewer for a browse session, presented when the user taps the +// preview card in chat. Two modes: +// +// 1. LIVE (session still running): mirrors the agent's real WKWebView so the +// user can watch it navigate in real time. Read-only — a gesture- +// swallowing overlay eats taps/scrolls so only the agent drives. +// +// 2. REPLAY (session ended): a scrubbable timeline of the frames the agent +// saw — screenshot + DOM-snapshot pairs from the workspace://browse// +// bundle — with play/pause and per-frame Open DOM / Copy text / Share. +// + +#if os(iOS) + +import UIKit + +final class BrowsePlayerVC: UIViewController { + + /// The attachment to display. Set before presentation. + var attachment: BrowseAttachment! + + // Shared chrome + private let closeButton = UIButton(type: .system) + private let statusBanner = UILabel() + + // Live mode + private let liveContainer = UIView() + private let gestureSwallow = UIView() + private var pollTimer: Timer? + private weak var liveSession: BrowseSession? 
+ + // Replay mode + private let imageView = UIImageView() + private let scrubber = UISlider() + private let playPauseButton = UIButton(type: .system) + private let frameLabel = UILabel() + private var frames: [BrowseFrame] = [] + private var replayDir: URL? + private var currentFrame = 0 + private var playTimer: Timer? + private var isPlaying = false + + // MARK: - Lifecycle + + override func viewDidLoad() { + super.viewDidLoad() + view.backgroundColor = .black + setupCloseButton() + setupStatusBanner() + + if attachment.status == .navigating || attachment.status == .reading, + let session = BrowseGenerationService.shared.liveSession(for: attachment.id) { + liveSession = session + setupLiveMode(session: session) + } else { + setupReplayMode() + } + } + + override var prefersStatusBarHidden: Bool { true } + + override func viewWillDisappear(_ animated: Bool) { + super.viewWillDisappear(animated) + pollTimer?.invalidate() + playTimer?.invalidate() + liveSession?.endMirroring() + } + + // MARK: - Chrome + + private func setupCloseButton() { + closeButton.translatesAutoresizingMaskIntoConstraints = false + let cfg = UIImage.SymbolConfiguration(pointSize: 16, weight: .semibold) + closeButton.setImage(UIImage(systemName: "xmark", withConfiguration: cfg), for: .normal) + closeButton.tintColor = .white + closeButton.backgroundColor = UIColor.white.withAlphaComponent(0.18) + closeButton.layer.cornerRadius = 18 + closeButton.addTarget(self, action: #selector(closeTapped), for: .touchUpInside) + view.addSubview(closeButton) + NSLayoutConstraint.activate([ + closeButton.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 10), + closeButton.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -16), + closeButton.widthAnchor.constraint(equalToConstant: 36), + closeButton.heightAnchor.constraint(equalToConstant: 36), + ]) + } + + private func setupStatusBanner() { + statusBanner.translatesAutoresizingMaskIntoConstraints = false + 
statusBanner.font = .systemFont(ofSize: 13, weight: .semibold) + statusBanner.textColor = .white + statusBanner.textAlignment = .center + statusBanner.numberOfLines = 1 + statusBanner.lineBreakMode = .byTruncatingMiddle + statusBanner.backgroundColor = UIColor.black.withAlphaComponent(0.4) + statusBanner.layer.cornerRadius = 10 + statusBanner.clipsToBounds = true + view.addSubview(statusBanner) + NSLayoutConstraint.activate([ + statusBanner.centerYAnchor.constraint(equalTo: closeButton.centerYAnchor), + statusBanner.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 16), + statusBanner.trailingAnchor.constraint(equalTo: closeButton.leadingAnchor, constant: -12), + statusBanner.heightAnchor.constraint(equalToConstant: 32), + ]) + } + + @objc private func closeTapped() { dismiss(animated: true) } + + // MARK: - Live mode + + private func setupLiveMode(session: BrowseSession) { + liveContainer.translatesAutoresizingMaskIntoConstraints = false + liveContainer.backgroundColor = .black + view.insertSubview(liveContainer, at: 0) + NSLayoutConstraint.activate([ + liveContainer.topAnchor.constraint(equalTo: statusBanner.bottomAnchor, constant: 12), + liveContainer.leadingAnchor.constraint(equalTo: view.leadingAnchor), + liveContainer.trailingAnchor.constraint(equalTo: view.trailingAnchor), + liveContainer.bottomAnchor.constraint(equalTo: view.bottomAnchor), + ]) + + session.mirror(into: liveContainer) + + // Swallow all touches so the user can watch but not drive. 
+ gestureSwallow.translatesAutoresizingMaskIntoConstraints = false + gestureSwallow.backgroundColor = .clear + gestureSwallow.isUserInteractionEnabled = true + view.addSubview(gestureSwallow) + NSLayoutConstraint.activate([ + gestureSwallow.topAnchor.constraint(equalTo: liveContainer.topAnchor), + gestureSwallow.leadingAnchor.constraint(equalTo: liveContainer.leadingAnchor), + gestureSwallow.trailingAnchor.constraint(equalTo: liveContainer.trailingAnchor), + gestureSwallow.bottomAnchor.constraint(equalTo: liveContainer.bottomAnchor), + ]) + + refreshLiveBanner(session.attachment) + pollTimer = Timer.scheduledTimer(withTimeInterval: 0.4, repeats: true) { [weak self] _ in + self?.tickLive() + } + } + + private func tickLive() { + guard let session = liveSession else { transitionToReplay(); return } + let a = session.attachment + refreshLiveBanner(a) + if a.status == .done || a.status == .failed || + BrowseGenerationService.shared.liveSession(for: attachment.id) == nil { + transitionToReplay() + } + } + + private func refreshLiveBanner(_ a: BrowseAttachment) { + let detail = a.statusDetail.map { " · \($0)" } ?? "" + statusBanner.text = "\(a.pillText)\(detail) (watching — read-only)" + } + + private func transitionToReplay() { + pollTimer?.invalidate() + pollTimer = nil + liveSession?.endMirroring() + gestureSwallow.removeFromSuperview() + liveContainer.removeFromSuperview() + liveSession = nil + // Refresh attachment status from disk-backed manifest. 
+ setupReplayMode() + } + + // MARK: - Replay mode + + private func setupReplayMode() { + loadManifest() + + imageView.translatesAutoresizingMaskIntoConstraints = false + imageView.contentMode = .scaleAspectFit + imageView.backgroundColor = .black + imageView.isUserInteractionEnabled = true + view.insertSubview(imageView, at: 0) + + frameLabel.translatesAutoresizingMaskIntoConstraints = false + frameLabel.font = .systemFont(ofSize: 12, weight: .medium) + frameLabel.textColor = UIColor.white.withAlphaComponent(0.85) + frameLabel.textAlignment = .center + frameLabel.numberOfLines = 2 + view.addSubview(frameLabel) + + scrubber.translatesAutoresizingMaskIntoConstraints = false + scrubber.minimumValue = 0 + scrubber.maximumValue = Float(max(0, frames.count - 1)) + scrubber.minimumTrackTintColor = .white + scrubber.addTarget(self, action: #selector(scrubChanged), for: .valueChanged) + + playPauseButton.translatesAutoresizingMaskIntoConstraints = false + playPauseButton.tintColor = .white + playPauseButton.setImage(UIImage(systemName: "play.fill"), for: .normal) + playPauseButton.addTarget(self, action: #selector(togglePlay), for: .touchUpInside) + view.addSubview(playPauseButton) + view.addSubview(scrubber) + + NSLayoutConstraint.activate([ + imageView.topAnchor.constraint(equalTo: statusBanner.bottomAnchor, constant: 12), + imageView.leadingAnchor.constraint(equalTo: view.leadingAnchor), + imageView.trailingAnchor.constraint(equalTo: view.trailingAnchor), + imageView.bottomAnchor.constraint(equalTo: frameLabel.topAnchor, constant: -10), + + frameLabel.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 20), + frameLabel.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -20), + frameLabel.bottomAnchor.constraint(equalTo: scrubber.topAnchor, constant: -10), + + playPauseButton.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 16), + playPauseButton.centerYAnchor.constraint(equalTo: scrubber.centerYAnchor), + 
playPauseButton.widthAnchor.constraint(equalToConstant: 36), + + scrubber.leadingAnchor.constraint(equalTo: playPauseButton.trailingAnchor, constant: 8), + scrubber.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -20), + scrubber.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor, constant: -16), + ]) + + let longPress = UILongPressGestureRecognizer(target: self, action: #selector(handleLongPress)) + imageView.addGestureRecognizer(longPress) + + statusBanner.text = frames.isEmpty ? "Replay · no frames captured" : "Replay · \(frames.count) frames" + if frames.isEmpty { + frameLabel.text = attachment.summary ?? "Nothing was captured." + } else { + showFrame(0) + } + } + + private func loadManifest() { + let dir: URL + if let p = attachment.replayDirPath { + dir = URL(fileURLWithPath: p) + } else { + dir = BrowseSession.workspaceBrowseDir().appendingPathComponent(attachment.id) + } + replayDir = dir + let manifestURL = dir.appendingPathComponent("manifest.json") + guard let data = try? Data(contentsOf: manifestURL), + let manifest = try? JSONDecoder().decode(BrowseReplayManifest.self, from: data) else { + frames = [] + return + } + frames = manifest.frames + scrubber.maximumValue = Float(max(0, frames.count - 1)) + } + + private func showFrame(_ index: Int) { + guard index >= 0, index < frames.count, let dir = replayDir else { return } + currentFrame = index + let frame = frames[index] + if let img = UIImage(contentsOfFile: dir.appendingPathComponent(frame.screenshot).path) { + imageView.image = img + } + scrubber.value = Float(index) + frameLabel.text = "Step \(index + 1)/\(frames.count) · \(frame.action)\n\(frame.url)" + } + + @objc private func scrubChanged() { + showFrame(Int(scrubber.value.rounded())) + } + + @objc private func togglePlay() { + isPlaying.toggle() + playPauseButton.setImage(UIImage(systemName: isPlaying ? 
"pause.fill" : "play.fill"), for: .normal) + if isPlaying { + if currentFrame >= frames.count - 1 { showFrame(0) } + playTimer = Timer.scheduledTimer(withTimeInterval: 1.1, repeats: true) { [weak self] _ in + guard let self else { return } + if self.currentFrame >= self.frames.count - 1 { + self.togglePlay() + } else { + self.showFrame(self.currentFrame + 1) + } + } + } else { + playTimer?.invalidate() + playTimer = nil + } + } + + // MARK: - Per-frame actions + + @objc private func handleLongPress(_ g: UILongPressGestureRecognizer) { + guard g.state == .began, currentFrame < frames.count, let dir = replayDir else { return } + let frame = frames[currentFrame] + let sheet = UIAlertController(title: "Step \(currentFrame + 1)", message: frame.url, preferredStyle: .actionSheet) + + sheet.addAction(UIAlertAction(title: "Open DOM", style: .default) { [weak self] _ in + self?.openDOM(at: dir.appendingPathComponent(frame.domSnapshot)) + }) + sheet.addAction(UIAlertAction(title: "Copy text", style: .default) { [weak self] _ in + self?.copyText(at: dir.appendingPathComponent(frame.domSnapshot)) + }) + sheet.addAction(UIAlertAction(title: "Share screenshot", style: .default) { [weak self] _ in + self?.shareScreenshot(at: dir.appendingPathComponent(frame.screenshot)) + }) + sheet.addAction(UIAlertAction(title: "Cancel", style: .cancel)) + if let pop = sheet.popoverPresentationController { + pop.sourceView = imageView + pop.sourceRect = CGRect(x: imageView.bounds.midX, y: imageView.bounds.midY, width: 1, height: 1) + } + present(sheet, animated: true) + } + + private func openDOM(at url: URL) { + guard let html = try? 
String(contentsOf: url, encoding: .utf8) else { return } + let vc = UIViewController() + vc.view.backgroundColor = .systemBackground + let tv = UITextView() + tv.translatesAutoresizingMaskIntoConstraints = false + tv.isEditable = false + tv.font = .monospacedSystemFont(ofSize: 11, weight: .regular) + tv.text = html + vc.view.addSubview(tv) + NSLayoutConstraint.activate([ + tv.topAnchor.constraint(equalTo: vc.view.safeAreaLayoutGuide.topAnchor), + tv.leadingAnchor.constraint(equalTo: vc.view.leadingAnchor), + tv.trailingAnchor.constraint(equalTo: vc.view.trailingAnchor), + tv.bottomAnchor.constraint(equalTo: vc.view.bottomAnchor), + ]) + let nav = UINavigationController(rootViewController: vc) + vc.title = "DOM Snapshot" + vc.navigationItem.leftBarButtonItem = UIBarButtonItem(barButtonSystemItem: .done, target: self, action: #selector(dismissPresented)) + present(nav, animated: true) + } + + @objc private func dismissPresented() { presentedViewController?.dismiss(animated: true) } + + private func copyText(at domURL: URL) { + guard let html = try? String(contentsOf: domURL, encoding: .utf8) else { return } + UIPasteboard.general.string = BrowsePlayerVC.stripTags(html) + } + + private func shareScreenshot(at url: URL) { + guard FileManager.default.fileExists(atPath: url.path) else { return } + let av = UIActivityViewController(activityItems: [url], applicationActivities: nil) + if let pop = av.popoverPresentationController { + pop.sourceView = imageView + pop.sourceRect = CGRect(x: imageView.bounds.midX, y: imageView.bounds.midY, width: 1, height: 1) + } + present(av, animated: true) + } + + private static func stripTags(_ html: String) -> String { + var s = html + // Drop script/style blocks first, then remaining tags. 
+ for pattern in ["script", "style"].map({ "(?is)<\($0)\\b[^>]*>.*?</\($0)\\s*>" }) + ["<[^>]+>"] { // (?is): case-insensitive, dot spans newlines; whole script/style elements go first, then any remaining tag + s = s.replacingOccurrences(of: pattern, with: " ", options: .regularExpression) + } + return s.replacingOccurrences(of: "[ \\t]+", with: " ", options: .regularExpression) + .replacingOccurrences(of: "\\n{3,}", with: "\n\n", options: .regularExpression) + .trimmingCharacters(in: .whitespacesAndNewlines) + } +} + +#endif diff --git a/LoopIOS/Browse/BrowseSession.swift b/LoopIOS/Browse/BrowseSession.swift new file mode 100644 index 0000000..ecfe1b4 --- /dev/null +++ b/LoopIOS/Browse/BrowseSession.swift @@ -0,0 +1,721 @@ +// +// BrowseSession.swift +// Loop +// +// Drives a real WKWebView on-device so the agent can fetch, render, and +// navigate JavaScript-heavy pages. This is the engine behind the `browse` +// skill: it owns one fresh `WKWebsiteDataStore.nonPersistent()` web view +// (stateless — no cookies, no login persistence), injects a JS bridge so the +// agent can read/act on the DOM, and runs an on-device agent loop: +// +// load → read page state → model picks next action → execute → capture +// a frame (screenshot + DOM snapshot) → repeat until finish / max_steps. +// +// Each step is persisted into a replay bundle at workspace://browse// so +// the chat card can flip into a scrubbable replay when the session ends. +// +// iOS-first; the driver stays UIKit/WebKit so a Mac fast-follow can host the +// same `WKWebView` in an AppKit window with identical behaviour. +// + +#if os(iOS) + +import UIKit +import WebKit + +/// Receives per-step progress so the host (MessagingVC) can keep the live +/// preview card and any open full-screen player in sync. +@MainActor +protocol BrowseSessionDelegate: AnyObject { + func browseSession(_ session: BrowseSession, didUpdate attachment: BrowseAttachment) +} + +@MainActor +final class BrowseSession: NSObject { + + // MARK: - Inputs + + let attachmentId: String + let startURL: URL + let instructions: String + let maxSteps: Int + let viewport: CGSize + let conversationId: String?
+ + weak var delegate: BrowseSessionDelegate? + + // MARK: - Live state + + private(set) var attachment: BrowseAttachment + /// The live web view — exposed so the full-screen player can mirror what + /// the agent is looking at (read-only) while the session runs. + let webView: WKWebView + + /// Whether a full-screen player has borrowed the web view. While borrowed + /// the session leaves the view in the player's hierarchy; otherwise it + /// lives in an offscreen host so WebKit keeps rendering/snapshotting. + private(set) var isMirroredFullScreen = false + + // MARK: - Internals + + private let offscreenHost = UIView() + private var startTime = Date() + private var frames: [BrowseFrame] = [] + private var convoMessages: [MessageStruct] = [] + private var finished = false + private var loadContinuation: CheckedContinuation? + + private let replayDir: URL + + // MARK: - Init + + init(attachmentId: String, + url: URL, + instructions: String, + maxSteps: Int, + viewport: CGSize, + conversationId: String?) { + self.attachmentId = attachmentId + self.startURL = url + self.instructions = instructions + self.maxSteps = maxSteps + self.viewport = viewport + self.conversationId = conversationId + + self.replayDir = BrowseSession.workspaceBrowseDir() + .appendingPathComponent(attachmentId, isDirectory: true) + + // Fresh, non-persistent data store every session — no cookies / no + // login carried across calls. This is the statelessness guarantee. 
+ let config = WKWebViewConfiguration() + config.websiteDataStore = .nonPersistent() + config.allowsInlineMediaPlayback = true + config.mediaTypesRequiringUserActionForPlayback = .all + let prefs = WKWebpagePreferences() + prefs.allowsContentJavaScript = true + config.defaultWebpagePreferences = prefs + + let wv = WKWebView(frame: CGRect(origin: .zero, size: viewport), configuration: config) + wv.customUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1" + wv.allowsLinkPreview = false + self.webView = wv + + self.attachment = BrowseAttachment( + id: attachmentId, + url: url.absoluteString, + instructions: instructions, + status: .navigating, + statusDetail: url.host, + replayDirPath: replayDir.path, + conversationId: conversationId + ) + + super.init() + + wv.navigationDelegate = self + wv.uiDelegate = self + + // Park the web view offscreen inside the key window so WebKit treats + // it as on-screen (required for full JS rendering + snapshotting). + offscreenHost.frame = CGRect(origin: CGPoint(x: -10_000, y: 0), size: viewport) + offscreenHost.isUserInteractionEnabled = false + offscreenHost.addSubview(wv) + BrowseSession.keyWindow()?.addSubview(offscreenHost) + + try? FileManager.default.createDirectory(at: replayDir, withIntermediateDirectories: true) + } + + // MARK: - Full-screen mirroring (read-only live view) + + /// Move the live web view into `container` so the player can show exactly + /// what the agent sees. Read-only: the player swallows gestures. 
+    func mirror(into container: UIView) {
+        isMirroredFullScreen = true
+        webView.translatesAutoresizingMaskIntoConstraints = false
+        container.addSubview(webView)
+        NSLayoutConstraint.activate([
+            webView.topAnchor.constraint(equalTo: container.topAnchor),
+            webView.leadingAnchor.constraint(equalTo: container.leadingAnchor),
+            webView.trailingAnchor.constraint(equalTo: container.trailingAnchor),
+            webView.bottomAnchor.constraint(equalTo: container.bottomAnchor),
+        ])
+    }
+
+    /// Return the web view to its offscreen host (player dismissed).
+    func endMirroring() {
+        isMirroredFullScreen = false
+        webView.removeFromSuperview()
+        webView.translatesAutoresizingMaskIntoConstraints = true
+        webView.frame = CGRect(origin: .zero, size: viewport)
+        offscreenHost.addSubview(webView)
+    }
+
+    // MARK: - Run
+
+    /// Execute the full browse session. Resolves when the agent finishes,
+    /// errors, or the step / time budget is exhausted. Returns the terminal
+    /// attachment (status .done or .failed).
+    func run() async -> BrowseAttachment {
+        startTime = Date()
+
+        // Hard session timeout (default 60s) races the agent loop so a stuck
+        // page can never hold the runtime open. Note that a task group only
+        // returns once *every* child has finished, so the timeout branch must
+        // also unblock the agent loop (see `timedOutAttachment`).
+        let result = await withTaskGroup(of: BrowseAttachment?.self) { group -> BrowseAttachment in
+            group.addTask { [weak self] in
+                guard let self else { return nil }
+                return await self.driveLoop()
+            }
+            group.addTask { [weak self] in
+                try? await Task.sleep(nanoseconds: UInt64(BrowseSession.sessionTimeout * 1_000_000_000))
+                guard let self else { return nil }
+                return await self.timedOutAttachment()
+            }
+            // First non-nil wins; cancel the rest.
+            for await value in group {
+                if let value {
+                    group.cancelAll()
+                    return value
+                }
+            }
+            return self.attachment
+        }
+
+        await persistManifest()
+        teardown()
+        return result
+    }
+
+    /// Marks the session as stopped at the time limit and returns the terminal
+    /// attachment. A no-op (returns the current attachment) when the session
+    /// has already finished.
+    private func timedOutAttachment() -> BrowseAttachment {
+        if finished { return attachment }
+        finished = true
+        attachment.status = .done
+        attachment.statusDetail = "Stopped at time limit"
+        if (attachment.summary ?? "").isEmpty {
+            attachment.summary = "Browse session hit the \(Int(BrowseSession.sessionTimeout))s time limit before finishing. Partial observations were captured."
+        }
+        attachment.finalURL = webView.url?.absoluteString ?? attachment.url
+        // If the agent loop is still suspended in `load(_:)`, release it now;
+        // otherwise the task group in `run()` would keep waiting for it and the
+        // timeout would never take effect.
+        loadContinuation?.resume()
+        loadContinuation = nil
+        // Publish the terminal state, as `finish` and `fail` do.
+        emit()
+        return attachment
+    }
+
+    // MARK: - The agent loop
+
+    /// Loads the start URL, seeds the conversation with the initial page
+    /// state, then alternates model calls and actions until the model
+    /// finishes, the model is unreachable, or `maxSteps` is used up.
+    private func driveLoop() async -> BrowseAttachment {
+        // 1. Navigate to the start URL.
+        await load(startURL)
+        guard !finished else { return attachment }
+
+        // 2. Capture the initial frame and seed the model.
+        let initialState = await readPageState()
+        await captureFrame(action: "navigate")
+        // The session may have hit the time limit while capturing; do not
+        // overwrite its terminal status.
+        guard !finished else { return attachment }
+        updateStatus(.reading, detail: webView.url?.host)
+
+        convoMessages = [
+            MessageStruct(role: "system", content: Self.systemPrompt(viewport: viewport)),
+            MessageStruct(role: "user", content: """
+                Task: \(instructions)
+
+                You are now on the page. Current state:
+                \(initialState)
+
+                Decide the next action by calling browse_action. When you have
+                gathered enough to answer the task, call browse_action with
+                action "finish" and provide a grounded summary + observations.
+                """),
+        ]
+
+        // 3. Step until finish / budget.
+        var step = 0
+        while step < maxSteps && !finished {
+            step += 1
+            guard let reply = await modelCall() else {
+                // Model unreachable — finish gracefully on what we have.
+                finish(summary: "Couldn't reach the model to continue browsing. Captured the initial page state.",
+                       observations: [])
+                break
+            }
+            convoMessages.append(reply)
+
+            guard let call = reply.functions.first(where: { $0.name == "browse_action" }) ?? reply.functions.first else {
+                // Plain-text reply → treat as the final summary.
+                let text = reply.content.trimmingCharacters(in: .whitespacesAndNewlines)
+                finish(summary: text.isEmpty ? "Finished browsing." : text, observations: [])
+                break
+            }
+
+            let (observation, isFinish) = await execute(call)
+            // Pair the tool result back so providers that require tool_use →
+            // tool_result pairing (Anthropic) stay happy on the next call.
+            convoMessages.append(MessageStruct(
+                role: "function",
+                content: observation,
+                name: "browse_action",
+                callId: call.callId
+            ))
+
+            attachment.stepCount = step
+            emit()
+
+            if isFinish { break }
+        }
+
+        if !finished {
+            // Ran out of steps without an explicit finish — summarize what we have.
+            finish(summary: "Reached the \(maxSteps)-step limit. " + (attachment.summary ?? "Captured the pages visited above."),
+                   observations: [])
+        }
+        return attachment
+    }
+
+    // MARK: - Action execution (JS bridge)
+
+    /// Returns (observation text for the model, isFinish).
+    private func execute(_ call: FunctionCallStruct) async -> (String, Bool) {
+        let args = call.arguments
+        let action = (args["action"] as? String ?? "read").lowercased()
+        // A blank selector means "no selector". Passing "" on to
+        // `querySelector` throws in JS, which would turn a plain read into
+        // "(could not read page)" and a plain scroll into a no-op.
+        let selector = (args["selector"] as? String).flatMap {
+            $0.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : $0
+        }
+        let text = args["text"] as? String
+        let js = args["js"] as? String
+
+        switch action {
+        case "finish":
+            let summary = (args["summary"] as? String) ?? "Finished browsing."
+            let observations = (args["observations"] as? [String]) ?? []
+            finish(summary: summary, observations: observations)
+            await captureFrame(action: "finish")
+            return ("Session finished.", true)
+
+        case "click":
+            updateStatus(.navigating, detail: selector)
+            let ok = await bridgeClick(selector ?? "")
+            await waitForSettle()
+            await captureFrame(action: "click \(selector ?? "")")
+            let state = await readPageState()
+            updateStatus(.reading, detail: webView.url?.host)
+            return (ok ? "Clicked \(selector ?? ""). New page state:\n\(state)"
+                       : "No element matched \(selector ?? ""). Page unchanged:\n\(state)", false)
+
+        case "type":
+            let ok = await bridgeType(selector ?? "", text: text ?? "")
+            await captureFrame(action: "type \(selector ?? "")")
+            return (ok ? "Typed into \(selector ?? "")."
+                       : "No input matched \(selector ?? "").", false)
+
+        case "scroll":
+            let dir = (args["direction"] as? String) ?? "down"
+            await bridgeScroll(direction: dir, selector: selector)
+            await captureFrame(action: "scroll \(dir)")
+            let state = await readPageState()
+            return ("Scrolled \(dir). Visible content:\n\(state)", false)
+
+        case "wait_for", "waitfor":
+            let appeared = await bridgeWaitFor(selector ?? "", timeout: 5)
+            await captureFrame(action: "wait_for \(selector ?? "")")
+            let state = await readPageState()
+            return (appeared ? "Element \(selector ?? "") appeared.\n\(state)"
+                             : "Timed out waiting for \(selector ?? "").\n\(state)", false)
+
+        case "query", "queryselectorall":
+            let list = await bridgeQueryAll(selector ?? "")
+            return ("querySelectorAll(\(selector ?? "")) → \(list.count) matches:\n"
+                    + list.prefix(40).enumerated().map { "[\($0.offset)] \($0.element)" }.joined(separator: "\n"), false)
+
+        case "eval", "eval_js", "evaljs":
+            let out = await bridgeEval(js ?? "")
+            await captureFrame(action: "eval")
+            return ("evalJS result: \(out)", false)
+
+        case "read", "get_text", "gettext":
+            fallthrough
+        default:
+            // Unknown actions are treated as a read.
+            updateStatus(.reading, detail: webView.url?.host)
+            let state = await readPageState(selector: selector)
+            await captureFrame(action: selector.map { "read \($0)" } ?? "read")
+            return ("Page content:\n\(state)", false)
+        }
+    }
+
+    // MARK: - JS bridge primitives
+
+    /// getText — readable text of the page (or a selector subtree), trimmed.
+    /// The text is capped at 6000 characters and prefixed with URL and title.
+    private func readPageState(selector: String? = nil) async -> String {
+        let scope = selector.map { "document.querySelector(\(jsString($0)))" } ?? "document.body"
+        let js = """
+        (function(){
+          var el = \(scope);
+          if(!el) return JSON.stringify({error:"no element"});
+          var text = (el.innerText||"").replace(/\\n{3,}/g,"\\n\\n").trim();
+          var title = document.title||"";
+          var url = location.href;
+          return JSON.stringify({title:title,url:url,text:text.slice(0,6000)});
+        })()
+        """
+        guard let raw = await eval(js) as? String,
+              let data = raw.data(using: .utf8),
+              let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
+            return "(could not read page)"
+        }
+        if let err = obj["error"] as? String { return "(\(err))" }
+        let title = obj["title"] as? String ?? ""
+        let url = obj["url"] as? String ?? ""
+        let text = obj["text"] as? String ?? ""
+        return "URL: \(url)\nTitle: \(title)\n---\n\(text)"
+    }
+
+    /// Scrolls the first element matching `selector` into view and clicks it.
+    /// Returns false when nothing matched or the script failed.
+    private func bridgeClick(_ selector: String) async -> Bool {
+        let js = """
+        (function(){
+          var el = document.querySelector(\(jsString(selector)));
+          if(!el) return false;
+          el.scrollIntoView({block:"center"});
+          el.click();
+          return true;
+        })()
+        """
+        return (await eval(js) as? Bool) ?? false
+    }
+
+    /// Sets the value of the first element matching `selector` to `text` and
+    /// dispatches bubbling `input` and `change` events. Returns false when
+    /// nothing matched or the script failed.
+    private func bridgeType(_ selector: String, text: String) async -> Bool {
+        let js = """
+        (function(){
+          var el = document.querySelector(\(jsString(selector)));
+          if(!el) return false;
+          el.focus();
+          el.value = \(jsString(text));
+          el.dispatchEvent(new Event('input',{bubbles:true}));
+          el.dispatchEvent(new Event('change',{bubbles:true}));
+          return true;
+        })()
+        """
+        return (await eval(js) as? Bool) ?? false
+    }
+
+    private func bridgeScroll(direction: String, selector: String?) async {
+        let js: String
+        if let selector {
+            js = "var e=document.querySelector(\(jsString(selector))); if(e) e.scrollIntoView({block:'center'});"
+        } else {
+            let delta = direction.lowercased() == "up" ? 
"-window.innerHeight*0.8" : "window.innerHeight*0.8" + js = "window.scrollBy(0, \(delta));" + } + _ = await eval(js) + await waitForSettle(0.3) + } + + private func bridgeWaitFor(_ selector: String, timeout: TimeInterval) async -> Bool { + let deadline = Date().addingTimeInterval(timeout) + while Date() < deadline { + let present = (await eval("!!document.querySelector(\(jsString(selector)))") as? Bool) ?? false + if present { return true } + try? await Task.sleep(nanoseconds: 250_000_000) + } + return false + } + + private func bridgeQueryAll(_ selector: String) async -> [String] { + let js = """ + (function(){ + var nodes = document.querySelectorAll(\(jsString(selector))); + var out = []; + for(var i=0;i String { + let wrapped = "(function(){ try { return JSON.stringify((function(){ \(js) })()); } catch(e){ return 'error: '+e.message; } })()" + let out = await eval(wrapped) + if let s = out as? String { return String(s.prefix(2000)) } + if let v = out { return String(describing: v).prefix(2000).description } + return "undefined" + } + + // MARK: - WebKit primitives + + private func load(_ url: URL) async { + updateStatus(.navigating, detail: url.host) + await withCheckedContinuation { (cont: CheckedContinuation) in + self.loadContinuation = cont + var req = URLRequest(url: url) + req.timeoutInterval = 30 + webView.load(req) + } + } + + /// Let JS settle after an interaction / SPA navigation. + private func waitForSettle(_ seconds: TimeInterval = 0.8) async { + try? await Task.sleep(nanoseconds: UInt64(seconds * 1_000_000_000)) + } + + private func eval(_ js: String) async -> Any? 
{
+        await withCheckedContinuation { (cont: CheckedContinuation<Any?, Never>) in
+            // Script errors are dropped; callers see them as a nil result.
+            webView.evaluateJavaScript(js) { result, _ in
+                cont.resume(returning: result)
+            }
+        }
+    }
+
+    // MARK: - Frame capture + replay persistence
+
+    /// Writes a DOM snapshot and a PNG screenshot for the current page into
+    /// the replay directory, appends the matching `BrowseFrame`, and notifies
+    /// the delegate. File-write failures are ignored (best effort).
+    private func captureFrame(action: String) async {
+        let ts = Date().timeIntervalSince(startTime)
+        let index = frames.count
+        let pngName = String(format: "frame_%03d.png", index)
+        let domName = String(format: "frame_%03d.html", index)
+
+        // DOM snapshot.
+        let dom = (await eval("document.documentElement.outerHTML") as? String) ?? ""
+        try? dom.data(using: .utf8)?.write(to: replayDir.appendingPathComponent(domName))
+
+        // Screenshot.
+        if let image = await snapshot(), let png = image.pngData() {
+            let url = replayDir.appendingPathComponent(pngName)
+            try? png.write(to: url)
+            attachment.latestThumbnailPath = url.path
+        }
+
+        let frame = BrowseFrame(
+            ts: ts,
+            url: webView.url?.absoluteString ?? attachment.url,
+            action: action,
+            screenshot: pngName,
+            domSnapshot: domName,
+            viewport: "\(Int(viewport.width))x\(Int(viewport.height))"
+        )
+        frames.append(frame)
+        emit()
+    }
+
+    /// Snapshots the viewport-sized top-left rectangle of the web view.
+    /// Returns nil when WebKit produces no image.
+    private func snapshot() async -> UIImage? {
+        let config = WKSnapshotConfiguration()
+        config.rect = CGRect(origin: .zero, size: viewport)
+        return await withCheckedContinuation { (cont: CheckedContinuation<UIImage?, Never>) in
+            webView.takeSnapshot(with: config) { image, _ in
+                cont.resume(returning: image)
+            }
+        }
+    }
+
+    /// Writes `manifest.json` (session metadata plus the frame list) into the
+    /// replay directory. Encoding or write failures are ignored.
+    private func persistManifest() async {
+        let manifest = BrowseReplayManifest(
+            replayId: attachmentId,
+            url: startURL.absoluteString,
+            instructions: instructions,
+            finalURL: attachment.finalURL,
+            summary: attachment.summary,
+            frames: frames
+        )
+        if let data = try? JSONEncoder().encode(manifest) {
+            try? data.write(to: replayDir.appendingPathComponent("manifest.json"))
+        }
+    }
+
+    // MARK: - Model call
+
+    /// Sends the running conversation plus the `browse_action` tool to the
+    /// model. Returns nil when the call yields no response.
+    private func modelCall() async -> MessageStruct? {
+        let messages = convoMessages
+        return await withCheckedContinuation { (cont: CheckedContinuation<MessageStruct?, Never>) in
+            Cloud.connection.chat(messages: messages, tools: BrowseSkill.actionTools) { response, _ in
+                cont.resume(returning: response)
+            }
+        }
+    }
+
+    // MARK: - State helpers
+
+    /// Ends the session successfully. Observations are appended to the
+    /// summary as a bulleted list. Ignored once the session has finished.
+    private func finish(summary: String, observations: [String]) {
+        guard !finished else { return }
+        finished = true
+        var body = summary
+        if !observations.isEmpty {
+            body += "\n\nObservations:\n" + observations.map { "• \($0)" }.joined(separator: "\n")
+        }
+        attachment.summary = body
+        attachment.finalURL = webView.url?.absoluteString ?? attachment.url
+        attachment.status = .done
+        attachment.statusDetail = nil
+        emit()
+    }
+
+    /// Ends the session as failed and releases a pending `load(_:)`.
+    /// Ignored once the session has finished.
+    private func fail(_ reason: String) {
+        guard !finished else { return }
+        finished = true
+        attachment.status = .failed
+        attachment.failureReason = reason
+        // Fall back to the requested URL, as `finish` does, so a failed
+        // session still records where it was headed.
+        attachment.finalURL = webView.url?.absoluteString ?? attachment.url
+        loadContinuation?.resume()
+        loadContinuation = nil
+        emit()
+    }
+
+    /// Publishes an in-progress status. Ignored once the session has
+    /// finished: the agent loop can still be mid-step when the time limit
+    /// fires, and must not overwrite the terminal status.
+    private func updateStatus(_ status: BrowseAttachment.Status, detail: String?) {
+        guard !finished else { return }
+        attachment.status = status
+        attachment.statusDetail = detail
+        emit()
+    }
+
+    private func emit() {
+        delegate?.browseSession(self, didUpdate: attachment)
+    }
+
+    /// Stops loading and detaches the delegates. The views are removed from
+    /// the hierarchy only when no full-screen player currently holds the
+    /// web view.
+    private func teardown() {
+        webView.stopLoading()
+        webView.navigationDelegate = nil
+        webView.uiDelegate = nil
+        if !isMirroredFullScreen {
+            webView.removeFromSuperview()
+            offscreenHost.removeFromSuperview()
+        }
+    }
+
+    // MARK: - Structured-observations / final tool result
+
+    /// The JSON the skill returns to the model: { summary, final_url,
+    /// replay_id, steps }, plus `error` when the session failed. There is no
+    /// separate observations key; `finish` folds observations into `summary`.
+    func toolResultJSON() -> String {
+        var obj: [String: Any] = [
+            "summary": attachment.summary ?? "",
+            "final_url": attachment.finalURL ?? attachment.url,
+            "replay_id": attachmentId,
+            "steps": attachment.stepCount,
+        ]
+        if attachment.status == .failed {
+            obj["error"] = attachment.failureReason ?? "browse failed"
+        }
+        if let data = try? 
JSONSerialization.data(withJSONObject: obj, options: [.prettyPrinted]), + let s = String(data: data, encoding: .utf8) { + return s + } + return attachment.summary ?? "{}" + } + + // MARK: - Static helpers + + private func jsString(_ s: String) -> String { + let data = (try? JSONSerialization.data(withJSONObject: [s])) ?? Data("[\"\"]".utf8) + let arr = String(data: data, encoding: .utf8) ?? "[\"\"]" + // Strip the surrounding [ ] to get a single JSON string literal. + return String(arr.dropFirst().dropLast()) + } + + static let sessionTimeout: TimeInterval = 60 + + static func workspaceBrowseDir() -> URL { + let docs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first! + let dir = docs.appendingPathComponent("browse", isDirectory: true) + try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + return dir + } + + static func systemPrompt(viewport: CGSize) -> String { + return """ + You are an autonomous web-browsing agent driving a real WebKit web view \ + on an iPhone-sized viewport (\(Int(viewport.width))×\(Int(viewport.height))). \ + Each turn you call the `browse_action` tool exactly once to read or act on \ + the current page, and you receive the resulting page state back. + + Available actions (the `action` field): + - "read": return the readable text of the page (optionally pass `selector`). + - "click": click the element matching `selector`. + - "type": type `text` into the input matching `selector`. + - "scroll": scroll the page (`direction`: "down"/"up", or pass a `selector` to scroll to). + - "wait_for": wait until `selector` appears. + - "query": list elements matching `selector` (querySelectorAll). + - "eval_js": run `js` and return the result (advanced). + - "finish": end the session. Provide a grounded `summary` of what you \ + observed (answer the task) and an `observations` array of short bullet strings. 
+ + Rules: + - Ground every claim in text you actually read from the page — never invent content. + - Be efficient: a few reads/scrolls/clicks, then finish. Do not loop aimlessly. + - If the task is just "look at the page and report", read it, scroll once or \ + twice for more content, then finish. + - Always call finish before you run out of steps. + """ + } +} + +// MARK: - WKNavigationDelegate / WKUIDelegate (safety + load tracking) + +extension BrowseSession: WKNavigationDelegate, WKUIDelegate { + + func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) { + loadContinuation?.resume() + loadContinuation = nil + } + + func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) { + loadContinuation?.resume() + loadContinuation = nil + } + + func webView(_ webView: WKWebView, didFailProvisionalNavigation navigation: WKNavigation!, withError error: Error) { + // A failed *initial* provisional load is fatal for the session. + if frames.isEmpty { + fail("Couldn't load \(startURL.absoluteString): \(error.localizedDescription)") + } else { + loadContinuation?.resume() + loadContinuation = nil + } + } + + func webView(_ webView: WKWebView, + decidePolicyFor navigationAction: WKNavigationAction, + decisionHandler: @escaping (WKNavigationActionPolicy) -> Void) { + // Block downloads and non-http(s) schemes (tel:, mailto:, app links). + if let url = navigationAction.request.url, + let scheme = url.scheme?.lowercased(), + scheme != "http", scheme != "https", scheme != "about" { + decisionHandler(.cancel) + return + } + decisionHandler(.allow) + } + + // Block popups / new windows — keep everything in the one driven view. + func webView(_ webView: WKWebView, + createWebViewWith configuration: WKWebViewConfiguration, + for navigationAction: WKNavigationAction, + windowFeatures: WKWindowFeatures) -> WKWebView? 
{ + if let url = navigationAction.request.url { + webView.load(URLRequest(url: url)) + } + return nil + } + + // Auto-dismiss JS dialogs so a blocking alert can't wedge the session. + func webView(_ webView: WKWebView, + runJavaScriptAlertPanelWithMessage message: String, + initiatedByFrame frame: WKFrameInfo, + completionHandler: @escaping () -> Void) { + completionHandler() + } + + func webView(_ webView: WKWebView, + runJavaScriptConfirmPanelWithMessage message: String, + initiatedByFrame frame: WKFrameInfo, + completionHandler: @escaping (Bool) -> Void) { + completionHandler(false) + } + + static func keyWindow() -> UIWindow? { + return UIApplication.shared.connectedScenes + .compactMap { $0 as? UIWindowScene } + .flatMap { $0.windows } + .first { $0.isKeyWindow } ?? + UIApplication.shared.connectedScenes + .compactMap { $0 as? UIWindowScene } + .flatMap { $0.windows } + .first + } +} + +#endif diff --git a/LoopIOS/Browse/BrowseSkill.swift b/LoopIOS/Browse/BrowseSkill.swift new file mode 100644 index 0000000..3f5a07c --- /dev/null +++ b/LoopIOS/Browse/BrowseSkill.swift @@ -0,0 +1,233 @@ +// +// BrowseSkill.swift +// Loop +// +// Skill definition for `browse` — drive a real on-device WebKit web view to +// fetch, render, and navigate JavaScript-heavy pages on the user's behalf. +// Unlike `fetch_url` / `exa_search` (fast, text-only), `browse` exists for +// when the UI and visual interface actually matter: "check the Apple homepage +// and tell me what changed", anything needing JS rendering, clicking through, +// or seeing the page. +// +// Visible by default: each session drops a live preview card on the assistant +// message (like the story skill) and, when it ends, the card flips into a +// scrubbable replay of the frames the agent saw. +// +// Blocking-by-design: the handler runs the whole session and returns the +// grounded { summary, final_url, observations[], replay_id } so the model's +// reply is anchored in what it actually saw. 
+// + +#if os(iOS) + +import UIKit + +final class BrowseSkill { + static let shared = BrowseSkill() + private init() {} + + static let systemPromptFragment: String = """ +You can drive a real on-device web browser with the `browse` tool. It opens a \ +WebKit web view, renders the page (JavaScript included), and an internal agent \ +loop reads the DOM, scrolls, and clicks through to accomplish a natural-language \ +task — capturing a screenshot + DOM snapshot at every step. + +When to use `browse` (vs the faster text-only `fetch_url` / `exa_search`): +- The visual interface or live UI matters: "check the Apple homepage and tell me \ + what changed", "what's on the front page of X right now", "see if the hero \ + section changed". +- The page is JavaScript-heavy and `fetch_url` returns empty/garbled text. +- The task needs interaction: clicking through, filling a field, scrolling to \ + load more, navigating between pages. + +How to call it: +- `url` (required): the page to open. +- `instructions`: what to look for / do, in natural language (e.g. "see if \ + anything's changed on the hero section, report the headline and main CTA"). +- `max_steps` (optional, default 10): how many read/click/scroll steps the \ + internal agent may take. +- `viewport` (optional, e.g. "390x844"): defaults to iPhone-sized. + +The tool BLOCKS until the session finishes and returns a grounded summary plus \ +a replay id. A live preview card appears in the chat while it runs and becomes a \ +scrubbable replay afterward — so after it returns, write a short reply grounded in \ +the returned summary; don't restate the whole page. Each session is stateless \ +(no cookies, fresh data store every call), so if you need a before/after diff, \ +browse the baseline in the same call. +""" + + /// The user-facing tool the primary agent calls. 
+    static let tools: [[String: Any]] = {
+        // Every parameter is a { type, description } pair.
+        func param(_ type: String, _ description: String) -> [String: String] {
+            return ["type": type, "description": description]
+        }
+
+        let properties: [String: Any] = [
+            "url": param("string", "The page to open (http/https). Scheme optional — 'apple.com' works."),
+            "instructions": param("string", "Natural-language task: what to look for or do, e.g. 'see if the hero section changed and report the headline and CTA'."),
+            "max_steps": param("integer", "Max read/click/scroll steps the internal agent may take. Default 10."),
+            "viewport": param("string", "Viewport size as WxH, e.g. '390x844'. Defaults to iPhone-sized."),
+        ]
+
+        let parameters: [String: Any] = [
+            "type": "object",
+            "properties": properties,
+            "required": ["url"],
+        ]
+
+        let function: [String: Any] = [
+            "name": "browse",
+            "description": "Drive a real on-device WebKit browser to open, render (JS included), and navigate a web page to accomplish a natural-language task. Use when the visual/live UI matters or the page is JS-heavy — not for plain text (use fetch_url for that). Shows a live preview card and returns a grounded { summary, final_url, observations, replay_id }.",
+            "parameters": parameters,
+        ]
+
+        return [
+            ["type": "function", "function": function],
+        ]
+    }()
+
+    /// The internal tool the browse agent loop drives (one action per step).
+    /// Not exposed to the primary agent — only sent on the nested model calls
+    /// inside BrowseSession. Registered in ToolRouter.coreToolNames so routing
+    /// never strips it from those nested calls.
+    static let actionTools: [[String: Any]] = [
+        [
+            "type": "function",
+            "function": [
+                "name": "browse_action",
+                "description": "Perform one browsing action on the current page and get the resulting page state back.",
+                "parameters": [
+                    "type": "object",
+                    "properties": [
+                        "action": [
+                            "type": "string",
+                            "enum": ["read", "click", "type", "scroll", "wait_for", "query", "eval_js", "finish"],
+                            "description": "The action to perform this step.",
+                        ],
+                        "selector": [
+                            "type": "string",
+                            "description": "CSS selector for click/type/scroll/wait_for/query/read.",
+                        ],
+                        "text": [
+                            "type": "string",
+                            "description": "Text to type (action 'type').",
+                        ],
+                        "direction": [
+                            "type": "string",
+                            "enum": ["down", "up"],
+                            "description": "Scroll direction (action 'scroll').",
+                        ],
+                        "js": [
+                            "type": "string",
+                            "description": "JavaScript body to evaluate (action 'eval_js'). Should `return` a value.",
+                        ],
+                        "summary": [
+                            "type": "string",
+                            "description": "Grounded summary answering the task (action 'finish').",
+                        ],
+                        "observations": [
+                            "type": "array",
+                            "items": ["type": "string"],
+                            "description": "Short bullet observations gathered from the page (action 'finish').",
+                        ],
+                    ] as [String: Any],
+                    "required": ["action"],
+                ] as [String: Any],
+            ] as [String: Any],
+        ]
+    ]
+
+    static let toolNames: Set<String> = ["browse"]
+
+    /// Largest viewport edge, in points, accepted from the model. Anything
+    /// bigger (or non-finite) falls back to the default size.
+    private static let maxViewportDimension: Double = 4096
+
+    /// True when `functionName` is a tool this skill implements.
+    func handles(functionName: String) -> Bool {
+        return BrowseSkill.toolNames.contains(functionName)
+    }
+
+    /// Short progress label for a `browse` call ("browsing <host>"), or nil
+    /// for any other tool.
+    func statusText(for call: FunctionCallStruct) -> String? {
+        guard call.name == "browse" else { return nil }
+        if let raw = (call.arguments["url"] as? String), !raw.isEmpty {
+            let host = URL(string: raw.contains("://") ? raw : "https://\(raw)")?.host ?? raw
+            return "browsing \(host)"
+        }
+        return "browsing the web"
+    }
+
+    // MARK: - Dispatch
+
+    /// Validates the `browse` arguments, runs the whole session, and passes
+    /// the session's tool-result JSON to `completion`. Invalid input is
+    /// reported through `completion` as a function message without starting
+    /// a session.
+    func handle(functionCall: FunctionCallStruct,
+                completion: @escaping (MessageStruct) -> Void) {
+        guard functionCall.name == "browse" else {
+            completion(MessageStruct(role: "function",
+                                     content: "Unknown tool \(functionCall.name).",
+                                     name: functionCall.name))
+            return
+        }
+
+        let raw = (functionCall.arguments["url"] as? String)?
+            .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
+        guard !raw.isEmpty else {
+            completion(MessageStruct(role: "function",
+                                     content: "I need a `url` to browse.",
+                                     name: "browse"))
+            return
+        }
+        let normalized = raw.contains("://") ? raw : "https://\(raw)"
+        // Accept http(s) only. Other schemes that carry a host (ftp://,
+        // file://localhost/…) used to pass this check even though the error
+        // text promises http/https; the session would then start a load its
+        // own navigation policy refuses.
+        guard let url = URL(string: normalized),
+              let scheme = url.scheme?.lowercased(),
+              scheme == "http" || scheme == "https",
+              url.host != nil else {
+            completion(MessageStruct(role: "function",
+                                     content: "'\(raw)' isn't a valid http/https URL.",
+                                     name: "browse"))
+            return
+        }
+
+        let instructions = (functionCall.arguments["instructions"] as? String)?
+            .trimmingCharacters(in: .whitespacesAndNewlines)
+            .nilIfEmpty ?? "Open the page and report what you see."
+        let maxSteps = max(1, min(20, BrowseSkill.intArg(functionCall.arguments["max_steps"]) ?? 10))
+        let viewport = BrowseSkill.parseViewport(functionCall.arguments["viewport"] as? String)
+        let convId = functionCall.conversationId
+
+        Task { @MainActor in
+            let session = await BrowseGenerationService.shared.run(
+                url: url,
+                instructions: instructions,
+                maxSteps: maxSteps,
+                viewport: viewport,
+                conversationId: convId
+            )
+            completion(MessageStruct(
+                role: "function",
+                content: session.toolResultJSON(),
+                name: "browse"
+            ))
+        }
+    }
+
+    // MARK: - Helpers
+
+    /// Reads an integer tool argument that may arrive as Int, Double or
+    /// String. Returns nil when the value is missing or cannot be represented
+    /// as an Int.
+    private static func intArg(_ value: Any?) -> Int? {
+        if let i = value as? Int { return i }
+        // `Int(d)` traps on NaN, infinity and out-of-range values, and the
+        // value comes from the model. Truncate, then convert only if it fits.
+        if let d = value as? Double { return Int(exactly: d.rounded(.towardZero)) }
+        if let s = value as? String { return Int(s.trimmingCharacters(in: .whitespaces)) }
+        return nil
+    }
+
+    /// Parses "WxH" (case-insensitive) into a size. Falls back to the
+    /// iPhone-sized default when the text is missing or malformed, or when
+    /// either edge is not in (100, maxViewportDimension].
+    private static func parseViewport(_ s: String?) -> CGSize {
+        let fallback = CGSize(width: 390, height: 844) // iPhone-sized default
+        guard let s = s?.lowercased() else { return fallback }
+        let parts = s.split(separator: "x")
+        // The upper bound also rejects "inf" and huge exponents, which parse
+        // as Double but trap later when the size is converted to Int.
+        guard parts.count == 2,
+              let w = Double(parts[0].trimmingCharacters(in: .whitespaces)),
+              let h = Double(parts[1].trimmingCharacters(in: .whitespaces)),
+              w > 100, h > 100,
+              w <= maxViewportDimension, h <= maxViewportDimension else { return fallback }
+        return CGSize(width: w, height: h)
+    }
+}
+
+private extension String {
+    var nilIfEmpty: String? { isEmpty ? nil : self }
+}
+
+#endif
diff --git a/LoopIOS/Data/AnthropicChat.swift b/LoopIOS/Data/AnthropicChat.swift index 8233a23..b64e599 100644 --- a/LoopIOS/Data/AnthropicChat.swift +++ b/LoopIOS/Data/AnthropicChat.swift @@ -59,6 +59,8 @@ final class AnthropicChat { func chat(messages: [MessageStruct], tools: [[String: Any]]? = nil, + modelIDOverride: String? = nil, + modelStampOverride: String? = nil, onPartial: ((String) -> Void)? = nil, completion: @escaping (MessageStruct?, Error?) -> Void) { @@ -69,7 +71,11 @@ final class AnthropicChat { return } - let modelID = ModelSelectionStore.current.apiModelID ?? "claude-sonnet-4-6" + // `modelIDOverride` lets non-agent callers (e.g. VisionSummaryService's + // background image-description pass) pin a specific Claude model + // regardless of the user's current selection, which may be a non-Claude + // provider whose wire id Anthropic would reject. + let modelID = modelIDOverride ?? ModelSelectionStore.current.apiModelID ?? "claude-sonnet-4-6" let (system, wire) = Self.wirePayload(from: messages) var body: [String: Any] = [ @@ -137,7 +143,7 @@ final class AnthropicChat { let msg = MessageStruct( role: "assistant", content: r.content, - model: ModelSelectionStore.current.stampedMessageModel, + model: modelStampOverride ?? 
ModelSelectionStore.current.stampedMessageModel, functions: r.toolCalls, tokenUsage: r.usage, ttft: r.ttft) @@ -149,6 +155,7 @@ final class AnthropicChat { let task = streamingSession.dataTask(with: req) streamingSessionDelegate.register(task: task, reader: reader) + LocalInferenceController.shared.track(task) task.resume() } @@ -202,10 +209,19 @@ final class AnthropicChat { // can trip the orphan-tool_use sanitizer below. let filteredMessages = messages.filter { m in guard m.role == "assistant" else { return true } - if m.id.hasPrefix("image-") || m.id.hasPrefix("pdf-") { return false } + if m.id.hasPrefix("image-") || m.id.hasPrefix("pdf-") || m.id.hasPrefix("story-") { return false } return true } + // Id of the most recent genuine *human* turn. An image is sent at full + // resolution only while its message is this last user turn; on every + // later turn we swap the base64 block for a cached text description + // (see the image branch below). Tool results are `role:"function"`, so + // an in-turn agent loop doesn't advance this — the image stays raw for + // the whole turn that introduced it, then downgrades once the human + // sends the next message. + let lastUserMessageId = filteredMessages.last { $0.role == "user" }?.id + for m in filteredMessages { if m.role == "system" { if !m.content.isEmpty { systemParts.append(m.content) } @@ -272,9 +288,20 @@ final class AnthropicChat { let role = (m.role == "assistant") ? "assistant" : "user" if let f = m.fileAttachment, f.status == .ready, f.kind == .image, let imageBlock = imageBlock(for: f) { - let text = m.content.isEmpty ? f.assistantHint - : "\(m.content)\n\n\(f.assistantHint)" - seq.append((role, [textBlock(text), imageBlock])) + // Downgrade the image to its cached description on every turn + // after the one that introduced it — unless no summary exists + // yet, in which case we re-send the raw image so the model + // never loses sight of it (correctness over token savings). 
+ let isIntroducingTurn = (m.id == lastUserMessageId) + if !isIntroducingTurn, let summaryHint = f.imageSummaryHint { + let text = m.content.isEmpty ? summaryHint + : "\(m.content)\n\n\(summaryHint)" + seq.append((role, [textBlock(text)])) + } else { + let text = m.content.isEmpty ? f.assistantHint + : "\(m.content)\n\n\(f.assistantHint)" + seq.append((role, [textBlock(text), imageBlock])) + } } else { var content = m.content if let f = m.fileAttachment, f.status == .ready { @@ -423,6 +450,13 @@ final class AnthropicChat { anthropicTools(from: tools) } + /// Expose `wirePayload(from:)` for unit tests so the image-downgrade logic + /// (raw image on the introducing turn, cached description afterwards) can + /// be asserted without a network call. + static func testableWirePayload(from messages: [MessageStruct]) -> (String?, [[String: Any]]) { + wirePayload(from: messages) + } + // MARK: - Errors private static func error(_ message: String) -> NSError { diff --git a/LoopIOS/Data/AnthropicStreamReader.swift b/LoopIOS/Data/AnthropicStreamReader.swift index 2e9da43..1fc50e2 100644 --- a/LoopIOS/Data/AnthropicStreamReader.swift +++ b/LoopIOS/Data/AnthropicStreamReader.swift @@ -40,6 +40,10 @@ final class AnthropicStreamReader: NSObject, URLSessionDataDelegate { private var toolUseBlocks: [Int: ToolUseAccumulator] = [:] /// Track content block types by index so deltas can route correctly. private var blockTypes: [Int: String] = [:] + /// Once a tool_use block starts, suppress `onDelta` for subsequent text + /// deltas so pre-tool thinking text doesn't leak into the streaming + /// bubble as visible assistant prose. + private var sawToolUse = false private var lineBuffer = "" private var currentEventType = "" @@ -133,6 +137,7 @@ final class AnthropicStreamReader: NSObject, URLSessionDataDelegate { let type = block["type"] as? String else { return } blockTypes[idx] = type if type == "tool_use" { + sawToolUse = true var acc = ToolUseAccumulator() if let id = block["id"] as? 
String { acc.id = id } if let name = block["name"] as? String { acc.name = name } @@ -146,7 +151,14 @@ final class AnthropicStreamReader: NSObject, URLSessionDataDelegate { if deltaType == "text_delta", let text = delta["text"] as? String { contentBuffer += text - onDelta?(text) + // Only stream text to the live bubble while no tool_use block + // has been seen. Once tool calls start, the pre-tool prose is + // captured in contentBuffer for the disclosure but suppressed + // from the streaming partial so the user doesn't see raw + // tool-invocation reasoning leak into the main transcript. + if !sawToolUse { + onDelta?(text) + } } else if deltaType == "input_json_delta", let partial = delta["partial_json"] as? String { toolUseBlocks[idx]?.inputJSON += partial diff --git a/LoopIOS/Data/ConversationFileStore.swift b/LoopIOS/Data/ConversationFileStore.swift index 35b6405..4851e05 100644 --- a/LoopIOS/Data/ConversationFileStore.swift +++ b/LoopIOS/Data/ConversationFileStore.swift @@ -95,6 +95,11 @@ final class ConversationFileStore: ConversationStore { private var inflightHydrations: Set = [] /// True while the metadata-query-triggered refresh is running. private var inflightRefresh: Bool = false + /// Ids whose files were missing from disk on the most recent + /// `surgicalRefresh`. Eviction is deferred: an id is only evicted if + /// it's still missing on the *next* refresh — this prevents transient + /// iCloud download/rename operations from evicting the active convo. + private var pendingEvictions: Set = [] /// Flips true once `bootstrap()` has resolved the container and finished /// pass-1 enumeration. Reads return an empty cache until then; UI observes /// `.conversationStoreDidBecomeReady` to load the real last conversation. @@ -219,6 +224,22 @@ final class ConversationFileStore: ConversationStore { return msgs } + /// Whether a conversation's messages have been fully parsed from disk. 
+ /// MessagingVC uses this to avoid rendering a blank screen when the store + /// has only meta-stub data for the conversation. + func isHydrated(id: String) -> Bool { + cacheLock.lock(); defer { cacheLock.unlock() } + return hydratedIds.contains(id) + } + + /// Kick off an async hydration for `id` if it hasn't been hydrated yet. + /// Public entry point for callers that detect a not-yet-hydrated row and + /// want the store to prioritize it (e.g. the active conversation on + /// cold start). The store posts `.conversationStoreDidChange` when done. + func requestHydrationIfNeeded(id: String) { + hydrateAsync(id: id) + } + /// True while the initial bootstrap, pass-2 hydration, or a metadata-driven /// refresh is active. Always false on the local backend (no remote sync to /// wait for) once ready. @@ -478,12 +499,32 @@ final class ConversationFileStore: ConversationStore { // needed) and replace the cache entry's messages. Posts a coalesced // change notification when the batch settles. + /// Id to hydrate first in pass-2. Set by the UI layer (via + /// `prioritizeHydration(id:)`) before pass-2 runs so the conversation + /// the user is looking at doesn't wait behind 50 others. + private var prioritizedHydrationId: String? + + /// Ask pass-2 to hydrate this conversation first. Safe to call from any + /// thread; the value is read once at the start of `scheduleFullHydration`. + func prioritizeHydration(id: String) { + cacheLock.lock() + prioritizedHydrationId = id + cacheLock.unlock() + } + private func scheduleFullHydration() { ioQueue.async { [weak self] in guard let self = self else { return } // Snapshot ids to hydrate from current cache. self.cacheLock.lock() - let ids = self.orderedIds + var ids = self.orderedIds + // Move the prioritized id to the front so the active conversation + // hydrates before everything else. 
+ if let prio = self.prioritizedHydrationId, + let idx = ids.firstIndex(of: prio), idx != 0 { + ids.remove(at: idx) + ids.insert(prio, at: 0) + } for id in ids where !self.hydratedIds.contains(id) { self.inflightHydrations.insert(id) } @@ -682,14 +723,22 @@ final class ConversationFileStore: ConversationStore { // Evict cache entries whose files disappeared, except those with // pending writes (the disk write probably hasn't landed yet). + // Use a "marked for eviction" set: don't evict on the first miss + // — only evict if the file was also missing on the previous refresh. + // This prevents transient iCloud download/rename operations from + // evicting the active conversation mid-sync. cacheLock.lock() let cachedIds = Set(cache.keys) - let evictable = cachedIds.subtracting(onDiskIds).subtracting(pendingWrites) - for id in evictable { + let missingIds = cachedIds.subtracting(onDiskIds).subtracting(pendingWrites) + let confirmedEvictions = missingIds.intersection(pendingEvictions) + for id in confirmedEvictions { cache.removeValue(forKey: id) hydratedIds.remove(id) anyChange = true } + // IDs missing for the first time are staged; they'll be evicted on + // the next refresh if they're still gone. 
+ pendingEvictions = missingIds.subtracting(confirmedEvictions) if anyChange { recomputeOrderedIdsLocked() } cacheLock.unlock() diff --git a/LoopIOS/Data/ConversationTitleService.swift b/LoopIOS/Data/ConversationTitleService.swift index f214030..cf72f08 100644 --- a/LoopIOS/Data/ConversationTitleService.swift +++ b/LoopIOS/Data/ConversationTitleService.swift @@ -153,7 +153,7 @@ final class ConversationTitleService { guard msg.role == role else { return false } guard !msg.content.isEmpty else { return false } guard msg.onboardingCard == nil else { return false } - if msg.id.hasPrefix("image-") || msg.id.hasPrefix("pdf-") { return false } + if msg.id.hasPrefix("image-") || msg.id.hasPrefix("pdf-") || msg.id.hasPrefix("story-") { return false } return true } diff --git a/LoopIOS/Data/FireworksChat.swift b/LoopIOS/Data/FireworksChat.swift index b57a0ad..b3ef76f 100644 --- a/LoopIOS/Data/FireworksChat.swift +++ b/LoopIOS/Data/FireworksChat.swift @@ -59,6 +59,8 @@ final class FireworksChat { func chat(messages: [MessageStruct], tools: [[String: Any]]? = nil, + modelIDOverride: String? = nil, + modelStampOverride: String? = nil, onPartial: ((String) -> Void)? = nil, completion: @escaping (MessageStruct?, Error?) -> Void) { @@ -69,7 +71,10 @@ final class FireworksChat { return } - let modelID = ModelSelectionStore.current.apiModelID ?? "accounts/fireworks/models/kimi-k2p6" + // `modelIDOverride` lets a caller pin a specific Fireworks model + // regardless of the user's selection — used by the per-turn vision + // fallback (e.g. GLM 5.2 → Kimi K2.6 for an image turn). + let modelID = modelIDOverride ?? ModelSelectionStore.current.apiModelID ?? "accounts/fireworks/models/kimi-k2p6" var body: [String: Any] = [ "model": modelID, @@ -107,7 +112,7 @@ final class FireworksChat { let msg = MessageStruct( role: "assistant", content: r.content, - model: ModelSelectionStore.current.stampedMessageModel, + model: modelStampOverride ?? 
ModelSelectionStore.current.stampedMessageModel, functions: r.toolCalls, reasoningContent: r.reasoningContent, tokenUsage: r.usage, @@ -120,6 +125,7 @@ final class FireworksChat { let task = streamingSession.dataTask(with: req) streamingSessionDelegate.register(task: task, reader: reader) + LocalInferenceController.shared.track(task) task.resume() } diff --git a/LoopIOS/Data/LocalInferenceController.swift b/LoopIOS/Data/LocalInferenceController.swift new file mode 100644 index 0000000..1d79c56 --- /dev/null +++ b/LoopIOS/Data/LocalInferenceController.swift @@ -0,0 +1,49 @@ +// +// LocalInferenceController.swift +// Loop +// +// A tiny process-wide handle on the in-flight local inference request so the +// background-handoff path can hard-cancel it. The provider clients +// (AnthropicChat / OpenAIChat / FireworksChat) all create a single streaming +// `URLSessionDataTask` per turn but never retained it, so there was no way to +// stop a local turn when handing it off to a runner — it would keep running in +// iOS's background grace window and "win" the race. Each client now `track`s +// its task here; the handoff calls `cancelActive()`. +// +// We hold the task weakly: URLSession retains it until completion, so the weak +// ref stays valid while running and auto-clears once finished (cancel then +// no-ops). One active local turn at a time on device, so a single slot is +// enough. +// + +import Foundation + +final class LocalInferenceController { + static let shared = LocalInferenceController() + private init() {} + + private let lock = NSLock() + private weak var activeTask: URLSessionTask? + + /// Record the streaming task for the current local turn. + func track(_ task: URLSessionTask) { + lock.lock(); activeTask = task; lock.unlock() + } + + /// Cancel the in-flight local inference request, if any is still running. + /// Returns true if a running task was cancelled. 
The cancelled task's + /// completion fires with a cancellation error, which the handoff guard + /// (`abandonedLocalTurns`) swallows so no stray error message is written. + @discardableResult + func cancelActive() -> Bool { + lock.lock() + let task = activeTask + activeTask = nil + lock.unlock() + guard let task = task, task.state == .running || task.state == .suspended else { + return false + } + task.cancel() + return true + } +} diff --git a/LoopIOS/Data/ModelSelection.swift b/LoopIOS/Data/ModelSelection.swift index f151ddb..ce931dc 100644 --- a/LoopIOS/Data/ModelSelection.swift +++ b/LoopIOS/Data/ModelSelection.swift @@ -94,8 +94,9 @@ enum ModelSelection: String, CaseIterable { case claudeSonnet46 = "claudeSonnet46" case claudeHaiku45 = "claudeHaiku45" - // Fireworks — Kimi K2.6 served via Fireworks inference. + // Fireworks — models served via Fireworks inference. case fireworksKimiK26 = "fireworksKimiK26" + case fireworksGLM52 = "fireworksGLM52" var provider: ModelProvider { switch self { @@ -105,7 +106,7 @@ enum ModelSelection: String, CaseIterable { return .openAI case .claudeOpus47, .claudeSonnet46, .claudeHaiku45: return .anthropic - case .fireworksKimiK26: + case .fireworksKimiK26, .fireworksGLM52: return .fireworks } } @@ -122,6 +123,7 @@ enum ModelSelection: String, CaseIterable { case .claudeSonnet46: return "Claude Sonnet 4.6" case .claudeHaiku45: return "Claude Haiku 4.5" case .fireworksKimiK26: return "Kimi K2.6" + case .fireworksGLM52: return "GLM 5.2" } } @@ -140,6 +142,7 @@ enum ModelSelection: String, CaseIterable { case .claudeSonnet46: return "claude-sonnet-4-6" case .claudeHaiku45: return "claude-haiku-4-5-20251001" case .fireworksKimiK26: return "accounts/fireworks/models/kimi-k2p6" + case .fireworksGLM52: return "accounts/fireworks/models/glm-5p2" } } @@ -166,6 +169,7 @@ enum ModelSelection: String, CaseIterable { case .claudeSonnet46: return 200_000 case .claudeHaiku45: return 200_000 case .fireworksKimiK26: return 131_072 + case 
.fireworksGLM52: return 1_048_576 } } @@ -193,6 +197,53 @@ enum ModelSelection: String, CaseIterable { case .fireworks: return .fireworks } } + + /// Whether this model can accept image input. Drives the per-turn vision + /// fallback: a turn that must send a raw image is routed to a vision-capable + /// model when the selected one can't see images (see + /// `AgentHarness.chat`). If a provider adds/removes vision support, this is + /// the one place to update. + var supportsVision: Bool { + switch self { + case .appleFoundation: + return false + case .gpt55, .gpt51, .gpt41, .gpt4o: + return true + case .claudeOpus47, .claudeSonnet46, .claudeHaiku45: + return true + case .fireworksKimiK26: + return true + case .fireworksGLM52: + // GLM 5.2 on Fireworks is text-only — image turns fall back to Kimi. + return false + } + } + + /// Whether this model can actually run right now: a hosted model needs its + /// API key configured; Apple needs the on-device model available. + var isUsable: Bool { + guard let key = requiredKey else { + return ModelProvider.isAppleFoundationAvailable + } + return KeyStore.shared.source(for: key) != .missing + } + + /// A vision-capable, currently-usable model to handle an image turn when the + /// selected model can't. Prefers the same provider (so keys/billing stay + /// consistent — e.g. GLM 5.2 → Kimi K2.6 on Fireworks), then falls back to + /// the first usable vision model on any hosted provider. Returns nil when no + /// vision-capable model is configured. Apple is never returned (no vision). + static func visionCapableFallback(preferring provider: ModelProvider) -> ModelSelection? 
{ + if let sameProvider = models(for: provider).first(where: { $0.supportsVision && $0.isUsable }) { + return sameProvider + } + for other in [ModelProvider.anthropic, .openAI, .fireworks] where other != provider { + if let model = models(for: other).first(where: { $0.supportsVision && $0.isUsable }) { + return model + } + } + return nil + } } enum ModelSelectionStore { diff --git a/LoopIOS/Data/OpenAIChat.swift b/LoopIOS/Data/OpenAIChat.swift index fa00ff4..8726ad8 100644 --- a/LoopIOS/Data/OpenAIChat.swift +++ b/LoopIOS/Data/OpenAIChat.swift @@ -71,6 +71,8 @@ final class OpenAIChat { /// a tool. func chat(messages: [MessageStruct], tools: [[String: Any]]? = nil, + modelIDOverride: String? = nil, + modelStampOverride: String? = nil, onPartial: ((String) -> Void)? = nil, completion: @escaping (MessageStruct?, Error?) -> Void) { @@ -83,8 +85,10 @@ final class OpenAIChat { // The selected OpenAI model's wire id. Falls back to gpt-5.5 only if // somehow called for a non-OpenAI selection (routing shouldn't let - // that happen). - let modelID = ModelSelectionStore.current.apiModelID ?? "gpt-5.5" + // that happen). `modelIDOverride` lets background callers (e.g. + // VisionSummaryService) pin a cheaper vision model regardless of the + // user's current selection. + let modelID = modelIDOverride ?? ModelSelectionStore.current.apiModelID ?? "gpt-5.5" var body: [String: Any] = [ "model": modelID, @@ -138,7 +142,7 @@ final class OpenAIChat { let msg = MessageStruct( role: "assistant", content: r.content, - model: ModelSelectionStore.current.stampedMessageModel, + model: modelStampOverride ?? 
ModelSelectionStore.current.stampedMessageModel, functions: r.toolCalls, reasoningContent: r.reasoningContent, tokenUsage: r.usage, @@ -151,6 +155,7 @@ final class OpenAIChat { let task = streamingSession.dataTask(with: req) streamingSessionDelegate.register(task: task, reader: reader) + LocalInferenceController.shared.track(task) task.resume() } @@ -166,7 +171,15 @@ final class OpenAIChat { /// Reused by sibling OpenAI-compatible clients (FireworksChat) — the /// wire shape is identical so there's no point duplicating the mapping. static func wireMessages(from messages: [MessageStruct]) -> [[String: Any]] { - return sanitizeToolCallPairing(stripUIPlaceholders(messages)).map { m -> [String: Any] in + let prepared = sanitizeToolCallPairing(stripUIPlaceholders(messages)) + // Id of the most recent genuine human turn. An image rides inline at + // full resolution only while its message is this last user turn; on + // later turns we swap the base64 `image_url` for its cached text + // description (see the image branch below). Tool results are + // `role:"function"`, so an in-turn agent loop keeps the image raw for + // the whole turn that introduced it. + let lastUserMessageId = prepared.last { $0.role == "user" }?.id + return prepared.map { m -> [String: Any] in if m.role == "function" { if let toolCallId = m.callId, !toolCallId.isEmpty { return [ @@ -247,16 +260,27 @@ final class OpenAIChat { if let f = m.fileAttachment, f.status == .ready, f.kind == .image, let dataURL = imageDataURL(for: f) { - // Real OpenAI vision: a `content` array with the text context - // plus an `image_url` part carrying the bytes inline, so the - // model actually sees the image instead of just a path hint. - let text = m.content.isEmpty - ? 
f.assistantHint - : "\(m.content)\n\n\(f.assistantHint)" - out["content"] = [ - ["type": "text", "text": text], - ["type": "image_url", "image_url": ["url": dataURL]], - ] + // Downgrade the image to its cached description on every turn + // after the one that introduced it — unless no summary exists + // yet, in which case we re-send the bytes so the model never + // loses sight of it (correctness over token savings). + let isIntroducingTurn = (m.id == lastUserMessageId) + if !isIntroducingTurn, let summaryHint = f.imageSummaryHint { + out["content"] = m.content.isEmpty + ? summaryHint + : "\(m.content)\n\n\(summaryHint)" + } else { + // Real OpenAI vision: a `content` array with the text + // context plus an `image_url` part carrying the bytes + // inline, so the model actually sees the image. + let text = m.content.isEmpty + ? f.assistantHint + : "\(m.content)\n\n\(f.assistantHint)" + out["content"] = [ + ["type": "text", "text": text], + ["type": "image_url", "image_url": ["url": dataURL]], + ] + } } else { // Text-only: PDFs inline their extracted text via // `assistantHint`; an unreadable image degrades to the path @@ -285,7 +309,7 @@ final class OpenAIChat { // Identifier prefix is the authoritative signal — set by the // skill that inserted the placeholder, not derivable from any // user-controlled content. - if m.id.hasPrefix("image-") || m.id.hasPrefix("pdf-") { return false } + if m.id.hasPrefix("image-") || m.id.hasPrefix("pdf-") || m.id.hasPrefix("story-") { return false } return true } } diff --git a/LoopIOS/Data/PendingConversationOpen.swift b/LoopIOS/Data/PendingConversationOpen.swift new file mode 100644 index 0000000..7fd00e0 --- /dev/null +++ b/LoopIOS/Data/PendingConversationOpen.swift @@ -0,0 +1,43 @@ +// +// PendingConversationOpen.swift +// Loop +// +// One-shot box for a "open this conversation" request that arrives before +// MessagingVC (or the conversation store) is ready to honor it — the +// cold-start notification-tap path. 
A cron / runner-turn push is usually +// tapped while the app is killed: `didReceive` fires before the storyboard's +// MessagingVC is in the window hierarchy and before ConversationFileStore has +// hydrated, so the immediate `getConversation(by:)` lookup returns nil and the +// tap would otherwise be dropped. AppDelegate stashes the id here instead; +// MessagingVC drains it in `viewDidLoad` and again on `.conversationStoreDidBecomeReady`. +// +// Single slot (last write wins): if two notifications are tapped, the user +// wants the most recent conversation, so clobbering the earlier id is correct. +// + +import Foundation + +final class PendingConversationOpen { + static let shared = PendingConversationOpen() + private init() {} + + private var conversationId: String? + private let lock = NSLock() + + /// Stash a conversation id to open once the chat surface is ready. + func set(_ id: String) { + lock.lock() + conversationId = id + lock.unlock() + } + + /// Atomically read + clear the stashed id, so a drain on view-load can't + /// race a drain on store-ready into opening the same conversation twice. + func take() -> String? { + lock.lock() + let id = conversationId + conversationId = nil + lock.unlock() + return id + } +} diff --git a/LoopIOS/Data/SimpleConversationManager.swift b/LoopIOS/Data/SimpleConversationManager.swift index 65c348f..81fcf8d 100644 --- a/LoopIOS/Data/SimpleConversationManager.swift +++ b/LoopIOS/Data/SimpleConversationManager.swift @@ -71,6 +71,10 @@ struct SimpleMessage: Codable { /// an app kill (reload coerces a stale `.generating` to a retryable /// `.failed`). let imageAttachment: String? + /// JSON-encoded `ImageGalleryAttachment` (web image-search results). Nil + /// unless `SerpImageSearchSkill` ran for this turn. Persisted so the inline + /// thumbnail gallery survives relaunch. + let imageGalleryAttachment: String? /// Which model produced the message — "GPT 5.5 Instant", "Apple LLM", /// etc. 
Optional for backward compatibility with NDJSON entries written /// before this field existed. When nil, callers default to whatever @@ -97,6 +101,7 @@ struct SimpleMessage: Codable { fileAttachment: String? = nil, mapAttachment: String? = nil, imageAttachment: String? = nil, + imageGalleryAttachment: String? = nil, model: String? = nil, isCompactionSummary: Bool? = nil, responseSeconds: TimeInterval? = nil, @@ -112,6 +117,7 @@ struct SimpleMessage: Codable { self.fileAttachment = fileAttachment self.mapAttachment = mapAttachment self.imageAttachment = imageAttachment + self.imageGalleryAttachment = imageGalleryAttachment self.model = model self.isCompactionSummary = isCompactionSummary self.responseSeconds = responseSeconds @@ -279,6 +285,32 @@ class SimpleConversationManager { } } + /// Persist a freshly generated vision description onto an image attachment + /// already stored in the conversation. `VisionSummaryService` calls this + /// off the chat-completion path once it has a description, so subsequent + /// turns can inline the text instead of re-sending the raw image. Looks the + /// message up by the attachment's `id` (which survives the SimpleMessage + /// round-trip), stamps `visionSummary`, and re-saves in place via + /// `updateMessage`. No-op — returns false — when the conversation or + /// attachment can't be found or a summary is already present. + @discardableResult + func updateAttachmentSummary(attachmentId: String, summary: String, conversationId: String) -> Bool { + guard let conversation = router.conversation(id: conversationId) else { return false } + for simple in router.messages(forConversation: conversationId) { + guard let attachmentString = simple.fileAttachment, + let data = attachmentString.data(using: .utf8), + let attachment = try? JSONDecoder().decode(FileAttachment.self, from: data), + attachment.id == attachmentId else { continue } + // Already summarized (e.g. two in-flight jobs raced) — leave it. 
+ guard attachment.visionSummary == nil else { return false } + var message = messageStruct(from: simple) + message.fileAttachment?.visionSummary = summary + updateMessage(message, in: conversation) + return true + } + return false + } + /// Serializes a `MessageStruct` into the persisted `SimpleMessage` shape. /// Shared by `addMessage` (createdAt = now) and `updateMessage` (createdAt /// = the original, to keep ordering stable on rewrite). @@ -354,6 +386,16 @@ class SimpleConversationManager { } } + // Serialize the web image-search gallery so the thumbnail strip + // survives relaunch / sync (URLs only — images re-fetch on display). + var imageGalleryAttachmentString: String? = nil + if let attachment = messageStruct.imageGalleryAttachment { + if let data = try? JSONEncoder().encode(attachment), + let string = String(data: data, encoding: .utf8) { + imageGalleryAttachmentString = string + } + } + return SimpleMessage( id: messageStruct.id, role: messageStruct.role, @@ -366,10 +408,13 @@ class SimpleConversationManager { fileAttachment: fileAttachmentString, mapAttachment: mapAttachmentString, imageAttachment: imageAttachmentString, + imageGalleryAttachment: imageGalleryAttachmentString, // Persist model attribution so reload paths can show the same // "Apple LLM" / "GPT 5.5 Instant" badge that the live append - // path renders. Only meaningful for assistant turns. - model: messageStruct.role == "assistant" ? messageStruct.model : nil, + // path renders. Only meaningful for assistant turns. User turns + // reuse this column to carry the dictation byline ("Deepgram STT"/"Apple STT"), + // which is otherwise unused for the user side. + model: messageStruct.role == "assistant" ? messageStruct.model : messageStruct.sttEngine, isCompactionSummary: messageStruct.isCompactionSummary ? true : nil, responseSeconds: messageStruct.role == "assistant" ? 
messageStruct.ttft : nil, createdAt: createdAt @@ -483,12 +528,19 @@ class SimpleConversationManager { id: simpleMessage.id, role: simpleMessage.role, content: simpleMessage.content, - name: simpleMessage.name + name: simpleMessage.name, + timestamp: simpleMessage.createdAt ) // Old NDJSON rows have no `model` field — leave the MessageStruct // default in place for those so behavior is unchanged for old chats. + // User rows reuse this column for the dictation byline (see + // `toSimpleMessage`), so route it back to `sttEngine` there. if let stored = simpleMessage.model, !stored.isEmpty { - messageStruct.model = stored + if simpleMessage.role == "user" { + messageStruct.sttEngine = stored + } else { + messageStruct.model = stored + } } // Persisted response time renders next to the model name (e.g. "… 2.6s"). if let seconds = simpleMessage.responseSeconds { @@ -548,6 +600,12 @@ class SimpleConversationManager { messageStruct.imageAttachment = image } + if let galleryString = simpleMessage.imageGalleryAttachment, + let galleryData = galleryString.data(using: .utf8), + let gallery = try? 
JSONDecoder().decode(ImageGalleryAttachment.self, from: galleryData) { + messageStruct.imageGalleryAttachment = gallery + } + if simpleMessage.isCompactionSummary == true { messageStruct.isCompactionSummary = true } diff --git a/LoopIOS/Data/SpeechProvider.swift b/LoopIOS/Data/SpeechProvider.swift index bd87ae6..5fb5876 100644 --- a/LoopIOS/Data/SpeechProvider.swift +++ b/LoopIOS/Data/SpeechProvider.swift @@ -128,7 +128,12 @@ enum TTSProvider: String, CaseIterable { ("Adam (deep male)", "pNInz6obpgDQGcFmaJgB"), ("Antoni (calm male)", "ErXwobaYiN019PkySvjV"), ("Elli (soft female)", "MF3mGyEYCl7XYWbV9V6O"), - ("Josh (steady male)", "TxGEqnHWrfWFTfGW9XjX") + ("Josh (steady male)", "TxGEqnHWrfWFTfGW9XjX"), + ("Hayes (english male)", "sIivXWc5MTlPIP3kJXhg"), + ("Rory (irish male)", "hmMWXCj9K7N5mCPcRkfC"), + ("Hannah (american female)", "ZSNL4hPqCnqoMPaI4jGX"), + ("Zoe (african american female)", "M6ic45wruJGWAxLFEMNK"), + ("Agent (secret agent male)", "ICIc5IiEgLitxGwyb7ZG") ] case .openAIMiniTTS: return ["alloy", "echo", "fable", "onyx", "nova", @@ -144,7 +149,7 @@ enum TTSProvider: String, CaseIterable { switch self { case .aura2: return "aura-2-thalia-en" case .elevenLabsV3: return "21m00Tcm4TlvDq8ikWAM" - case .elevenLabsFlashV25: return "21m00Tcm4TlvDq8ikWAM" + case .elevenLabsFlashV25: return "ZSNL4hPqCnqoMPaI4jGX" // Hannah (american female) case .openAIMiniTTS: return "shimmer" case .system: return "" } @@ -162,8 +167,8 @@ enum TTSProviderStore { static var current: TTSProvider { get { - let raw = iCloudKVSDefaults.shared.string(forKey: defaultsKey) ?? TTSProvider.openAIMiniTTS.rawValue - return TTSProvider(rawValue: raw) ?? .openAIMiniTTS + let raw = iCloudKVSDefaults.shared.string(forKey: defaultsKey) ?? TTSProvider.elevenLabsFlashV25.rawValue + return TTSProvider(rawValue: raw) ?? 
.elevenLabsFlashV25 } set { iCloudKVSDefaults.shared.set(newValue.rawValue, forKey: defaultsKey) diff --git a/LoopIOS/Data/VisionSummaryService.swift b/LoopIOS/Data/VisionSummaryService.swift new file mode 100644 index 0000000..457eca5 --- /dev/null +++ b/LoopIOS/Data/VisionSummaryService.swift @@ -0,0 +1,181 @@ +// +// VisionSummaryService.swift +// Loop +// +// Generates a one-time text description of a user-uploaded image so the local +// chat clients can stop re-sending the raw base64 image on every turn. +// +// The problem: `AnthropicChat.wirePayload` / `OpenAIChat.wireMessages` rebuild +// the whole conversation each turn, and an image attachment re-emits its full +// base64 block every time — so a long thread pays the image's input tokens on +// every request. The fix: send the image at full resolution only on the turn +// that introduced it (the last human turn), and on later turns inline a cached +// prose description instead. This service produces that description. +// +// Flow: `MessagingVC` calls `ensureSummaries(for:conversationId:)` right after +// it sends a user message. For each ready image attachment that doesn't yet +// have a `visionSummary`, we fire a single background vision call (Anthropic or +// OpenAI, whichever the user has keyed), then persist the result onto the +// attachment via `SimpleConversationManager.updateAttachmentSummary`. By the +// user's next turn the summary is usually ready; if it isn't (fast follow-up), +// the chat clients fall back to re-sending the raw image, so the model never +// silently loses sight of it. On vision failure / no provider key we fall back +// to the attachment's on-device OCR text when present. +// +// Idempotent: an in-flight guard plus the "already has a summary" check mean +// re-invoking on every turn (or on conversation reload) is safe. 
+// + +import Foundation + +final class VisionSummaryService { + static let shared = VisionSummaryService() + private init() {} + + /// Serializes access to `inFlight` and ensures we only launch one job per + /// attachment id even when `ensureSummaries` is called from several turns + /// in quick succession. + private let stateQueue = DispatchQueue(label: "loop.vision-summary.state") + private var inFlight: Set = [] + + /// Cheap, vision-capable model per provider. We deliberately don't reuse the + /// user's selected chat model — it may be text-only (the default is often + /// Fireworks GLM 5.2) — and a fast small model is plenty for a descriptive + /// caption. Picked per-provider so the summary bills to a key the user + /// already has. + private static let anthropicVisionModel = "claude-haiku-4-5-20251001" + private static let openAIVisionModel = "gpt-4o" + private static let fireworksVisionModel = "accounts/fireworks/models/kimi-k2p6" + + private static let prompt = """ + Describe this image in thorough detail so that someone who cannot see it \ + could confidently answer follow-up questions about it. Transcribe any text \ + verbatim, and cover layout, objects, people, colors, and any numbers, data, \ + or values present. Respond with plain prose only — no preamble, no headings, \ + no markdown. + """ + + /// Scan `messages` for ready image attachments lacking a description and + /// kick off a background job for each. Safe to call repeatedly. + func ensureSummaries(for messages: [MessageStruct], conversationId: String) { + let pending = messages.compactMap { m -> FileAttachment? 
in + guard let f = m.fileAttachment, + f.kind == .image, + f.status == .ready, + f.visionSummary == nil else { return nil } + return f + } + guard !pending.isEmpty else { return } + for attachment in pending { + stateQueue.async { + guard !self.inFlight.contains(attachment.id) else { return } + self.inFlight.insert(attachment.id) + self.generate(for: attachment, conversationId: conversationId) + } + } + } + + // MARK: - Generation + + private func generate(for attachment: FileAttachment, conversationId: String) { + guard let target = Self.visionTarget() else { + // No vision-capable provider configured — best we can do is the + // on-device OCR text, if any. + finishWithOCRFallback(attachment, conversationId: conversationId) + return + } + + // A single synthetic user turn carrying just the image. Because it's the + // only (hence last) user message, the chat clients send it at full + // resolution — exactly what we want for the description pass. + let probe = MessageStruct(role: "user", content: Self.prompt, fileAttachment: attachment) + + let handle: (MessageStruct?, Error?) 
-> Void = { [weak self] message, error in + guard let self = self else { return } + let text = message?.content.trimmingCharacters(in: .whitespacesAndNewlines) + if let text = text, !text.isEmpty { + self.finish(attachmentId: attachment.id, + summary: Self.capped(text), + conversationId: conversationId) + } else { + if let error = error { + print("VisionSummaryService: vision call failed (\(error.localizedDescription)); falling back to OCR") + } + self.finishWithOCRFallback(attachment, conversationId: conversationId) + } + } + + switch target.provider { + case .anthropic: + AnthropicChat.shared.chat(messages: [probe], tools: nil, + modelIDOverride: target.modelID, completion: handle) + case .openAI: + OpenAIChat.shared.chat(messages: [probe], tools: nil, + modelIDOverride: target.modelID, completion: handle) + case .fireworks: + FireworksChat.shared.chat(messages: [probe], tools: nil, + modelIDOverride: target.modelID, completion: handle) + case .apple: + // visionTarget() only ever returns hosted providers; keep the + // switch exhaustive. + finishWithOCRFallback(attachment, conversationId: conversationId) + } + } + + /// Use the image's on-device OCR text as the description when a real vision + /// call isn't possible. Leaves the attachment un-summarized (image keeps + /// being sent raw) when there's no text either. 
+    private func finishWithOCRFallback(_ attachment: FileAttachment, conversationId: String) {
+        guard let recognized = attachment.extractedText, !recognized.isEmpty else {
+            // No OCR text to fall back on: only clear the in-flight marker,
+            // no summary is stored for this attachment.
+            stateQueue.async { self.inFlight.remove(attachment.id) }
+            return
+        }
+        let fallbackSummary = Self.capped("(On-device text extracted from the image — no full visual description available.)\n\(recognized)")
+        finish(attachmentId: attachment.id,
+               summary: fallbackSummary,
+               conversationId: conversationId)
+    }
+
+    /// Hand `summary` to the conversation store, then clear the attachment's
+    /// in-flight marker.
+    private func finish(attachmentId: String, summary: String, conversationId: String) {
+        // The store mutation is dispatched to main, like every other call into
+        // SimpleConversationManager, so `_currentConversation` updates stay on
+        // the main thread (chat completions fire on a URLSession delegate queue).
+        DispatchQueue.main.async {
+            let store = SimpleConversationManager.shared
+            store.updateAttachmentSummary(attachmentId: attachmentId,
+                                          summary: summary,
+                                          conversationId: conversationId)
+        }
+        stateQueue.async { self.inFlight.remove(attachmentId) }
+    }
+
+    // MARK: - Provider selection
+
+    private struct VisionTarget {
+        let provider: ModelProvider
+        let modelID: String
+    }
+
+    /// Pick a vision-capable provider + cheap model for the description pass.
+    /// Prefers the user's current provider when it's keyed (so the summary bills
+    /// to the account they're already using), then the first keyed of
+    /// Anthropic / OpenAI / Fireworks. Returns nil when none has a key — the
+    /// caller then falls back to on-device OCR.
+    private static func visionTarget() -> VisionTarget?
{ + let current = ModelSelectionStore.current.provider + for provider in [current, .anthropic, .openAI, .fireworks] { + switch provider { + case .anthropic where KeyStore.shared.source(for: .anthropic) != .missing: + return VisionTarget(provider: .anthropic, modelID: anthropicVisionModel) + case .openAI where KeyStore.shared.source(for: .openAI) != .missing: + return VisionTarget(provider: .openAI, modelID: openAIVisionModel) + case .fireworks where KeyStore.shared.source(for: .fireworks) != .missing: + return VisionTarget(provider: .fireworks, modelID: fireworksVisionModel) + default: + continue + } + } + return nil + } + + /// Keep descriptions from ballooning the payload — well above any real + /// caption, but a hard stop for a runaway OCR dump. + private static func capped(_ text: String) -> String { + let cap = FileAttachment.extractedTextCharCap + return text.count > cap ? String(text.prefix(cap)) : text + } +} diff --git a/LoopIOS/Feed/Card+Display.swift b/LoopIOS/Feed/Card+Display.swift new file mode 100644 index 0000000..80dc75b --- /dev/null +++ b/LoopIOS/Feed/Card+Display.swift @@ -0,0 +1,85 @@ +// +// Card+Display.swift +// Loop +// +// UI-facing derivations for a Card: the one-line summary, the kind badge, and +// the icon tile's symbol + tint. The model itself stays UI-agnostic; these +// inferred values back both the card list rows and the detail view. +// + +#if os(iOS) +import UIKit + +extension Card { + + /// First meaningful line of the body, stripped of markdown markers, used as + /// the one-line summary under the title. + var displaySubtitle: String? { + for raw in body.split(whereSeparator: \.isNewline) { + var s = raw.trimmingCharacters(in: .whitespaces) + // Drop leading heading / list / quote markers. + while let first = s.first, "#-*•>".contains(first) { + s = String(s.dropFirst()).trimmingCharacters(in: .whitespaces) + } + // Drop a leading checkbox. 
+            if s.hasPrefix("[ ]") || s.hasPrefix("[x]") || s.hasPrefix("[X]") {
+                s = String(s.dropFirst(3)).trimmingCharacters(in: .whitespaces)
+            }
+            // Drop a leading "1." style ordinal.
+            if let dot = s.firstIndex(of: "."), dot != s.startIndex,
+               s[s.startIndex..= 2 ? "LIST" : "NOTE"
+        }
+    }
+
+    /// SF Symbol + tint for the icon tile, inferred from the title and tags.
+    ///
+    /// The lowercased title and tags are searched for each rule's keywords as
+    /// substrings; the first rule with a hit wins. When no rule matches, the
+    /// symbol comes from the card kind and the tint is picked from a small
+    /// palette by a deterministic hash of `id`, so a given card keeps the same
+    /// color on every launch.
+    var displayIcon: (symbol: String, tint: UIColor) {
+        let haystack = (title + " " + tags.joined(separator: " ")).lowercased()
+        let rules: [(keys: [String], symbol: String, tint: UIColor)] = [
+            (["dinner", "food", "meal", "recipe", "cook", "prep", "kitchen"], "fork.knife", UIColor(red: 0.70, green: 0.45, blue: 0.28, alpha: 1)),
+            (["music", "playlist", "song", "track"], "music.note", UIColor(red: 0.78, green: 0.60, blue: 0.30, alpha: 1)),
+            (["wine", "drink", "cocktail", "bar"], "wineglass", UIColor(red: 0.66, green: 0.30, blue: 0.34, alpha: 1)),
+            (["guest", "people", "friend", "person", "contact"], "person.2.fill", UIColor(red: 0.40, green: 0.52, blue: 0.74, alpha: 1)),
+            (["task", "todo", "checklist", "done"], "checklist", UIColor(red: 0.38, green: 0.60, blue: 0.45, alpha: 1)),
+            (["travel", "trip", "flight", "map", "world", "news"], "airplane", UIColor(red: 0.36, green: 0.54, blue: 0.70, alpha: 1)),
+            (["note", "idea", "thought"], "note.text", UIColor(red: 0.55, green: 0.45, blue: 0.72, alpha: 1)),
+        ]
+        for rule in rules where rule.keys.contains(where: haystack.contains) {
+            return (rule.symbol, rule.tint)
+        }
+        // Stable fallback tint from the id so cards keep distinct colors.
+        let palette: [UIColor] = [
+            UIColor(red: 0.55, green: 0.45, blue: 0.72, alpha: 1),
+            UIColor(red: 0.40, green: 0.52, blue: 0.74, alpha: 1),
+            UIColor(red: 0.38, green: 0.60, blue: 0.45, alpha: 1),
+            UIColor(red: 0.70, green: 0.45, blue: 0.28, alpha: 1),
+        ]
+        // `hashValue` is seeded per process, so it would recolor the card on
+        // every launch, and `abs(Int.min)` traps. Fold the id's UTF-8 bytes with
+        // 64-bit FNV-1a instead: deterministic, and unsigned so no `abs` needed.
+        var digest: UInt64 = 0xcbf29ce484222325
+        for byte in id.utf8 {
+            digest = (digest ^ UInt64(byte)) &* 0x100000001b3
+        }
+        let idx = Int(digest % UInt64(palette.count))
+        return (kind == .image ?
"photo" : "doc.text", palette[idx]) + } +} + +#endif diff --git a/LoopIOS/Feed/Card.swift b/LoopIOS/Feed/Card.swift new file mode 100644 index 0000000..e08d719 --- /dev/null +++ b/LoopIOS/Feed/Card.swift @@ -0,0 +1,67 @@ +// +// Card.swift +// Loop +// +// Data model for Feed cards. Persisted as JSON in workspace://cards/.json. +// Image assets live at workspace://cards/assets/.png. +// + +import Foundation + +/// The visual kind of a card — determines which renderer produces the poster. +enum CardKind: String, Codable { + case image + case markdown +} + +/// Lifecycle state of a card in the user's feed. +enum CardState: String, Codable { + case new + case kept + case archived +} + +/// A single feed card produced by the `generate_card` tool. +struct Card: Codable, Identifiable { + let id: String + let kind: CardKind + var title: String + var body: String + /// Relative path to the rendered poster image inside workspace (e.g. + /// "cards/assets/.png"). Nil while the renderer is still working. + var imageURL: String? + /// Attribution / provenance string (e.g. "calendar", "user request"). + var source: String? + /// Freeform tags for filtering/search. + var tags: [String] + let createdAt: Date + var state: CardState + + enum CodingKeys: String, CodingKey { + case id, kind, title, body + case imageURL = "image_url" + case source, tags + case createdAt = "created_at" + case state + } + + init(id: String = UUID().uuidString, + kind: CardKind, + title: String, + body: String, + imageURL: String? = nil, + source: String? 
= nil, + tags: [String] = [], + createdAt: Date = Date(), + state: CardState = .new) { + self.id = id + self.kind = kind + self.title = title + self.body = body + self.imageURL = imageURL + self.source = source + self.tags = tags + self.createdAt = createdAt + self.state = state + } +} diff --git a/LoopIOS/Feed/CardDetailViewController.swift b/LoopIOS/Feed/CardDetailViewController.swift new file mode 100644 index 0000000..929facd --- /dev/null +++ b/LoopIOS/Feed/CardDetailViewController.swift @@ -0,0 +1,284 @@ +// +// CardDetailViewController.swift +// Loop +// +// Expanded detail for a single card, presented as a dark sheet. A header row +// carries the icon tile, kind badge, and a close button; the body renders the +// card's markdown; a pinned bottom bar offers Archive / Done. +// + +#if os(iOS) +import UIKit + +final class CardDetailViewController: UIViewController { + + private let card: Card + + /// Warm gold accent shared with the card list. + private let accent = FeedCardListView.accent + /// Near-black sheet background. 
+ private let sheetBackground = UIColor(red: 0.05, green: 0.05, blue: 0.055, alpha: 1) + + // MARK: - UI + + private let scrollView = UIScrollView() + private let contentStack = UIStackView() + + private let iconTile: UIView = { + let v = UIView() + v.layer.cornerRadius = 11 + v.layer.cornerCurve = .continuous + v.translatesAutoresizingMaskIntoConstraints = false + return v + }() + + private let iconView: UIImageView = { + let iv = UIImageView() + iv.contentMode = .scaleAspectFit + iv.preferredSymbolConfiguration = UIImage.SymbolConfiguration(pointSize: 18, weight: .semibold) + iv.translatesAutoresizingMaskIntoConstraints = false + return iv + }() + + private let badgeLabel: UILabel = { + let l = UILabel() + l.font = .systemFont(ofSize: 13, weight: .bold) + return l + }() + + private let titleLabel: UILabel = { + let l = UILabel() + l.font = .systemFont(ofSize: 32, weight: .bold) + l.textColor = .white + l.numberOfLines = 0 + return l + }() + + private let bodyLabel: UILabel = { + let l = UILabel() + l.font = .systemFont(ofSize: 17, weight: .regular) + l.textColor = UIColor(white: 0.78, alpha: 1) + l.numberOfLines = 0 + return l + }() + + private let divider: UIView = { + let v = UIView() + v.backgroundColor = UIColor(white: 1, alpha: 0.1) + v.translatesAutoresizingMaskIntoConstraints = false + return v + }() + + private let metaLabel: UILabel = { + let l = UILabel() + l.font = .systemFont(ofSize: 14, weight: .regular) + l.textColor = UIColor(white: 0.5, alpha: 1) + l.numberOfLines = 0 + return l + }() + + // MARK: - Init + + init(card: Card) { + self.card = card + super.init(nibName: nil, bundle: nil) + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + // MARK: - Lifecycle + + override func viewDidLoad() { + super.viewDidLoad() + view.backgroundColor = sheetBackground + navigationController?.setNavigationBarHidden(true, animated: false) + if let sheet = (navigationController ?? 
self).sheetPresentationController { + sheet.prefersGrabberVisible = true + } + setupLayout() + populate() + } + + override func viewWillAppear(_ animated: Bool) { + super.viewWillAppear(animated) + navigationController?.setNavigationBarHidden(true, animated: animated) + } + + // MARK: - Layout + + private func setupLayout() { + let header = makeHeader() + header.translatesAutoresizingMaskIntoConstraints = false + view.addSubview(header) + + let actionBar = makeActionBar() + actionBar.translatesAutoresizingMaskIntoConstraints = false + view.addSubview(actionBar) + + scrollView.translatesAutoresizingMaskIntoConstraints = false + scrollView.showsVerticalScrollIndicator = false + view.addSubview(scrollView) + + contentStack.axis = .vertical + contentStack.spacing = 18 + contentStack.alignment = .fill + contentStack.translatesAutoresizingMaskIntoConstraints = false + scrollView.addSubview(contentStack) + + contentStack.addArrangedSubview(titleLabel) + contentStack.addArrangedSubview(bodyLabel) + contentStack.addArrangedSubview(divider) + contentStack.addArrangedSubview(metaLabel) + contentStack.setCustomSpacing(22, after: bodyLabel) + contentStack.setCustomSpacing(14, after: divider) + + NSLayoutConstraint.activate([ + header.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 4), + header.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 24), + header.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -24), + + scrollView.topAnchor.constraint(equalTo: header.bottomAnchor, constant: 18), + scrollView.leadingAnchor.constraint(equalTo: view.leadingAnchor), + scrollView.trailingAnchor.constraint(equalTo: view.trailingAnchor), + scrollView.bottomAnchor.constraint(equalTo: actionBar.topAnchor), + + contentStack.topAnchor.constraint(equalTo: scrollView.topAnchor), + contentStack.leadingAnchor.constraint(equalTo: scrollView.leadingAnchor, constant: 24), + contentStack.trailingAnchor.constraint(equalTo: 
scrollView.trailingAnchor, constant: -24), + contentStack.bottomAnchor.constraint(equalTo: scrollView.bottomAnchor, constant: -24), + contentStack.widthAnchor.constraint(equalTo: scrollView.widthAnchor, constant: -48), + + divider.heightAnchor.constraint(equalToConstant: 1), + + actionBar.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 24), + actionBar.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -24), + actionBar.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor, constant: -8), + ]) + } + + /// Icon tile + kind badge on the left, close button on the right. + private func makeHeader() -> UIView { + let container = UIView() + + iconTile.addSubview(iconView) + container.addSubview(iconTile) + container.addSubview(badgeLabel) + badgeLabel.translatesAutoresizingMaskIntoConstraints = false + + let close = UIButton(type: .system) + close.setImage(UIImage(systemName: "xmark", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 15, weight: .semibold)), + for: .normal) + close.tintColor = UIColor(white: 0.85, alpha: 1) + close.backgroundColor = UIColor(white: 1, alpha: 0.1) + close.layer.cornerRadius = 18 + close.translatesAutoresizingMaskIntoConstraints = false + close.addTarget(self, action: #selector(closeTapped), for: .touchUpInside) + container.addSubview(close) + + NSLayoutConstraint.activate([ + iconTile.leadingAnchor.constraint(equalTo: container.leadingAnchor), + iconTile.centerYAnchor.constraint(equalTo: container.centerYAnchor), + iconTile.widthAnchor.constraint(equalToConstant: 40), + iconTile.heightAnchor.constraint(equalToConstant: 40), + container.heightAnchor.constraint(equalToConstant: 40), + + iconView.centerXAnchor.constraint(equalTo: iconTile.centerXAnchor), + iconView.centerYAnchor.constraint(equalTo: iconTile.centerYAnchor), + + badgeLabel.leadingAnchor.constraint(equalTo: iconTile.trailingAnchor, constant: 12), + badgeLabel.centerYAnchor.constraint(equalTo: container.centerYAnchor), + + 
close.trailingAnchor.constraint(equalTo: container.trailingAnchor), + close.centerYAnchor.constraint(equalTo: container.centerYAnchor), + close.widthAnchor.constraint(equalToConstant: 36), + close.heightAnchor.constraint(equalToConstant: 36), + ]) + return container + } + + /// Pinned Archive (gold-outlined) / Done (filled) buttons. + private func makeActionBar() -> UIView { + let archive = UIButton(type: .system) + archive.setTitle("Archive", for: .normal) + archive.setTitleColor(accent, for: .normal) + archive.titleLabel?.font = .systemFont(ofSize: 17, weight: .semibold) + archive.backgroundColor = accent.withAlphaComponent(0.08) + archive.layer.cornerRadius = 14 + archive.layer.cornerCurve = .continuous + archive.layer.borderWidth = 1 + archive.layer.borderColor = accent.withAlphaComponent(0.7).cgColor + archive.addTarget(self, action: #selector(archiveTapped), for: .touchUpInside) + + let done = UIButton(type: .system) + done.setTitle("Done", for: .normal) + done.setTitleColor(.white, for: .normal) + done.titleLabel?.font = .systemFont(ofSize: 17, weight: .semibold) + done.backgroundColor = UIColor(white: 1, alpha: 0.1) + done.layer.cornerRadius = 14 + done.layer.cornerCurve = .continuous + done.addTarget(self, action: #selector(doneTapped), for: .touchUpInside) + + let stack = UIStackView(arrangedSubviews: [archive, done]) + stack.axis = .horizontal + stack.spacing = 14 + stack.distribution = .fillEqually + archive.heightAnchor.constraint(equalToConstant: 56).isActive = true + done.heightAnchor.constraint(equalToConstant: 56).isActive = true + return stack + } + + // MARK: - Populate + + private func populate() { + let style = card.displayIcon + iconView.image = UIImage(systemName: style.symbol) + iconView.tintColor = style.tint.withAlphaComponent(0.95) + iconTile.backgroundColor = style.tint.withAlphaComponent(0.22) + + badgeLabel.attributedText = NSAttributedString( + string: card.displayBadge, + attributes: [.kern: 1.5, .foregroundColor: accent, + .font: 
UIFont.systemFont(ofSize: 13, weight: .bold)]) + + titleLabel.text = card.title + + if card.kind == .markdown { + bodyLabel.attributedText = CardMarkdown.attributed( + card.body, + bodyFont: .systemFont(ofSize: 17, weight: .regular), + textColor: UIColor(white: 0.82, alpha: 1), + headingColor: .white, + bulletColor: accent) + } else { + bodyLabel.text = card.body + } + + var meta = "\(card.kind.rawValue.capitalized) card" + if let source = card.source { meta += " · created from \(source)" } + let df = DateFormatter() + df.dateStyle = .medium + df.timeStyle = .none + meta += " · \(df.string(from: card.createdAt))" + metaLabel.text = meta + } + + // MARK: - Actions + + @objc private func closeTapped() { dismiss(animated: true) } + + @objc private func doneTapped() { + // "Done" acknowledges the card but keeps it in the feed. + CardStore.shared.updateState(id: card.id, state: .kept) + dismiss(animated: true) + } + + @objc private func archiveTapped() { + CardStore.shared.updateState(id: card.id, state: .archived) + dismiss(animated: true) + } +} + +#endif diff --git a/LoopIOS/Feed/CardMarkdown.swift b/LoopIOS/Feed/CardMarkdown.swift new file mode 100644 index 0000000..a21fc58 --- /dev/null +++ b/LoopIOS/Feed/CardMarkdown.swift @@ -0,0 +1,139 @@ +// +// CardMarkdown.swift +// Loop +// +// Lightweight Markdown → NSAttributedString converter for feed cards. Handles +// the block + inline subset that shows up in card bodies: headings (#, ##, +// ###), bullet and numbered lists, bold, italic, and inline code. Used by the +// markdown poster renderer and the card detail view so a markdown card reads +// as formatted text rather than literal `#`/`*` characters. +// +// Deliberately not a full CommonMark parser — Foundation's AttributedString +// markdown init collapses block structure (lists/headings lose their styling), +// which is exactly what we need to preserve here. 
+// + +#if os(iOS) +import UIKit + +enum CardMarkdown { + + /// Convert `markdown` to an attributed string laid out for a card. + /// - Parameters: + /// - bodyFont: base font for paragraph text; headings scale from it. + /// - textColor: color for body/list text. + /// - headingColor: color for heading lines (defaults to `textColor`). + static func attributed(_ markdown: String, + bodyFont: UIFont, + textColor: UIColor, + headingColor: UIColor? = nil, + bulletColor: UIColor? = nil) -> NSAttributedString { + let headingColor = headingColor ?? textColor + let bulletColor = bulletColor ?? textColor + let out = NSMutableAttributedString() + let lines = markdown + .replacingOccurrences(of: "\r\n", with: "\n") + .components(separatedBy: "\n") + + for (idx, raw) in lines.enumerated() { + let trimmed = raw.trimmingCharacters(in: .whitespaces) + + let para = NSMutableParagraphStyle() + para.lineSpacing = bodyFont.pointSize * 0.16 + para.paragraphSpacing = bodyFont.pointSize * 0.45 + + let content: NSAttributedString + if trimmed.hasPrefix("### ") { + let f = UIFont.systemFont(ofSize: bodyFont.pointSize * 1.15, weight: .semibold) + para.paragraphSpacingBefore = bodyFont.pointSize * 0.3 + content = inline(String(trimmed.dropFirst(4)), baseFont: f, color: headingColor) + } else if trimmed.hasPrefix("## ") { + let f = UIFont.systemFont(ofSize: bodyFont.pointSize * 1.35, weight: .bold) + para.paragraphSpacingBefore = bodyFont.pointSize * 0.4 + content = inline(String(trimmed.dropFirst(3)), baseFont: f, color: headingColor) + } else if trimmed.hasPrefix("# ") { + let f = UIFont.systemFont(ofSize: bodyFont.pointSize * 1.6, weight: .bold) + content = inline(String(trimmed.dropFirst(2)), baseFont: f, color: headingColor) + } else if trimmed.hasPrefix("- ") || trimmed.hasPrefix("* ") || trimmed.hasPrefix("• ") { + para.headIndent = bodyFont.pointSize * 1.2 // wrapped lines hang under text + let bullet = NSMutableAttributedString( + string: "• ", + attributes: [.font: bodyFont, 
.foregroundColor: bulletColor]) + bullet.append(inline(String(trimmed.dropFirst(2)), baseFont: bodyFont, color: textColor)) + content = bullet + } else if let rest = numberedListBody(trimmed) { + para.headIndent = bodyFont.pointSize * 1.4 + content = inline(rest, baseFont: bodyFont, color: textColor) + } else { + content = inline(trimmed, baseFont: bodyFont, color: textColor) + } + + let m = NSMutableAttributedString(attributedString: content) + m.addAttribute(.paragraphStyle, value: para, range: NSRange(location: 0, length: m.length)) + out.append(m) + if idx < lines.count - 1 { + out.append(NSAttributedString(string: "\n")) + } + } + return out + } + + // MARK: - Inline + + /// Parse `**bold**`, `*italic*` / `_italic_`, and `` `code` `` within a line. + private static func inline(_ text: String, baseFont: UIFont, color: UIColor) -> NSAttributedString { + let out = NSMutableAttributedString() + let chars = Array(text) + var i = 0 + var run = "" + var bold = false, italic = false, code = false + + func emit() { + guard !run.isEmpty else { return } + var font = baseFont + if code { + font = UIFont.monospacedSystemFont(ofSize: baseFont.pointSize * 0.92, + weight: bold ? 
.bold : .regular)
+            } else {
+                var traits: UIFontDescriptor.SymbolicTraits = []
+                if bold { traits.insert(.traitBold) }
+                if italic { traits.insert(.traitItalic) }
+                if !traits.isEmpty,
+                   let d = baseFont.fontDescriptor.withSymbolicTraits(traits) {
+                    font = UIFont(descriptor: d, size: baseFont.pointSize)
+                }
+            }
+            out.append(NSAttributedString(string: run, attributes: [.font: font, .foregroundColor: color]))
+            run = ""
+        }
+
+        // Single pass over the characters: markers flush the pending run and
+        // flip a style flag; everything else accumulates into `run`.
+        while i < chars.count {
+            let c = chars[i]
+            // The backtick is checked first so it can always close a code span.
+            if c == "`" {
+                emit(); code.toggle(); i += 1; continue
+            }
+            // Inside a code span every other character is literal. Without this
+            // guard `*` and `_` in inline code were swallowed as emphasis
+            // markers and disappeared from the rendered text.
+            if code {
+                run.append(c); i += 1; continue
+            }
+            if c == "*", i + 1 < chars.count, chars[i + 1] == "*" {
+                emit(); bold.toggle(); i += 2; continue
+            }
+            // An underscore between two alphanumerics is part of a word
+            // (e.g. snake_case), not an emphasis marker: keep it literal.
+            if c == "_", i > 0, i + 1 < chars.count,
+               chars[i - 1].isLetter || chars[i - 1].isNumber,
+               chars[i + 1].isLetter || chars[i + 1].isNumber {
+                run.append(c); i += 1; continue
+            }
+            if c == "*" || c == "_" {
+                emit(); italic.toggle(); i += 1; continue
+            }
+            run.append(c); i += 1
+        }
+        emit()
+        return out
+    }
+
+    /// If `line` is a numbered list item ("1. foo"), return the item with the
+    /// marker preserved; otherwise nil.
+    private static func numberedListBody(_ line: String) -> String? {
+        guard let dot = line.firstIndex(of: ".") else { return nil }
+        let digits = line[line.startIndex.. Void) {
+        // Stack new pills below any already-showing ones so several cards
+        // generated in a row each stay visible until tapped.
+ let existing = viewController.view.subviews.filter { $0 is PillView }.count + let topOffset = CGFloat(8 + existing * 44) + + let pill = PillView(cardId: cardId, onTap: onTap) + pill.translatesAutoresizingMaskIntoConstraints = false + viewController.view.addSubview(pill) + + NSLayoutConstraint.activate([ + pill.centerXAnchor.constraint(equalTo: viewController.view.centerXAnchor), + pill.topAnchor.constraint(equalTo: viewController.view.safeAreaLayoutGuide.topAnchor, constant: topOffset), + pill.heightAnchor.constraint(equalToConstant: 36), + ]) + + pill.alpha = 0 + pill.transform = CGAffineTransform(translationX: 0, y: -20) + + UIView.animate(withDuration: 0.4, delay: 0, usingSpringWithDamping: 0.8, initialSpringVelocity: 0) { + pill.alpha = 1 + pill.transform = .identity + } + } +} + +private final class PillView: UIView { + + private let cardId: String + private let onTap: (String) -> Void + + init(cardId: String, onTap: @escaping (String) -> Void) { + self.cardId = cardId + self.onTap = onTap + super.init(frame: .zero) + setupUI() + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + private func setupUI() { + backgroundColor = UIColor(red: 0.15, green: 0.13, blue: 0.22, alpha: 0.95) + layer.cornerRadius = 18 + + let label = UILabel() + label.text = "\u{2728} new card" + label.font = .systemFont(ofSize: 14, weight: .semibold) + label.textColor = .white + label.translatesAutoresizingMaskIntoConstraints = false + addSubview(label) + + NSLayoutConstraint.activate([ + label.leadingAnchor.constraint(equalTo: leadingAnchor, constant: 16), + label.trailingAnchor.constraint(equalTo: trailingAnchor, constant: -16), + label.centerYAnchor.constraint(equalTo: centerYAnchor), + ]) + + let tap = UITapGestureRecognizer(target: self, action: #selector(tapped)) + addGestureRecognizer(tap) + isUserInteractionEnabled = true + + // Shadow + layer.shadowColor = UIColor.black.cgColor + layer.shadowOpacity = 0.3 + layer.shadowOffset = 
CGSize(width: 0, height: 2) + layer.shadowRadius = 8 + } + + @objc private func tapped() { + onTap(cardId) + dismiss() + } + + func dismiss() { + UIView.animate(withDuration: 0.3, animations: { + self.alpha = 0 + self.transform = CGAffineTransform(translationX: 0, y: -20) + }) { _ in + self.removeFromSuperview() + } + } +} + +#endif diff --git a/LoopIOS/Feed/CardRenderer.swift b/LoopIOS/Feed/CardRenderer.swift new file mode 100644 index 0000000..b60f70b --- /dev/null +++ b/LoopIOS/Feed/CardRenderer.swift @@ -0,0 +1,65 @@ +// +// CardRenderer.swift +// Loop +// +// Pluggable renderer interface for cards. Each CardKind maps to a concrete +// renderer that produces a 4:3 landscape poster PNG. Future backends (HTML→image, +// Higgsfield, vectors) conform to CardRendering and register themselves. +// + +#if os(iOS) +import UIKit +#endif +import Foundation + +/// Protocol for card renderers. Each implementation takes a card's payload and +/// produces a poster image (4:3 landscape, 1200×900 default). +protocol CardRendering { + /// The kind this renderer handles. + var kind: CardKind { get } + + /// Render a poster image for the given card. Completion delivers the + /// absolute file URL of the saved PNG, or an error. + func render(card: Card, completion: @escaping (Result) -> Void) +} + +/// Registry that dispatches renderCard(kind, payload) to the correct backend. +/// v1 ships with image and markdown renderers; future kinds drop in without +/// changing the tool surface. +final class CardRendererRegistry { + static let shared = CardRendererRegistry() + + private var renderers: [CardKind: CardRendering] = [:] + + private init() { + #if os(iOS) + register(ImageCardRenderer()) + register(MarkdownCardRenderer()) + #endif + } + + func register(_ renderer: CardRendering) { + renderers[renderer.kind] = renderer + } + + /// Render a card using the appropriate backend. 
+ func render(card: Card, completion: @escaping (Result) -> Void) { + guard let renderer = renderers[card.kind] else { + completion(.failure(CardRendererError.noRenderer(card.kind))) + return + } + renderer.render(card: card, completion: completion) + } + + enum CardRendererError: Error, LocalizedError { + case noRenderer(CardKind) + case renderFailed(String) + + var errorDescription: String? { + switch self { + case .noRenderer(let k): return "No renderer registered for kind '\(k.rawValue)'" + case .renderFailed(let m): return "Render failed: \(m)" + } + } + } +} diff --git a/LoopIOS/Feed/CardSkill.swift b/LoopIOS/Feed/CardSkill.swift new file mode 100644 index 0000000..b562481 --- /dev/null +++ b/LoopIOS/Feed/CardSkill.swift @@ -0,0 +1,345 @@ +// +// CardSkill.swift +// Loop +// +// Tool registration for `generate_card`. Creates a Card, persists it via +// CardStore, dispatches rendering via CardRendererRegistry, and posts a +// notification so the UI can show a pill alert. +// + +import Foundation + +final class CardSkill { + static let shared = CardSkill() + private init() {} + + // MARK: - System prompt fragment + + static let systemPromptFragment: String = """ +You can generate feed cards for the user using the `generate_card` tool. Cards \ +appear in the user's Feed tab as visual poster-style summaries they can swipe \ +through, keep, or archive. + +When to call: +- The user asks for a card, summary card, or visual recap on a topic. +- A natural summarization moment: "card on my day tomorrow", "card about X". + +Kinds: +- `image`: provide an `image_prompt` — a vivid descriptive prompt for image \ + generation. The title is overlaid on the generated image. +- `markdown`: the `body` is rendered as styled text on a dark poster. Good for \ + schedules, lists, quick-reference info. + +Rules: +- Pick `image` when the topic is visual/emotional (pets, scenery, inspiration). +- Pick `markdown` when the content is textual/informational (schedule, checklist). 
+- Keep `title` short (≤6 words). Body should be concise and scannable. +- Tags are optional lowercase keywords for future filtering. + +Reading & editing existing cards: +- Call `list_feed_cards` to see the cards currently visible in the user's feed \ + (newest first, archived excluded). It returns each card's `card_id`, kind, \ + title, body, tags, source, and state. Use it before editing so you target the \ + right card by `card_id`. +- Call `edit_card` with a `card_id` and only the fields you want to change \ + (`title`, `body`, `tags`, `source`, or `image_prompt` for image cards). \ + Omitted fields are left untouched. Edits re-render the card automatically. +- When the user says "update/change/fix that card" or names a card to revise, \ + list first if you don't already know the `card_id`, then edit it — don't \ + create a duplicate with `generate_card`. +""" + + // MARK: - Tool schema + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "generate_card", + "description": "Generate a visual feed card. Image cards produce an AI-generated poster; markdown cards render styled text. Cards appear in the Feed tab.", + "parameters": [ + "type": "object", + "properties": [ + "kind": [ + "type": "string", + "enum": ["image", "markdown"], + "description": "Card type: 'image' (AI-generated visual) or 'markdown' (styled text poster)." + ], + "title": [ + "type": "string", + "description": "Short card title (≤6 words)." + ], + "body": [ + "type": "string", + "description": "Card body text. For markdown cards this is the content rendered on the poster. For image cards this is a subtitle/caption." + ], + "image_prompt": [ + "type": "string", + "description": "Required when kind='image'. Vivid descriptive prompt for the image generator (subject, style, mood, lighting, colors)." + ], + "source": [ + "type": "string", + "description": "Optional attribution (e.g. 'calendar', 'user request')." 
+ ], + "tags": [ + "type": "array", + "items": ["type": "string"], + "description": "Optional lowercase tags for filtering." + ] + ], + "required": ["kind", "title", "body"] + ] as [String: Any] + ] as [String: Any] + ], + [ + "type": "function", + "function": [ + "name": "list_feed_cards", + "description": "List the cards currently visible in the user's feed (non-archived, newest first). Returns each card's id, kind, title, body, tags, source, and state. Use before editing to find the right card_id.", + "parameters": [ + "type": "object", + "properties": [:] as [String: Any] + ] as [String: Any] + ] as [String: Any] + ], + [ + "type": "function", + "function": [ + "name": "edit_card", + "description": "Edit an existing feed card in place. Provide the card_id and only the fields to change; omitted fields are left as-is. The card re-renders automatically.", + "parameters": [ + "type": "object", + "properties": [ + "card_id": [ + "type": "string", + "description": "ID of the card to edit (from list_feed_cards or generate_card)." + ], + "title": [ + "type": "string", + "description": "New title (≤6 words)." + ], + "body": [ + "type": "string", + "description": "New body. For markdown cards this is the rendered content; for image cards prefer 'image_prompt'." + ], + "image_prompt": [ + "type": "string", + "description": "New image prompt for an image card. Changing this regenerates the poster image." + ], + "source": [ + "type": "string", + "description": "New attribution string." + ], + "tags": [ + "type": "array", + "items": ["type": "string"], + "description": "Replacement list of lowercase tags (replaces existing tags)." 
+ ] + ], + "required": ["card_id"] + ] as [String: Any] + ] as [String: Any] + ] + ] + + static let toolNames: Set = ["generate_card", "list_feed_cards", "edit_card"] + + // MARK: - Routing + + func handles(functionName: String) -> Bool { + CardSkill.toolNames.contains(functionName) + } + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + switch functionCall.name { + case "generate_card": + handleGenerate(functionCall: functionCall, completion: completion) + case "list_feed_cards": + handleList(completion: completion) + case "edit_card": + handleEdit(functionCall: functionCall, completion: completion) + default: + completion(MessageStruct(role: "function", + content: "Unknown card tool '\(functionCall.name)'.", + name: functionCall.name)) + } + } + + // MARK: - generate_card + + private func handleGenerate(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + let args = functionCall.arguments + + guard let kindStr = args["kind"] as? String, + let kind = CardKind(rawValue: kindStr) else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"Missing or invalid 'kind'. Must be 'image' or 'markdown'.\"}", + name: "generate_card")) + return + } + + guard let title = args["title"] as? String, !title.isEmpty else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"Missing 'title'.\"}", + name: "generate_card")) + return + } + + guard let body = args["body"] as? String, !body.isEmpty else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"Missing 'body'.\"}", + name: "generate_card")) + return + } + + if kind == .image { + guard let imagePrompt = args["image_prompt"] as? 
String, !imagePrompt.isEmpty else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"'image_prompt' is required when kind='image'.\"}", + name: "generate_card")) + return + } + // For image cards, the body used for rendering is the image_prompt + let card = Card(kind: kind, title: title, body: imagePrompt, + source: args["source"] as? String, + tags: (args["tags"] as? [String]) ?? []) + generateCard(card, displayBody: body, completion: completion) + } else { + let card = Card(kind: kind, title: title, body: body, + source: args["source"] as? String, + tags: (args["tags"] as? [String]) ?? []) + generateCard(card, displayBody: body, completion: completion) + } + } + + // MARK: - list_feed_cards + + private func handleList(completion: @escaping (MessageStruct) -> Void) { + let cards = CardStore.shared.feedCards + let payload: [[String: Any]] = cards.map { card in + [ + "card_id": card.id, + "kind": card.kind.rawValue, + "title": card.title, + "body": card.body, + "tags": card.tags, + "source": card.source ?? "", + "state": card.state.rawValue, + "created_at": CardStore.dateFormatter.string(from: card.createdAt) + ] + } + let result: [String: Any] = ["status": "ok", "count": cards.count, "cards": payload] + completion(MessageStruct(role: "function", content: Self.jsonString(result), name: "list_feed_cards")) + } + + // MARK: - edit_card + + private func handleEdit(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + let args = functionCall.arguments + + guard let cardId = args["card_id"] as? String, !cardId.isEmpty else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"Missing 'card_id'.\"}", + name: "edit_card")) + return + } + + guard let existing = CardStore.shared.card(for: cardId) else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"No card found with card_id '\(cardId)'. 
Call list_feed_cards to see valid ids.\"}", + name: "edit_card")) + return + } + + // For image cards the stored body IS the image prompt, so an edit to + // either field maps onto `body`. Track whether the rendered output needs + // regenerating (content changed). + let newTitle = (args["title"] as? String).flatMap { $0.isEmpty ? nil : $0 } + let newSource = args["source"] as? String + let newTags = args["tags"] as? [String] + let bodyArg = existing.kind == .image + ? (args["image_prompt"] as? String ?? args["body"] as? String) + : (args["body"] as? String) + let newBody = bodyArg.flatMap { $0.isEmpty ? nil : $0 } + + guard newTitle != nil || newBody != nil || newSource != nil || newTags != nil else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"Nothing to edit. Provide at least one of: title, body, image_prompt, source, tags.\"}", + name: "edit_card")) + return + } + + let contentChanged = newTitle != nil || newBody != nil + + let updated = CardStore.shared.update(id: cardId) { card in + if let newTitle { card.title = newTitle } + if let newBody { card.body = newBody } + if let newSource { card.source = newSource } + if let newTags { card.tags = newTags } + } + + guard let updated else { + completion(MessageStruct(role: "function", + content: "{\"error\":\"Failed to update card '\(cardId)'.\"}", + name: "edit_card")) + return + } + + // Re-render so any stored poster stays in sync with the new content. + // Image cards only regenerate when their prompt actually changed (it's + // an expensive image-model call); markdown re-renders are cheap/local. + if contentChanged && (updated.kind == .markdown || newBody != nil) { + CardRendererRegistry.shared.render(card: updated) { _ in } + } + + let result: [String: Any] = [ + "status": "ok", + "card_id": updated.id, + "kind": updated.kind.rawValue, + "title": updated.title, + "tags": updated.tags, + "message": "Card updated." 
+ ] + completion(MessageStruct(role: "function", content: Self.jsonString(result), name: "edit_card")) + } + + /// Serialize a JSON-object dictionary to a string, falling back to a minimal + /// ok payload on failure. + private static func jsonString(_ object: [String: Any]) -> String { + (try? JSONSerialization.data(withJSONObject: object)) + .flatMap { String(data: $0, encoding: .utf8) } ?? "{\"status\":\"ok\"}" + } + + // MARK: - Generation + + private func generateCard(_ card: Card, displayBody: String, + completion: @escaping (MessageStruct) -> Void) { + // Persist the card immediately (state: .new, no poster yet) + CardStore.shared.add(card) + + // Reply to the LLM right away so the turn unblocks + let result: [String: Any] = [ + "status": "ok", + "card_id": card.id, + "kind": card.kind.rawValue, + "title": card.title, + "message": "Card created and rendering poster. It will appear in the Feed shortly." + ] + let json = (try? JSONSerialization.data(withJSONObject: result)) + .flatMap { String(data: $0, encoding: .utf8) } ?? "{\"status\":\"ok\"}" + completion(MessageStruct(role: "function", content: json, name: "generate_card")) + + // Kick off async rendering + CardRendererRegistry.shared.render(card: card) { renderResult in + switch renderResult { + case .success: + print("CardSkill: poster rendered for card \(card.id)") + case .failure(let error): + print("CardSkill: render failed for card \(card.id): \(error.localizedDescription)") + } + } + } +} diff --git a/LoopIOS/Feed/CardStore.swift b/LoopIOS/Feed/CardStore.swift new file mode 100644 index 0000000..ddda325 --- /dev/null +++ b/LoopIOS/Feed/CardStore.swift @@ -0,0 +1,199 @@ +// +// CardStore.swift +// Loop +// +// Persistence layer for Feed cards. Cards are stored as individual JSON files +// at workspace://cards/.json. Thread-safe in-memory cache backed by disk. +// + +import Foundation + +final class CardStore { + static let shared = CardStore() + + /// Notification posted when a new card is added. 
Object is the Card. + static let cardAddedNotification = Notification.Name("CardStore.cardAdded") + /// Notification posted when a card's state changes. + static let cardUpdatedNotification = Notification.Name("CardStore.cardUpdated") + + private let cardsFolder = "cards" + private let assetsFolder = "cards/assets" + + private var cache: [String: Card] = [:] + private let lock = NSLock() + + /// ISO 8601 encoder/decoder with fractional seconds for round-trip fidelity. + static let dateFormatter: ISO8601DateFormatter = { + let f = ISO8601DateFormatter() + f.formatOptions = [.withInternetDateTime, .withFractionalSeconds] + return f + }() + + private static let encoder: JSONEncoder = { + let e = JSONEncoder() + e.outputFormatting = [.prettyPrinted, .sortedKeys] + e.dateEncodingStrategy = .custom { date, encoder in + var container = encoder.singleValueContainer() + try container.encode(CardStore.dateFormatter.string(from: date)) + } + return e + }() + + private static let decoder: JSONDecoder = { + let d = JSONDecoder() + d.dateDecodingStrategy = .custom { decoder in + let container = try decoder.singleValueContainer() + let str = try container.decode(String.self) + guard let date = CardStore.dateFormatter.date(from: str) else { + throw DecodingError.dataCorruptedError(in: container, debugDescription: "Invalid ISO 8601 date: \(str)") + } + return date + } + return d + }() + + private init() { + ensureDirectories() + loadAll() + } + + // MARK: - Public API + + /// All non-archived cards ordered strictly newest-first by creation date. + var feedCards: [Card] { + lock.lock(); defer { lock.unlock() } + return cache.values + .filter { $0.state != .archived } + .sorted { $0.createdAt > $1.createdAt } + } + + /// All cards including archived (for settings recovery). + var allCards: [Card] { + lock.lock(); defer { lock.unlock() } + return cache.values.sorted { $0.createdAt > $1.createdAt } + } + + /// Persist a new card and broadcast. 
+ @discardableResult + func add(_ card: Card) -> Card { + lock.lock() + cache[card.id] = card + lock.unlock() + writeToDisk(card) + NotificationCenter.default.post(name: CardStore.cardAddedNotification, object: card) + return card + } + + /// Apply edits to an existing card's content and persist. The closure + /// receives the current card to mutate in place. Returns the updated card, + /// or nil if no card with `id` exists. + @discardableResult + func update(id: String, _ mutate: (inout Card) -> Void) -> Card? { + lock.lock() + guard var card = cache[id] else { lock.unlock(); return nil } + mutate(&card) + cache[id] = card + lock.unlock() + writeToDisk(card) + NotificationCenter.default.post(name: CardStore.cardUpdatedNotification, object: card) + return card + } + + /// Update a card's state (keep/archive) and persist. + func updateState(id: String, state: CardState) { + lock.lock() + guard var card = cache[id] else { lock.unlock(); return } + card.state = state + cache[id] = card + lock.unlock() + writeToDisk(card) + NotificationCenter.default.post(name: CardStore.cardUpdatedNotification, object: card) + } + + /// Permanently delete a card: removes it from the cache, deletes its JSON + /// and any rendered poster asset from disk, and broadcasts an update. + func remove(id: String) { + lock.lock() + let card = cache[id] + cache[id] = nil + lock.unlock() + guard let card = card else { return } + + let fm = FileManager.default + let jsonURL = Workspace.shared.rootURL + .appendingPathComponent(cardsFolder, isDirectory: true) + .appendingPathComponent("\(id).json") + try? fm.removeItem(at: jsonURL) + if let poster = posterURL(for: card) { + try? fm.removeItem(at: poster) + } + NotificationCenter.default.post(name: CardStore.cardUpdatedNotification, object: card) + } + + /// Update a card's image URL after rendering completes. 
+ func updateImageURL(id: String, imageURL: String) { + lock.lock() + guard var card = cache[id] else { lock.unlock(); return } + card.imageURL = imageURL + cache[id] = card + lock.unlock() + writeToDisk(card) + NotificationCenter.default.post(name: CardStore.cardUpdatedNotification, object: card) + } + + /// Retrieve a card by id. + func card(for id: String) -> Card? { + lock.lock(); defer { lock.unlock() } + return cache[id] + } + + /// Absolute URL for a card's poster asset. + func posterURL(for card: Card) -> URL? { + guard let rel = card.imageURL else { return nil } + return resolveWorkspaceURL(rel) + } + + /// The workspace-relative path where a card's poster should be saved. + func posterRelativePath(for cardId: String) -> String { + return "\(assetsFolder)/\(cardId).png" + } + + // MARK: - Disk I/O + + private func ensureDirectories() { + let ws = Workspace.shared + let fm = FileManager.default + let cardsDir = ws.rootURL.appendingPathComponent(cardsFolder, isDirectory: true) + let assetsDir = ws.rootURL.appendingPathComponent(assetsFolder, isDirectory: true) + try? fm.createDirectory(at: cardsDir, withIntermediateDirectories: true) + try? fm.createDirectory(at: assetsDir, withIntermediateDirectories: true) + } + + private func writeToDisk(_ card: Card) { + let url = Workspace.shared.rootURL + .appendingPathComponent(cardsFolder, isDirectory: true) + .appendingPathComponent("\(card.id).json") + do { + let data = try CardStore.encoder.encode(card) + try data.write(to: url, options: .atomic) + } catch { + print("CardStore: failed to write \(card.id): \(error)") + } + } + + private func loadAll() { + let dir = Workspace.shared.rootURL.appendingPathComponent(cardsFolder, isDirectory: true) + let fm = FileManager.default + guard let files = try? fm.contentsOfDirectory(at: dir, includingPropertiesForKeys: nil) else { return } + for file in files where file.pathExtension == "json" { + guard let data = try? Data(contentsOf: file), + let card = try? 
CardStore.decoder.decode(Card.self, from: data) else { continue } + cache[card.id] = card + } + print("CardStore: loaded \(cache.count) cards from disk") + } + + private func resolveWorkspaceURL(_ relativePath: String) -> URL { + return Workspace.shared.rootURL.appendingPathComponent(relativePath) + } +} diff --git a/LoopIOS/Feed/FeedCardListView.swift b/LoopIOS/Feed/FeedCardListView.swift new file mode 100644 index 0000000..1901a50 --- /dev/null +++ b/LoopIOS/Feed/FeedCardListView.swift @@ -0,0 +1,361 @@ +// +// FeedCardListView.swift +// Loop +// +// Scannable vertical list of feed cards shown on the empty new-chat screen. +// Each row is a compact panel — icon tile, title, one-line summary, a kind +// badge, and hashtags — laid out to read at a glance. Swipe a row left to +// Archive or Delete; tap to open its detail. The list only renders when there +// are unarchived cards; otherwise the hero orb owns the empty state. +// +// This replaces the old Tinder-style swipe deck. The file kept its original +// `Feed/` home but the surface is now a list, not a stack. +// + +#if os(iOS) +import UIKit + +final class FeedCardListView: UIView { + + // MARK: - Callbacks + + /// Fired when a row is tapped — the host opens the card's detail. + var onTap: ((Card) -> Void)? + /// Fired when the user archives a card (swipe → Archive). The host updates + /// the card's state in the store. + var onArchive: ((Card) -> Void)? + /// Fired when the list becomes empty (last card archived / deleted) so the + /// host can restore the orb / empty state. + var onEmptied: (() -> Void)? + + // MARK: - State + + private var cards: [Card] = [] + + /// Warm gold used for badges and accents — matches the app's accent on dark + /// surfaces. 
+ static let accent = UIColor(red: 0.82, green: 0.66, blue: 0.40, alpha: 1) + + // MARK: - Subviews + + private let tableView = UITableView(frame: .zero, style: .plain) + + private let titleLabel: UILabel = { + let l = UILabel() + l.font = .systemFont(ofSize: 30, weight: .bold) + l.textColor = .label + l.text = "Cards" + return l + }() + + private let countLabel: UILabel = { + let l = UILabel() + l.font = .systemFont(ofSize: 14, weight: .regular) + l.textColor = .secondaryLabel + return l + }() + + // MARK: - Init + + override init(frame: CGRect) { + super.init(frame: frame) + setup() + } + + required init?(coder: NSCoder) { fatalError("init(coder:) has not been implemented") } + + private func setup() { + backgroundColor = .clear + + let header = makeHeader() + header.translatesAutoresizingMaskIntoConstraints = false + addSubview(header) + + tableView.translatesAutoresizingMaskIntoConstraints = false + tableView.backgroundColor = .clear + tableView.separatorStyle = .none + tableView.showsVerticalScrollIndicator = false + tableView.dataSource = self + tableView.delegate = self + tableView.rowHeight = UITableView.automaticDimension + tableView.estimatedRowHeight = 96 + tableView.contentInset = UIEdgeInsets(top: 4, left: 0, bottom: 8, right: 0) + tableView.register(FeedCardCell.self, forCellReuseIdentifier: FeedCardCell.reuseID) + tableView.tableFooterView = makeFooter() + addSubview(tableView) + + NSLayoutConstraint.activate([ + header.topAnchor.constraint(equalTo: topAnchor), + header.leadingAnchor.constraint(equalTo: leadingAnchor), + header.trailingAnchor.constraint(equalTo: trailingAnchor), + + tableView.topAnchor.constraint(equalTo: header.bottomAnchor, constant: 8), + tableView.leadingAnchor.constraint(equalTo: leadingAnchor), + tableView.trailingAnchor.constraint(equalTo: trailingAnchor), + tableView.bottomAnchor.constraint(equalTo: bottomAnchor), + ]) + } + + private func makeHeader() -> UIView { + let container = UIView() + let textStack = 
UIStackView(arrangedSubviews: [titleLabel, countLabel]) + textStack.axis = .vertical + textStack.spacing = 2 + textStack.translatesAutoresizingMaskIntoConstraints = false + container.addSubview(textStack) + NSLayoutConstraint.activate([ + textStack.topAnchor.constraint(equalTo: container.topAnchor, constant: 8), + textStack.leadingAnchor.constraint(equalTo: container.leadingAnchor, constant: 20), + textStack.trailingAnchor.constraint(lessThanOrEqualTo: container.trailingAnchor, constant: -20), + textStack.bottomAnchor.constraint(equalTo: container.bottomAnchor, constant: -10), + ]) + return container + } + + private func makeFooter() -> UIView { + let footer = UIView(frame: CGRect(x: 0, y: 0, width: 320, height: 56)) + let label = UILabel() + label.font = .systemFont(ofSize: 13, weight: .regular) + label.textColor = .tertiaryLabel + label.textAlignment = .center + label.text = "Swipe a card left to Archive" + label.translatesAutoresizingMaskIntoConstraints = false + footer.addSubview(label) + NSLayoutConstraint.activate([ + label.centerXAnchor.constraint(equalTo: footer.centerXAnchor), + label.topAnchor.constraint(equalTo: footer.topAnchor, constant: 18), + ]) + return footer + } + + // MARK: - API + + /// Replace the list contents and refresh. + func setCards(_ cards: [Card]) { + self.cards = cards + updateCount() + tableView.reloadData() + } + + var isEmpty: Bool { cards.isEmpty } + + private func updateCount() { + let n = cards.count + countLabel.text = "\(n) card\(n == 1 ? "" : "s") · newest first" + } + + /// Drop the row at `index` with the given persistence action, then notify + /// the host if the list just emptied. 
+ private func removeRow(at index: Int, persist: (Card) -> Void) { + guard cards.indices.contains(index) else { return } + let card = cards.remove(at: index) + persist(card) + updateCount() + tableView.deleteRows(at: [IndexPath(row: index, section: 0)], with: .left) + if cards.isEmpty { onEmptied?() } + } +} + +// MARK: - Data source / delegate + +extension FeedCardListView: UITableViewDataSource, UITableViewDelegate { + + func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int { + cards.count + } + + func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell { + let cell = tableView.dequeueReusableCell(withIdentifier: FeedCardCell.reuseID, for: indexPath) as! FeedCardCell + cell.configure(with: cards[indexPath.row]) + return cell + } + + func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) { + tableView.deselectRow(at: indexPath, animated: true) + guard cards.indices.contains(indexPath.row) else { return } + onTap?(cards[indexPath.row]) + } + + func tableView(_ tableView: UITableView, + trailingSwipeActionsConfigurationForRowAt indexPath: IndexPath) -> UISwipeActionsConfiguration? { + let archive = UIContextualAction(style: .destructive, title: "Archive") { [weak self] _, _, done in + self?.removeRow(at: indexPath.row) { card in self?.onArchive?(card) } + done(true) + } + archive.image = UIImage(systemName: "archivebox") + archive.backgroundColor = UIColor(red: 0.62, green: 0.47, blue: 0.20, alpha: 1) + + let config = UISwipeActionsConfiguration(actions: [archive]) + config.performsFirstActionWithFullSwipe = true + return config + } +} + +// MARK: - Card cell + +/// A single scannable card row: icon tile, title, summary, kind badge, tags. 
private final class FeedCardCell: UITableViewCell {

    static let reuseID = "FeedCardCell"

    /// Rounded dark panel that hosts all row content.
    private let panel: UIView = {
        let v = UIView()
        v.backgroundColor = UIColor(red: 0.11, green: 0.11, blue: 0.12, alpha: 1)
        v.layer.cornerRadius = 20
        v.layer.cornerCurve = .continuous
        v.layer.borderWidth = 1
        v.layer.borderColor = UIColor(white: 1, alpha: 0.06).cgColor
        // The panel is always dark, so resolve dynamic colors inside it
        // (.secondaryLabel, .tertiaryLabel) for dark appearance. Otherwise the
        // subtitle and tags would be dark-on-dark in light mode.
        v.overrideUserInterfaceStyle = .dark
        return v
    }()

    /// Tinted square behind the kind icon.
    private let iconTile: UIView = {
        let v = UIView()
        v.layer.cornerRadius = 14
        v.layer.cornerCurve = .continuous
        return v
    }()

    private let iconView: UIImageView = {
        let iv = UIImageView()
        iv.contentMode = .scaleAspectFit
        iv.preferredSymbolConfiguration = UIImage.SymbolConfiguration(pointSize: 22, weight: .semibold)
        return iv
    }()

    private let titleLabel: UILabel = {
        let l = UILabel()
        l.font = .systemFont(ofSize: 20, weight: .bold)
        l.textColor = .white
        l.numberOfLines = 2
        return l
    }()

    private let subtitleLabel: UILabel = {
        let l = UILabel()
        l.font = .systemFont(ofSize: 15, weight: .regular)
        l.textColor = .secondaryLabel
        l.numberOfLines = 1
        return l
    }()

    private let tagsLabel: UILabel = {
        let l = UILabel()
        l.font = .systemFont(ofSize: 13, weight: .regular)
        l.textColor = .tertiaryLabel
        l.numberOfLines = 1
        return l
    }()

    /// Kind badge; required hugging and compression priorities keep it at its
    /// natural width so the title takes the remaining space.
    private let badgeLabel: PaddingLabel = {
        let l = PaddingLabel()
        l.font = .systemFont(ofSize: 11, weight: .bold)
        l.textColor = FeedCardListView.accent
        l.backgroundColor = FeedCardListView.accent.withAlphaComponent(0.12)
        l.layer.cornerRadius = 7
        l.layer.cornerCurve = .continuous
        l.clipsToBounds = true
        l.setContentHuggingPriority(.required, for: .horizontal)
        l.setContentCompressionResistancePriority(.required, for: .horizontal)
        return l
    }()

    override init(style: UITableViewCell.CellStyle, reuseIdentifier: String?) {
        super.init(style: style, reuseIdentifier: reuseIdentifier)
        backgroundColor = .clear
        selectionStyle = .none
        contentView.backgroundColor = .clear
        setupLayout()
    }

    required init?(coder: NSCoder) { fatalError("init(coder:) has not been implemented") }

    /// Assemble the row: panel inset in the content view, icon tile on the
    /// left, text stack (title + badge, subtitle, tags) on the right.
    private func setupLayout() {
        panel.translatesAutoresizingMaskIntoConstraints = false
        contentView.addSubview(panel)

        // Title and badge share the top row; the title wraps in the space left
        // of the badge, which hugs the trailing edge.
        let titleRow = UIStackView(arrangedSubviews: [titleLabel, badgeLabel])
        titleRow.axis = .horizontal
        titleRow.alignment = .top
        titleRow.spacing = 10

        let textStack = UIStackView(arrangedSubviews: [titleRow, subtitleLabel, tagsLabel])
        textStack.axis = .vertical
        textStack.alignment = .fill
        textStack.spacing = 3
        textStack.setCustomSpacing(10, after: subtitleLabel)

        for v in [iconTile, textStack] {
            v.translatesAutoresizingMaskIntoConstraints = false
            panel.addSubview(v)
        }
        iconTile.addSubview(iconView)
        iconView.translatesAutoresizingMaskIntoConstraints = false

        NSLayoutConstraint.activate([
            panel.topAnchor.constraint(equalTo: contentView.topAnchor, constant: 7),
            panel.bottomAnchor.constraint(equalTo: contentView.bottomAnchor, constant: -7),
            panel.leadingAnchor.constraint(equalTo: contentView.leadingAnchor, constant: 20),
            panel.trailingAnchor.constraint(equalTo: contentView.trailingAnchor, constant: -20),

            iconTile.leadingAnchor.constraint(equalTo: panel.leadingAnchor, constant: 16),
            iconTile.topAnchor.constraint(equalTo: panel.topAnchor, constant: 16),
            iconTile.widthAnchor.constraint(equalToConstant: 52),
            iconTile.heightAnchor.constraint(equalToConstant: 52),

            iconView.centerXAnchor.constraint(equalTo: iconTile.centerXAnchor),
            iconView.centerYAnchor.constraint(equalTo: iconTile.centerYAnchor),

            textStack.leadingAnchor.constraint(equalTo: iconTile.trailingAnchor, constant: 14),
            textStack.topAnchor.constraint(equalTo: panel.topAnchor, constant: 16),
            textStack.trailingAnchor.constraint(equalTo: panel.trailingAnchor, constant: -16),
        ])

        // Panel height wraps the taller of the icon tile and the text block:
        // the text pin is high-priority but breakable, while the icon pin is a
        // required "at least this much room" inequality.
        let stackBottom = textStack.bottomAnchor.constraint(equalTo: panel.bottomAnchor, constant: -16)
        stackBottom.priority = .defaultHigh
        let iconBottom = iconTile.bottomAnchor.constraint(lessThanOrEqualTo: panel.bottomAnchor, constant: -16)
        NSLayoutConstraint.activate([stackBottom, iconBottom])
    }

    /// Fill the row from `card`. Every visible field is set on each call, so a
    /// reused cell never shows stale content; the subtitle and tags rows are
    /// hidden when the card has none.
    func configure(with card: Card) {
        titleLabel.text = card.title

        let subtitle = card.displaySubtitle
        subtitleLabel.text = subtitle
        subtitleLabel.isHidden = (subtitle == nil)

        if card.tags.isEmpty {
            tagsLabel.isHidden = true
        } else {
            tagsLabel.isHidden = false
            tagsLabel.text = card.tags.map { "#\($0)" }.joined(separator: " ")
        }

        badgeLabel.text = card.displayBadge

        let style = card.displayIcon
        iconView.image = UIImage(systemName: style.symbol)
        iconView.tintColor = style.tint.withAlphaComponent(0.95)
        iconTile.backgroundColor = style.tint.withAlphaComponent(0.22)
    }
}

// MARK: - Small UI helpers

/// Label with internal padding, used for the kind badge.
private final class PaddingLabel: UILabel {
    private let inset = UIEdgeInsets(top: 4, left: 9, bottom: 4, right: 9)

    // Draw the text inside the padded rect…
    override func drawText(in rect: CGRect) {
        super.drawText(in: rect.inset(by: inset))
    }

    // …and report a size grown by the same padding so layout leaves room for it.
    override var intrinsicContentSize: CGSize {
        let s = super.intrinsicContentSize
        return CGSize(width: s.width + inset.left + inset.right,
                      height: s.height + inset.top + inset.bottom)
    }
}

#endif
diff --git a/LoopIOS/Feed/ImageCardRenderer.swift b/LoopIOS/Feed/ImageCardRenderer.swift new file mode 100644 index 0000000..202b7ff --- /dev/null +++ b/LoopIOS/Feed/ImageCardRenderer.swift @@ -0,0 +1,106 @@
//
//  ImageCardRenderer.swift
//  Loop
//
//  v1 image renderer: sends the card's image prompt to the OpenAI image
//  generation endpoint at 1536x1024 (3:2 landscape) and saves the result as a
//  poster file in the workspace cards/assets/ folder.
//

#if os(iOS)
import UIKit

final class ImageCardRenderer: CardRendering {
    let kind: CardKind = .image

    /// Requested poster size — 1536x1024, i.e. 3:2 landscape.
    private let posterSize = "1536x1024"

    /// Generate the poster for an image card and record its path on the card.
    ///
    /// - Parameters:
    ///   - card: The card to render; its `body` is used as the image prompt.
    ///   - completion: Receives the poster's absolute URL, or the failure.
    ///     Called from URLSession's callback queue, not the main thread.
    func render(card: Card, completion: @escaping (Result<URL, Error>) -> Void) {
        // The card body IS the image prompt for image-kind cards.
        let prompt = card.body
        guard !prompt.isEmpty else {
            completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("Empty image prompt")))
            return
        }

        generateImageData(prompt: prompt) { result in
            switch result {
            case .success(let imageData):
                // Generation can take minutes. If the card was deleted in the
                // meantime, don't write a poster nothing will ever reference.
                guard CardStore.shared.card(for: card.id) != nil else {
                    completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("Card was removed before its poster finished rendering")))
                    return
                }
                let relativePath = CardStore.shared.posterRelativePath(for: card.id)
                let url = Workspace.shared.rootURL.appendingPathComponent(relativePath)
                do {
                    try imageData.write(to: url, options: .atomic)
                    CardStore.shared.updateImageURL(id: card.id, imageURL: relativePath)
                    completion(.success(url))
                } catch {
                    completion(.failure(error))
                }
            case .failure(let error):
                completion(.failure(error))
            }
        }
    }

    /// Hit the OpenAI image generation endpoint directly for card rendering.
    ///
    /// Fails when no API key is configured, on a transport error, on a non-2xx
    /// HTTP status (carrying the API's own error message when present), or when
    /// the response holds no image data.
    private func generateImageData(prompt: String, completion: @escaping (Result<Data, Error>) -> Void) {
        guard let apiKey = KeyStore.shared.value(for: .openAI) else {
            completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("No OpenAI API key configured")))
            return
        }

        let url = URL(string: "https://api.openai.com/v1/images/generations")!
        var request = URLRequest(url: url)
        request.httpMethod = "POST"
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("application/json", forHTTPHeaderField: "Content-Type")
        request.timeoutInterval = 300

        let payload: [String: Any] = [
            "model": "gpt-image-1",
            "prompt": prompt,
            "n": 1,
            "size": posterSize,
            "quality": "medium"
        ]

        do {
            request.httpBody = try JSONSerialization.data(withJSONObject: payload)
        } catch {
            // Never send a request with a missing body.
            completion(.failure(error))
            return
        }

        URLSession.shared.dataTask(with: request) { data, response, error in
            if let error = error {
                completion(.failure(error))
                return
            }

            let json = data.flatMap { try? JSONSerialization.jsonObject(with: $0) } as? [String: Any]

            // Surface API failures (bad key, quota, moderation, …) with the
            // server's message rather than a generic "unexpected response".
            if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                let apiMessage = (json?["error"] as? [String: Any])?["message"] as? String
                let detail = apiMessage ?? "HTTP \(http.statusCode)"
                completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("Image API error: \(detail)")))
                return
            }

            guard let dataArr = json?["data"] as? [[String: Any]],
                  let first = dataArr.first else {
                completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("Unexpected API response")))
                return
            }

            // Handle both b64_json and url responses
            if let b64 = first["b64_json"] as? String,
               let imageData = Data(base64Encoded: b64) {
                completion(.success(imageData))
            } else if let urlStr = first["url"] as? String,
                      let imageURL = URL(string: urlStr) {
                // Download from URL; reject error statuses and empty bodies so
                // an error page is never saved as the poster.
                URLSession.shared.dataTask(with: imageURL) { imgData, imgResponse, imgErr in
                    let status = (imgResponse as? HTTPURLResponse)?.statusCode ?? 200
                    if let imgData = imgData, !imgData.isEmpty, (200..<300).contains(status) {
                        completion(.success(imgData))
                    } else {
                        completion(.failure(imgErr ?? CardRendererRegistry.CardRendererError.renderFailed("Failed to download image")))
                    }
                }.resume()
            } else {
                completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("No image data in response")))
            }
        }.resume()
    }
}

#endif
diff --git a/LoopIOS/Feed/MarkdownCardRenderer.swift b/LoopIOS/Feed/MarkdownCardRenderer.swift new file mode 100644 index 0000000..297043f --- /dev/null +++ b/LoopIOS/Feed/MarkdownCardRenderer.swift @@ -0,0 +1,106 @@
//
//  MarkdownCardRenderer.swift
//  Loop
//
//  v1 markdown renderer: renders title + body markdown to a 4:3 poster-style
//  PNG via UIKit offscreen render. Clean typography, Loop-branded dark
//  background with white text.
//

#if os(iOS)
import UIKit

final class MarkdownCardRenderer: CardRendering {
    let kind: CardKind = .markdown

    /// Poster canvas dimensions — 4:3 landscape (1200 × 900).
+ private let posterWidth: CGFloat = 1200 + private let posterHeight: CGFloat = 900 + + func render(card: Card, completion: @escaping (Result) -> Void) { + DispatchQueue.main.async { [self] in + let image = renderPoster(title: card.title, body: card.body) + guard let pngData = image.pngData() else { + completion(.failure(CardRendererRegistry.CardRendererError.renderFailed("Failed to encode PNG"))) + return + } + + let relativePath = CardStore.shared.posterRelativePath(for: card.id) + let url = Workspace.shared.rootURL.appendingPathComponent(relativePath) + + DispatchQueue.global(qos: .userInitiated).async { + do { + try pngData.write(to: url, options: .atomic) + CardStore.shared.updateImageURL(id: card.id, imageURL: relativePath) + completion(.success(url)) + } catch { + completion(.failure(error)) + } + } + } + } + + /// Render a poster-style card with title + body using UIKit drawing. + private func renderPoster(title: String, body: String) -> UIImage { + let size = CGSize(width: posterWidth, height: posterHeight) + let renderer = UIGraphicsImageRenderer(size: size) + + return renderer.image { ctx in + // Dark gradient background (Loop-branded) + let bgColors = [ + UIColor(red: 0.08, green: 0.08, blue: 0.12, alpha: 1.0).cgColor, + UIColor(red: 0.12, green: 0.10, blue: 0.18, alpha: 1.0).cgColor, + ] + let gradient = CGGradient(colorsSpace: CGColorSpaceCreateDeviceRGB(), + colors: bgColors as CFArray, + locations: [0, 1])! 
+ ctx.cgContext.drawLinearGradient(gradient, + start: .zero, + end: CGPoint(x: 0, y: size.height), + options: []) + + // Subtle accent bar at top + let accentColor = UIColor(red: 0.55, green: 0.36, blue: 1.0, alpha: 0.8) + accentColor.setFill() + UIBezierPath(rect: CGRect(x: 0, y: 0, width: size.width, height: 4)).fill() + + let margin: CGFloat = 60 + let textWidth = size.width - margin * 2 + + // Title + let titleFont = UIFont.systemFont(ofSize: 48, weight: .bold) + let titleAttrs: [NSAttributedString.Key: Any] = [ + .font: titleFont, + .foregroundColor: UIColor.white, + ] + let titleRect = CGRect(x: margin, y: margin + 20, width: textWidth, height: 120) + let titleStr = NSString(string: title) + titleStr.draw(with: titleRect, options: [.usesLineFragmentOrigin, .truncatesLastVisibleLine], attributes: titleAttrs, context: nil) + + // Body — rendered as formatted markdown (headings, bullets, bold) + // rather than literal characters. + let bodyFont = UIFont.systemFont(ofSize: 28, weight: .regular) + let bodyTop: CGFloat = margin + 160 + let bodyRect = CGRect(x: margin, y: bodyTop, width: textWidth, height: size.height - bodyTop - margin) + let bodyStr = CardMarkdown.attributed(body, + bodyFont: bodyFont, + textColor: UIColor(white: 0.85, alpha: 1.0), + headingColor: .white) + bodyStr.draw(with: bodyRect, options: [.usesLineFragmentOrigin, .truncatesLastVisibleLine], context: nil) + + // Loop watermark bottom-right + let wmFont = UIFont.systemFont(ofSize: 18, weight: .medium) + let wmAttrs: [NSAttributedString.Key: Any] = [ + .font: wmFont, + .foregroundColor: UIColor(white: 0.4, alpha: 1.0), + ] + let wm = NSString(string: "Loop") + let wmSize = wm.size(withAttributes: wmAttrs) + wm.draw(at: CGPoint(x: size.width - margin - wmSize.width, + y: size.height - margin + 10), + withAttributes: wmAttrs) + } + } +} + +#endif diff --git a/LoopIOS/Info.plist b/LoopIOS/Info.plist index 0c8b59c..7b3f832 100644 --- a/LoopIOS/Info.plist +++ b/LoopIOS/Info.plist @@ -41,6 +41,24 @@ 
$(OBSIDIAN_API_KEY) OBSIDIAN_BASE_URL $(OBSIDIAN_BASE_URL) + AGENT_MAIL_API_KEY + $(AGENT_MAIL_API_KEY) + AGENT_MAIL_INBOX + $(AGENT_MAIL_INBOX) + SSH_HOST + $(SSH_HOST) + SSH_PORT + $(SSH_PORT) + SSH_USERNAME + $(SSH_USERNAME) + SSH_NAME + $(SSH_NAME) + SSH_PRIVATE_KEY_B64 + $(SSH_PRIVATE_KEY_B64) + SSH_PASSPHRASE + $(SSH_PASSPHRASE) + LOOP_FLAG + $(LOOP_FLAG) NSUbiquitousContainers iCloud.com.bhat.intel @@ -120,6 +138,8 @@ Loop does not write to Health. This key is present only because the HealthKit framework requires it; the app never requests write access. NSSpeechRecognitionUsageDescription Loop transcribes the voice messages you record so you can talk to the assistant. When you pick Apple in Settings ▸ Model ▸ STT, transcription happens entirely on this device. + NSLocalNetworkUsageDescription + Loop connects to machines on your local network and Tailscale (e.g. your Mac) over SSH so you can run a terminal and drive your loop runner. BGTaskSchedulerPermittedIdentifiers com.bhat.intel.scheduler.prefetch diff --git a/LoopIOS/MainVC.swift b/LoopIOS/MainVC.swift index 921b6c6..9252838 100644 --- a/LoopIOS/MainVC.swift +++ b/LoopIOS/MainVC.swift @@ -24,6 +24,16 @@ class MainVC: MessagingVC { /// later calls run the pop when this flips. private var lastVisibilityEmpty: Bool? + /// Scannable card list shown on the empty new-chat screen. Replaces the + /// hero orb whenever there are unarchived feed cards; the orb steps back to + /// the nav bar. There is no Feed tab — this is where cards live. + private var feedCardList: FeedCardListView? + + /// True once the current empty-state list has been populated. Reset when + /// the chat becomes non-empty so the next new chat reloads a fresh list + /// rather than reloading on every layout pass. + private var feedStackLoaded = false + /// The immersive agent view when it is hosted as a child view /// controller (instead of a fullscreen modal). Kept alive so dismiss /// can tear it down cleanly. 
@@ -34,9 +44,35 @@ class MainVC: MessagingVC { setupAvatarTitleView() setupHeroAvatar() + setupFeedCardStack() wireAvatarToVoiceLoop() + // A card generated on a heartbeat / while sitting on a blank chat + // should slide a fresh deck in under the orb. Re-shuffle and refresh. + NotificationCenter.default.addObserver( + forName: CardStore.cardAddedNotification, object: nil, queue: .main + ) { [weak self] _ in + guard let self = self, self.visible_messages.isEmpty else { return } + self.feedStackLoaded = false + self.refreshAvatarVisibility(animated: true) + } + + // An edit (or archive) to a card while we're sitting on a blank chat + // should refresh the visible list. Deferred so it never reloads the + // table mid-swipe — the swipe handler removes its own row first, and + // this reconciles afterward against the latest store contents. + NotificationCenter.default.addObserver( + forName: CardStore.cardUpdatedNotification, object: nil, queue: .main + ) { [weak self] _ in + guard let self = self, self.visible_messages.isEmpty else { return } + DispatchQueue.main.asyncAfter(deadline: .now() + 0.4) { + guard self.visible_messages.isEmpty else { return } + self.feedStackLoaded = false + self.refreshAvatarVisibility(animated: false) + } + } + // Hero owns the empty state, nav-bar avatar owns the conversation // state. Exactly one is shown at a time, driven by whether there's // anything visible in the chat (system message and other invisibles @@ -108,23 +144,34 @@ class MainVC: MessagingVC { vc.didMove(toParent: self) agentLargeVC = vc - // Fade the chrome in while the pop flies. + // Fade the backdrop in quickly for continuity, but hold the chrome + // (labels, pill, ticker) back — it should arrive WITH the orb, not + // before it. Without this the chrome lands at full opacity in ~0.35s + // while the orb is still flying up over 0.65s, so the labels read as + // "showing up early." 
vc.view.alpha = 0 + vc.agentView.setChromeHidden(true, animated: false) vc.view.layoutIfNeeded() - UIView.animate(withDuration: 0.35) { vc.view.alpha = 1 } + UIView.animate(withDuration: 0.3) { vc.view.alpha = 1 } // Fly the orb from the nav-bar avatar to the hero position. if let source = avatar, let window = view.window { let dest = vc.agentView.avatar - AvatarPopAnimator.play(from: source, to: dest, in: window, duration: 0.65) { + AvatarPopAnimator.play(from: source, to: dest, in: window, duration: 0.65) { [weak vc] in // Leave the nav-bar orb hidden for the lifetime of the large // view. Unhiding it here made it flash back the instant the // hero finished flying in (the nav bar is hidden, but the // titleView orb still re-rendered). It's restored on dismiss — // the reverse pop unhides it on a tap, the pan path below does. dest.isHidden = false + // Orb has landed — now bring the chrome in. + vc?.agentView.setChromeHidden(false, animated: true) } + } else { + // No window to fly the orb in — just reveal the chrome immediately + // so it isn't stuck hidden. + vc.agentView.setChromeHidden(false, animated: false) } navigationController?.setNavigationBarHidden(true, animated: true) @@ -159,8 +206,10 @@ class MainVC: MessagingVC { animations: { vc.view.alpha = 0 if panDismiss { - let currentY = vc.view.transform.ty - vc.view.transform = CGAffineTransform(translationX: 0, y: max(currentY, 200)) + // The drag slides `agentView` (the pinned content subview), + // not the root view — so continue the slide from there. 
+ let currentY = vc.agentView.transform.ty + vc.agentView.transform = CGAffineTransform(translationX: 0, y: max(currentY, 200)) } }, completion: { [weak self] _ in @@ -172,6 +221,17 @@ class MainVC: MessagingVC { ) } + /// A notification tap must land on the chat even if the large agent orb is + /// covering it — tear it down (no fly-back animation; the user is jumping + /// straight to a conversation, not casually dismissing the orb) before the + /// base class pops/dismisses to surface the chat. + override func bringChatToFront() { + if agentLargeVC != nil { + dismissAgentLargeView() + } + super.bringChatToFront() + } + /// Hero avatar that fills the empty-state slot above the message box. /// Same component as the nav-bar avatar at a larger scale — both drive /// off the shared VoiceLoopCoordinator, so they animate in lock-step. @@ -199,6 +259,42 @@ class MainVC: MessagingVC { self.heroAvatar = big } + /// Builds the card list and fills the empty-state area above the input bar, + /// so it reads as the primary empty-state element when cards exist. Hidden + /// until `refreshAvatarVisibility` decides there's a list to show. + private func setupFeedCardStack() { + let list = FeedCardListView() + list.translatesAutoresizingMaskIntoConstraints = false + list.isHidden = true + // Below the message box for the same reason as the hero — never cover + // the input bar. + view.insertSubview(list, belowSubview: messageBox) + + NSLayoutConstraint.activate([ + list.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor), + list.leadingAnchor.constraint(equalTo: view.leadingAnchor), + list.trailingAnchor.constraint(equalTo: view.trailingAnchor), + list.bottomAnchor.constraint(equalTo: messageBox.topAnchor, constant: -8), + ]) + + list.onTap = { [weak self] card in + guard let self = self else { return } + let latest = CardStore.shared.card(for: card.id) ?? 
card + let detail = CardDetailViewController(card: latest) + let nav = UINavigationController(rootViewController: detail) + self.present(nav, animated: true) + } + list.onArchive = { card in + CardStore.shared.updateState(id: card.id, state: .archived) + } + list.onEmptied = { [weak self] in + // Last card cleared — bring the orb back. + self?.refreshAvatarVisibility(animated: true) + } + + self.feedCardList = list + } + /// Subscribes both avatars to the shared VoiceLoopCoordinator. Same /// state→mode mapping the Mac uses on its conversation window. private func wireAvatarToVoiceLoop() { @@ -240,6 +336,28 @@ class MainVC: MessagingVC { } } + /// Primary catch-all: fires whenever the message set is rebuilt from + /// scratch, including the cold-start restore that populates `messages` + /// from storage long after `viewDidLoad` ran (the "hero floating over + /// text on cold start" bug — that path never hit an explicit refresh + /// site). Snaps rather than pops, since these are state restorations, not + /// live interactions; the deliberate 3D pop stays reserved for send / open + /// / new-chat, which call `refreshAvatarVisibility()` with animation. + override func messagesDidReload() { + super.messagesDidReload() + refreshAvatarVisibility(animated: false) + } + + /// Belt-and-suspenders: keep the avatar in sync across re-layouts driven + /// by rotation, keyboard, or bounds changes. `refreshAvatarVisibility` is + /// idempotent, so the steady-state cost is one `visible_messages.isEmpty` + /// check. (This does NOT fire on a bare `tableView.reloadData()`, which is + /// why `messagesDidReload` above is the real fix for the cold-start case.) + override func viewDidLayoutSubviews() { + super.viewDidLayoutSubviews() + refreshAvatarVisibility(animated: false) + } + private func setHeroVisible(_ visible: Bool, animated: Bool = true) { guard let hero = heroAvatar else { return } if visible { @@ -319,12 +437,41 @@ class MainVC: MessagingVC { /// the two slots. 
Initial visibility, Reduce Motion, and non-animated /// callers take the plain alpha-fade path. private func refreshAvatarVisibility(animated: Bool = true) { - let isEmpty = self.visible_messages.isEmpty - let previous = lastVisibilityEmpty - lastVisibilityEmpty = isEmpty + let chatEmpty = self.visible_messages.isEmpty + + // Load (or reshuffle) the swipe deck the first time we land on a blank + // chat. Reset the flag when the chat fills so the next new chat deals a + // fresh deck — and so we never reload mid-swipe on a layout pass. + if chatEmpty { + if !feedStackLoaded { + feedCardList?.setCards(CardStore.shared.feedCards) + feedStackLoaded = true + } + } else { + feedStackLoaded = false + } + // The list owns the empty state when it has cards; otherwise the orb + // does. `heroShown` is the orb's truth — note it's false while the + // list is up, which is what relegates the orb to the nav bar. + let stackHasCards = chatEmpty && !(feedCardList?.isEmpty ?? true) + let heroShown = chatEmpty && !stackHasCards + feedCardList?.isHidden = !stackHasCards + + let previous = lastVisibilityEmpty let isInitial = (previous == nil) - let changed = previous != isEmpty + let changed = previous != heroShown + + // Idempotent: bail out when the hero's shown/hidden state hasn't moved. + // This makes the method cheap and safe to call on every layout pass + // (see viewDidLayoutSubviews), which is what guarantees the hero never + // lingers over a populated chat regardless of which path appended the + // messages — launch restore, a background VM/runner turn, a remote + // poll, etc. None of those route through an explicit refresh site. 
+ guard isInitial || changed else { return } + + lastVisibilityEmpty = heroShown + let canPop = animated && changed && !isInitial @@ -332,8 +479,8 @@ class MainVC: MessagingVC { && view.window != nil guard canPop else { - setHeroVisible(isEmpty, animated: animated && !isInitial) - setNavAvatarVisible(!isEmpty, animated: animated && !isInitial) + setHeroVisible(heroShown, animated: animated && !isInitial) + setNavAvatarVisible(!heroShown, animated: animated && !isInitial) return } @@ -353,21 +500,21 @@ class MainVC: MessagingVC { let nav = avatar, let window = view.window else { - setHeroVisible(isEmpty, animated: true) - setNavAvatarVisible(!isEmpty, animated: true) + setHeroVisible(heroShown, animated: true) + setNavAvatarVisible(!heroShown, animated: true) return } - let source: UIView = isEmpty ? nav : hero - let dest: UIView = isEmpty ? hero : nav + let source: UIView = heroShown ? nav : hero + let dest: UIView = heroShown ? hero : nav AvatarPopAnimator.play(from: source, to: dest, in: window) { [weak self] in guard let self = self else { return } - // Final visibility: hero shows on empty, nav-bar shows on - // non-empty. The animator restored isHidden on both, so we set - // the canonical end state here without re-animating. - self.setHeroVisible(isEmpty, animated: false) - self.setNavAvatarVisible(!isEmpty, animated: false) + // Final visibility: hero shows when it owns the empty state, + // nav-bar shows otherwise (live chat or stack up). The animator + // restored isHidden on both, so set the canonical end state here. 
+ self.setHeroVisible(heroShown, animated: false) + self.setNavAvatarVisible(!heroShown, animated: false) } } } diff --git a/LoopIOS/Markdown/MarkdownShareToolbar.swift b/LoopIOS/Markdown/MarkdownShareToolbar.swift new file mode 100644 index 0000000..07500ab --- /dev/null +++ b/LoopIOS/Markdown/MarkdownShareToolbar.swift @@ -0,0 +1,78 @@ +// +// MarkdownShareToolbar.swift +// Loop +// +// A minimal, reusable toolbar pinned to the bottom of markdown preview +// surfaces. Contains a single Share action that triggers the native share +// sheet with the raw markdown source text. Styled to match the existing +// Loop visual language — subtle separator, system background, SF Symbol +// icon. +// + +import UIKit + +final class MarkdownShareToolbar: UIView { + + // MARK: - Public + + /// Invoked when the user taps the Share button. The host is responsible + /// for presenting `UIActivityViewController` with the appropriate text. + var onShare: (() -> Void)? + + /// The frame of the share button in this view's coordinate space, for + /// anchoring the activity popover on iPad. 
+ var shareButtonFrame: CGRect { shareButton.frame } + + // MARK: - Subviews + + private let separator = UIView() + private let shareButton: UIButton = { + var config = UIButton.Configuration.plain() + config.image = UIImage(systemName: "square.and.arrow.up") + config.preferredSymbolConfigurationForImage = UIImage.SymbolConfiguration( + pointSize: 16, weight: .medium) + config.baseForegroundColor = .label + config.contentInsets = NSDirectionalEdgeInsets(top: 10, leading: 14, bottom: 10, trailing: 14) + let b = UIButton(configuration: config) + b.accessibilityLabel = "Share" + return b + }() + + // MARK: - Init + + override init(frame: CGRect) { + super.init(frame: frame) + setup() + } + + required init?(coder: NSCoder) { fatalError("init(coder:) has not been implemented") } + + // MARK: - Setup + + private func setup() { + backgroundColor = .systemBackground + + separator.translatesAutoresizingMaskIntoConstraints = false + separator.backgroundColor = .separator + addSubview(separator) + + shareButton.translatesAutoresizingMaskIntoConstraints = false + shareButton.addTarget(self, action: #selector(shareTapped), for: .touchUpInside) + addSubview(shareButton) + + NSLayoutConstraint.activate([ + separator.topAnchor.constraint(equalTo: topAnchor), + separator.leadingAnchor.constraint(equalTo: leadingAnchor), + separator.trailingAnchor.constraint(equalTo: trailingAnchor), + separator.heightAnchor.constraint(equalToConstant: 1.0 / UIScreen.main.scale), + + shareButton.topAnchor.constraint(equalTo: separator.bottomAnchor, constant: 4), + shareButton.trailingAnchor.constraint(equalTo: trailingAnchor, constant: -8), + shareButton.bottomAnchor.constraint(equalTo: bottomAnchor, constant: -4), + ]) + + // Intrinsic height comes from the button; no explicit height needed. 
+ } + + @objc private func shareTapped() { onShare?() } +} diff --git a/LoopIOS/MarkdownEditorViewController.swift b/LoopIOS/MarkdownEditorViewController.swift index 9452ee1..b05a9ba 100644 --- a/LoopIOS/MarkdownEditorViewController.swift +++ b/LoopIOS/MarkdownEditorViewController.swift @@ -75,6 +75,7 @@ final class MarkdownEditorViewController: UIViewController { private let textView = UITextView() private let activityIndicator = UIActivityIndicatorView(style: .large) + private let shareToolbar = MarkdownShareToolbar() private lazy var saveButton = UIBarButtonItem( barButtonSystemItem: .save, target: self, action: #selector(saveTapped)) @@ -97,6 +98,9 @@ final class MarkdownEditorViewController: UIViewController { super.viewDidLoad() view.backgroundColor = .systemBackground configureNavigationItem() + // Toolbar must be in the view hierarchy before configureTextView() + // constrains the text view's bottom to shareToolbar.topAnchor. + configureShareToolbar() configureTextView() configureActivityIndicator() observeKeyboard() @@ -145,10 +149,32 @@ final class MarkdownEditorViewController: UIViewController { textView.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor), textView.leadingAnchor.constraint(equalTo: view.leadingAnchor), textView.trailingAnchor.constraint(equalTo: view.trailingAnchor), - textView.bottomAnchor.constraint(equalTo: view.bottomAnchor), + textView.bottomAnchor.constraint(equalTo: shareToolbar.topAnchor), ]) } + private func configureShareToolbar() { + shareToolbar.translatesAutoresizingMaskIntoConstraints = false + view.addSubview(shareToolbar) + NSLayoutConstraint.activate([ + shareToolbar.leadingAnchor.constraint(equalTo: view.leadingAnchor), + shareToolbar.trailingAnchor.constraint(equalTo: view.trailingAnchor), + shareToolbar.bottomAnchor.constraint(equalTo: view.safeAreaLayoutGuide.bottomAnchor), + ]) + shareToolbar.onShare = { [weak self] in + self?.shareMarkdownText() + } + } + + private func shareMarkdownText() { + guard 
isLoaded else { return } + let text = textView.text ?? "" + let vc = UIActivityViewController(activityItems: [text], applicationActivities: nil) + vc.popoverPresentationController?.sourceView = shareToolbar + vc.popoverPresentationController?.sourceRect = shareToolbar.shareButtonFrame + present(vc, animated: true) + } + private func configureActivityIndicator() { activityIndicator.translatesAutoresizingMaskIntoConstraints = false activityIndicator.hidesWhenStopped = true @@ -316,7 +342,8 @@ final class MarkdownEditorViewController: UIViewController { @objc private func keyboardWillChange(_ note: Notification) { guard let frame = note.userInfo?[UIResponder.keyboardFrameEndUserInfoKey] as? CGRect else { return } let overlap = max(0, view.bounds.maxY - view.convert(frame, from: nil).minY) - let inset = max(0, overlap - view.safeAreaInsets.bottom) + let toolbarHeight = shareToolbar.bounds.height + let inset = max(0, overlap - view.safeAreaInsets.bottom - toolbarHeight) textView.contentInset.bottom = inset textView.verticalScrollIndicatorInsets.bottom = inset } diff --git a/LoopIOS/MessageBox.swift b/LoopIOS/MessageBox.swift index 4e66925..e099e34 100644 --- a/LoopIOS/MessageBox.swift +++ b/LoopIOS/MessageBox.swift @@ -12,10 +12,20 @@ import PhotosUI import UniformTypeIdentifiers protocol MessageBoxDelegate: AnyObject { - func didSendMessageText(_ message: String) + /// `sttEngine` carries the engine label ("Deepgram STT"/"Apple STT") when + /// the text came from dictation, so the cell can show a transcription byline + /// under the bubble. `nil` for typed input. + func didSendMessageText(_ message: String, sttEngine: String?) func stopSpeech() } +extension MessageBoxDelegate { + /// Convenience for the typed-input path — no STT engine to attribute. 
+ func didSendMessageText(_ message: String) { + didSendMessageText(message, sttEngine: nil) + } +} + enum MessageBoxState { case normal case recording @@ -67,7 +77,7 @@ class MessageBox: UIView { let recordingContainerView = UIView() let waveformView = UIView() let transcribingLabel = UILabel() - + // Recording state var currentState: MessageBoxState = .normal var audioRecorder: AVAudioRecorder? @@ -175,7 +185,7 @@ class MessageBox: UIView { attachmentChipView.addSubview(v) } - let recordingViews = [waveformView, transcribingLabel] + let recordingViews: [UIView] = [waveformView, transcribingLabel] for view in recordingViews { view.translatesAutoresizingMaskIntoConstraints = false self.recordingContainerView.addSubview(view) @@ -384,7 +394,7 @@ class MessageBox: UIView { transcribingLabel.textColor = .secondaryLabel transcribingLabel.font = UIFont.preferredFont(forTextStyle: .body) transcribingLabel.isHidden = true - + emptyLabel.text = "Ask anything" emptyLabel.textColor = .secondaryLabel emptyLabel.font = UIFont.preferredFont(forTextStyle: .body) @@ -623,6 +633,13 @@ class MessageBox: UIView { // floating over the recording container with the waveform between them. refreshInputButtons() + // Duck music synchronously BEFORE the earcon and state change so the + // earcon doesn't collide with a playing track. The notification-based + // duck in MusicController.handleVoiceLoopState fires asynchronously + // after the next run-loop tick; calling duckForVoiceSession() here + // ensures the pause lands first. 
+ MusicController.shared.duckForVoiceSession() + VoiceLoopCoordinator.shared.setState(.recording) EarconPlayer.shared.play(.listenStart) @@ -737,9 +754,12 @@ class MessageBox: UIView { shouldTryDeepgram = (MessageBox.deepgramAPIKey != nil && MessageBox.isOnline) } if shouldTryDeepgram, beginStreamingRecording() { + VoiceLoopCoordinator.shared.setSTTEngine(.deepgram) return } + VoiceLoopCoordinator.shared.setSTTEngine(.apple) + // Setup audio session to allow background audio to continue playing // and earcons to be audible. // @@ -1005,7 +1025,10 @@ class MessageBox: UIView { print("Transcription successful: '\(transcribedText)'") if !transcribedText.isEmpty { - self?.delegate?.didSendMessageText(transcribedText) + self?.delegate?.didSendMessageText( + transcribedText, + sttEngine: VoiceLoopCoordinator.shared.activeSTTEngine?.displayLabel + ) } else { print("Transcription returned empty string") } @@ -1559,6 +1582,7 @@ extension MessageBox { /// Used on Deepgram WS error or finalize timeout. fileprivate func fallbackToSFSpeechOnFailure() { guard isStreamingSTT else { return } + VoiceLoopCoordinator.shared.setSTTEngine(.apple) if let engine = audioEngine, engine.isRunning { engine.inputNode.removeTap(onBus: 0) @@ -1608,7 +1632,10 @@ extension MessageBox { let final = text.trimmingCharacters(in: .whitespacesAndNewlines) teardownStreaming() if !final.isEmpty { - delegate?.didSendMessageText(final) + delegate?.didSendMessageText( + final, + sttEngine: VoiceLoopCoordinator.shared.activeSTTEngine?.displayLabel + ) } returnToNormalState() } diff --git a/LoopIOS/MessagingCell.swift b/LoopIOS/MessagingCell.swift index cfeb27a..9621db7 100644 --- a/LoopIOS/MessagingCell.swift +++ b/LoopIOS/MessagingCell.swift @@ -36,6 +36,20 @@ protocol MessagingCellPDFDelegate: AnyObject { func messagingCellDidTapPDFRetry(attachmentId: String) } +/// Tap-callbacks from the inline story card. 
Set on every cell that renders +/// a story attachment so MessagingVC can present the full-screen story player +/// or re-run a failed render. +protocol MessagingCellStoryDelegate: AnyObject { + func messagingCellDidTapStory(attachmentId: String) + func messagingCellDidTapStoryRetry(attachmentId: String) +} + +/// Tap-callback from a browse preview card. Set on cells carrying a browse +/// attachment so MessagingVC can present the full-screen live/replay player. +protocol MessagingCellBrowseDelegate: AnyObject { + func messagingCellDidTapBrowse(attachmentId: String) +} + /// Tap-callback from the inline "Used N tools" disclosure row. Set on cells /// that render an assistant turn carrying tool calls so MessagingVC can toggle /// the row's expanded/collapsed state. @@ -110,6 +124,35 @@ class MessagingCell: UITableViewCell { /// untouched. let ttsIndicator = UIActivityIndicatorView(style: .medium) + // MARK: - Swipe-to-reveal timestamp (iMessage-style) + /// Sits just off the cell's right edge; revealed when the chat is swiped + /// left. MessagingVC drives every visible cell's `contentView` translation + /// in lock-step from a single pan, so all bubbles slide together and these + /// labels come into view at once. Text is the message's posted time. + let timeLabel: UILabel = { + let l = UILabel() + l.font = .preferredFont(forTextStyle: .caption1) + l.textColor = .secondaryLabel + l.textAlignment = .right + l.translatesAutoresizingMaskIntoConstraints = false + return l + }() + private var timeLabelInstalled = false + /// Formats a message's `timestamp` for the swipe-reveal label. Shows the + /// short time for today's messages and a "M/d, h:mm a" stamp for older ones, + /// mirroring how iMessage labels its swiped timestamps. 
+ private static let timeFormatter: DateFormatter = { + let f = DateFormatter() + f.dateStyle = .none + f.timeStyle = .short + return f + }() + private static let dayTimeFormatter: DateFormatter = { + let f = DateFormatter() + f.setLocalizedDateFormatFromTemplate("MMMd jmm") + return f + }() + // MARK: - Inline image attachment views (image_spec) let attachmentImageView = UIImageView() let attachmentSpinner = UIActivityIndicatorView(style: .large) @@ -200,6 +243,141 @@ class MessagingCell: UITableViewCell { private var currentPDFAttachmentId: String? weak var pdfDelegate: MessagingCellPDFDelegate? + // MARK: - Inline story card views (StorySkill) + /// Portrait "poster" card for a generated HTML story. The full story + /// renders in the full-screen player on tap; the card is a glanceable + /// stand-in (gradient + title + play glyph) with a spinner while + /// generating and a retry affordance on failure. + private lazy var storyCardView: UIView = { + let v = UIView() + v.translatesAutoresizingMaskIntoConstraints = false + v.layer.cornerRadius = 16 + v.clipsToBounds = true + v.backgroundColor = UIColor.secondarySystemBackground + v.isHidden = true + v.isUserInteractionEnabled = true + return v + }() + private lazy var storyGradientLayer: CAGradientLayer = { + let g = CAGradientLayer() + g.colors = [ + UIColor.systemIndigo.cgColor, + UIColor.systemPurple.cgColor, + UIColor.systemPink.cgColor, + ] + g.startPoint = CGPoint(x: 0, y: 0) + g.endPoint = CGPoint(x: 1, y: 1) + return g + }() + private lazy var storyPlayGlyph: UIImageView = { + let v = UIImageView() + v.translatesAutoresizingMaskIntoConstraints = false + let cfg = UIImage.SymbolConfiguration(pointSize: 34, weight: .semibold) + v.image = UIImage(systemName: "play.circle.fill", withConfiguration: cfg) + v.tintColor = UIColor.white.withAlphaComponent(0.95) + v.contentMode = .scaleAspectFit + return v + }() + private lazy var storyTitleLabel: UILabel = { + let l = UILabel() + 
l.translatesAutoresizingMaskIntoConstraints = false + l.font = UIFont.systemFont(ofSize: 17, weight: .bold) + l.textColor = .white + l.numberOfLines = 3 + return l + }() + private lazy var storySubtitleLabel: UILabel = { + let l = UILabel() + l.translatesAutoresizingMaskIntoConstraints = false + l.font = UIFont.preferredFont(forTextStyle: .caption1) + l.textColor = UIColor.white.withAlphaComponent(0.85) + l.numberOfLines = 2 + return l + }() + private lazy var storySpinner: UIActivityIndicatorView = { + let s = UIActivityIndicatorView(style: .medium) + s.translatesAutoresizingMaskIntoConstraints = false + s.color = .white + s.hidesWhenStopped = true + return s + }() + private lazy var storyRetryButton: UIButton = { + let b = makePDFActionButton(title: "Try again", + systemImage: "arrow.clockwise", + action: #selector(handleStoryRetryTap)) + return b + }() + private lazy var storyTapRecognizer: UITapGestureRecognizer = { + let r = UITapGestureRecognizer(target: self, action: #selector(handleStoryTap)) + r.numberOfTapsRequired = 1 + return r + }() + private var storyCardConstraints: [NSLayoutConstraint] = [] + private var currentStoryAttachmentId: String? + weak var storyDelegate: MessagingCellStoryDelegate? 
+ + // MARK: - Inline browse preview card (browse) + private lazy var browseCardView: UIView = { + let v = UIView() + v.translatesAutoresizingMaskIntoConstraints = false + v.layer.cornerRadius = 16 + v.clipsToBounds = true + v.backgroundColor = UIColor.secondarySystemBackground + v.isHidden = true + v.isUserInteractionEnabled = true + return v + }() + private lazy var browseThumbnail: UIImageView = { + let v = UIImageView() + v.translatesAutoresizingMaskIntoConstraints = false + v.contentMode = .scaleAspectFill + v.clipsToBounds = true + v.backgroundColor = UIColor.tertiarySystemBackground + return v + }() + private lazy var browseSpinner: UIActivityIndicatorView = { + let s = UIActivityIndicatorView(style: .medium) + s.translatesAutoresizingMaskIntoConstraints = false + s.hidesWhenStopped = true + return s + }() + private lazy var browseGlyph: UIImageView = { + let v = UIImageView() + v.translatesAutoresizingMaskIntoConstraints = false + let cfg = UIImage.SymbolConfiguration(pointSize: 13, weight: .semibold) + v.image = UIImage(systemName: "globe", withConfiguration: cfg) + v.tintColor = .secondaryLabel + v.contentMode = .scaleAspectFit + return v + }() + private lazy var browseHostLabel: UILabel = { + let l = UILabel() + l.translatesAutoresizingMaskIntoConstraints = false + l.font = UIFont.systemFont(ofSize: 13, weight: .semibold) + l.textColor = .label + l.numberOfLines = 1 + l.lineBreakMode = .byTruncatingTail + return l + }() + private lazy var browsePill: PaddedLabel = { + let l = PaddedLabel() + l.translatesAutoresizingMaskIntoConstraints = false + l.font = UIFont.systemFont(ofSize: 11, weight: .bold) + l.textColor = .white + l.layer.cornerRadius = 8 + l.clipsToBounds = true + l.textAlignment = .center + return l + }() + private lazy var browseTapRecognizer: UITapGestureRecognizer = { + let r = UITapGestureRecognizer(target: self, action: #selector(handleBrowseTap)) + r.numberOfTapsRequired = 1 + return r + }() + private var browseCardConstraints: 
[NSLayoutConstraint] = [] + private var currentBrowseAttachmentId: String? + weak var browseDelegate: MessagingCellBrowseDelegate? + // MARK: - Inline map attachment views (MapsSkill) /// Caption label shown above the map (optional — hidden when nil). private lazy var mapTitleLabel: UILabel = { @@ -234,6 +412,46 @@ class MessagingCell: UITableViewCell { private var mapConstraints: [NSLayoutConstraint] = [] private var currentMapAttachmentId: String? + // MARK: - Image-gallery views (web image search) + + /// Caption shown above the gallery (the search query, e.g. "Alamo Square park"). + private lazy var galleryTitleLabel: UILabel = { + let l = UILabel() + l.translatesAutoresizingMaskIntoConstraints = false + l.font = UIFont.systemFont(ofSize: 15, weight: .semibold) + l.textColor = .label + l.numberOfLines = 2 + l.isHidden = true + return l + }() + /// Horizontally-scrolling strip of thumbnails. Lazy so plain messages + /// don't pay for it. Tiles are rebuilt imperatively each render. + private lazy var galleryScrollView: ImageGalleryScrollView = { + let sv = ImageGalleryScrollView() + sv.translatesAutoresizingMaskIntoConstraints = false + sv.showsHorizontalScrollIndicator = false + sv.showsVerticalScrollIndicator = false + sv.clipsToBounds = false + sv.isHidden = true + return sv + }() + private lazy var galleryStack: UIStackView = { + let st = UIStackView() + st.translatesAutoresizingMaskIntoConstraints = false + st.axis = .horizontal + st.alignment = .center + st.spacing = 8 + return st + }() + private var galleryConstraints: [NSLayoutConstraint] = [] + private var currentGalleryAttachmentId: String? + /// Bumped on every gallery render so stale async thumbnail loads from a + /// recycled cell don't paint onto the wrong message's tiles. + private var galleryLoadToken: Int = 0 + /// Per-tile mapping (tile tag → full-resolution URL) for tap-to-open. 
+ private var galleryOriginalURLs: [Int: URL] = [:] + private static let galleryThumbSide: CGFloat = 120 + private var currentAttachmentId: String? /// Set by `applyFileAttachment` (user upload) so the tap handler can /// open a QuickLook preview directly instead of routing through the @@ -253,6 +471,7 @@ class MessagingCell: UITableViewCell { v.translatesAutoresizingMaskIntoConstraints = false v.isHidden = true v.onTap = { [weak self] in self?.handleFilePreviewCardTap() } + v.onShare = { [weak self] in self?.handleFilePreviewCardShare() } return v }() private var filePreviewCardConstraints: [NSLayoutConstraint] = [] @@ -331,6 +550,12 @@ class MessagingCell: UITableViewCell { timer?.invalidate() timer = nil + // Reset the swipe-to-reveal slide so a recycled cell doesn't carry over + // a mid-swipe offset, and clear the stale time text. + timeRevealOffset = 0 + contentView.transform = .identity + timeLabel.text = nil + // Halt any in-flight type-on reveal so a recycled cell doesn't keep // fading into the wrong message. stopTypeOnReveal() @@ -387,6 +612,31 @@ class MessagingCell: UITableViewCell { NSLayoutConstraint.deactivate(pdfCardConstraints) pdfCardConstraints.removeAll() + // Story card cleanup. Hide + clear + restore the label colors the + // failure path overrides, so a recycled cell doesn't flash stale text. + storyCardView.isHidden = true + storyTitleLabel.text = nil + storyTitleLabel.textColor = .white + storySubtitleLabel.text = nil + storySubtitleLabel.textColor = UIColor.white.withAlphaComponent(0.85) + storySpinner.stopAnimating() + storySpinner.isHidden = true + storyRetryButton.isHidden = true + currentStoryAttachmentId = nil + NSLayoutConstraint.deactivate(storyCardConstraints) + storyCardConstraints.removeAll() + + // Browse card cleanup — same recycle hygiene as the story card. 
+ browseCardView.isHidden = true + browseThumbnail.image = nil + browseHostLabel.text = nil + browsePill.text = nil + browseSpinner.stopAnimating() + browseSpinner.isHidden = true + currentBrowseAttachmentId = nil + NSLayoutConstraint.deactivate(browseCardConstraints) + browseCardConstraints.removeAll() + // Map cleanup — drop annotations so a recycled cell doesn't briefly // show the previous message's pins. mapView.removeAnnotations(mapView.annotations) @@ -397,6 +647,22 @@ class MessagingCell: UITableViewCell { NSLayoutConstraint.deactivate(mapConstraints) mapConstraints.removeAll() + // Image-gallery cleanup — invalidate in-flight thumbnail loads, tear + // down the tiles, and hide the strip so a recycled cell doesn't flash + // the previous message's images. + galleryLoadToken &+= 1 + galleryScrollView.isHidden = true + galleryTitleLabel.isHidden = true + galleryTitleLabel.text = nil + currentGalleryAttachmentId = nil + galleryOriginalURLs.removeAll() + for tile in galleryStack.arrangedSubviews { + galleryStack.removeArrangedSubview(tile) + tile.removeFromSuperview() + } + NSLayoutConstraint.deactivate(galleryConstraints) + galleryConstraints.removeAll() + // Onboarding-card cleanup. Same hide-and-reset pattern as the file // preview card — the view is reused across cells that render // different card kinds. @@ -517,6 +783,11 @@ class MessagingCell: UITableViewCell { self.addViews(views: [attachmentImageView, attachmentSpinner, attachmentErrorLabel, downloadButton, retryButton]) } + // Swipe-left reveals when this message was posted. Set on every render + // path (early-returns below all inherit it), so all bubble kinds slide + // out to the same timestamp column. + setTimeLabel(for: data.timestamp) + // Onboarding card takes its own dedicated path: a left-aligned text // bubble with the prompt and an interactive card pinned underneath. 
// Routed first so it bypasses the image / file / table branches @@ -543,6 +814,20 @@ class MessagingCell: UITableViewCell { return } + // Inline story card (generate_story). Renders as a portrait poster + // with title + play glyph; tap opens the full-screen story player. + if let storyAttachment = data.storyAttachment { + applyStoryAttachment(storyAttachment, modelLabelText: modelText(for: data)) + return + } + + // Inline browse preview card (browse). Shows the latest screenshot + + // host + a status pill; tap opens the full-screen live/replay player. + if let browseAttachment = data.browseAttachment { + applyBrowseAttachment(browseAttachment, modelLabelText: modelText(for: data)) + return + } + // Inline map embed — MKMapView with one pin per place. Callouts // open Apple Maps for that destination. if let mapAttachment = data.mapAttachment { @@ -550,6 +835,15 @@ class MessagingCell: UITableViewCell { return } + // Inline web-image-search gallery — a horizontal strip of thumbnails + // loaded from the result URLs; tap a thumbnail to open the full image. + if let galleryAttachment = data.imageGalleryAttachment { + // Attribution is the image source, not the chat model — these are + // real photos from Google Images (via SerpAPI), not model output. + applyImageGalleryAttachment(galleryAttachment, modelLabelText: "Google Images") + return + } + // File attachment. Renders on the user side (right-aligned) for // uploads, the assistant side for share_file results. The function- // role variant carries an LLM-only confirmation string in `content` @@ -639,16 +933,25 @@ class MessagingCell: UITableViewCell { animatingtextView.textContainer.widthTracksTextView = true - textView.attributedText = self.attributedString(from: data.content) + // Render the markdown once and share it between the base text view + // and the animating overlay. + let full = self.attributedString(from: data.content) + + // IMPORTANT: set `font`/`textColor` BEFORE assigning `attributedText`. 
+ // On a UITextView these setters apply uniformly to the *existing* + // text, so setting them AFTER `attributedText` strips the per-range + // bold/link styling the markdown renderer produced. That left the + // base `textView` rendering in plain body weight while the overlay + // kept its bold — so the two wrapped differently and, with both + // visible, drew the text twice at mismatched positions (the + // "garbled / doubled text" bug, worst around bold/heading/link runs). textView.textColor = .label textView.font = UIFont.preferredFont(forTextStyle: .body) - textView.alpha = 0.1 textView.layer.borderWidth = 0 textView.layer.borderColor = UIColor.clear.cgColor - - animatingtextView.alpha = 1 + textView.attributedText = full + animatingtextView.textColor = .label -// animatingtextView.text = data.content animatingtextView.font = UIFont.preferredFont(forTextStyle: .body) let byline = modelText(for: data) @@ -669,14 +972,18 @@ class MessagingCell: UITableViewCell { // which re-ran markdown regex per tick and relaid out the table — // this renders markdown once, fixes the height via `textView`, and // only modulates per-word alpha. See docs/streaming-investigation.md. - let full = self.attributedString(from: data.content) if shouldAnimate, !UIAccessibility.isReduceMotionEnabled { + // Base layer is the invisible height anchor; the overlay fades + // the words in (startTypeOnReveal sets textView.alpha = 0). startTypeOnReveal(full: full) } else { + // No animation: render through the single base text view and + // keep the overlay fully hidden. Showing both at once draws the + // same text twice, which garbles whenever their wrapping diverges. 
stopTypeOnReveal() textView.alpha = 1.0 - animatingtextView.alpha = 1 - animatingtextView.attributedText = full + animatingtextView.alpha = 0 + animatingtextView.attributedText = nil } // Update content size after setting text @@ -689,17 +996,40 @@ class MessagingCell: UITableViewCell { animatingtextView.isHidden = true actionButton.isHidden = true shimmerLabel.isHidden = true - modelLabel.isHidden = true - + + // Dictation byline ("Deepgram STT"/"Apple STT") under the user bubble, reusing + // `modelLabel`. When present it drives the cell bottom; otherwise + // the text view pins to the bottom as before. + let sttByline = data.sttEngine?.trimmingCharacters(in: .whitespaces) + let hasSTTByline = !(sttByline ?? "").isEmpty + modelLabel.isHidden = !hasSTTByline + // Store and activate text view constraints for user messages textViewConstraints = [ textView.trailingAnchor.constraint(equalTo: self.contentView.trailingAnchor, constant: -10), textView.topAnchor.constraint(equalTo: self.contentView.topAnchor, constant: 6), textView.widthAnchor.constraint(lessThanOrEqualTo: self.contentView.widthAnchor, multiplier: 0.8, constant: -40), - textView.bottomAnchor.constraint(equalTo: self.contentView.bottomAnchor, constant: -6) ] - + if !hasSTTByline { + textViewConstraints.append( + textView.bottomAnchor.constraint(equalTo: self.contentView.bottomAnchor, constant: -6) + ) + } NSLayoutConstraint.activate(textViewConstraints) + + if hasSTTByline { + modelLabel.text = sttByline + modelLabel.textColor = .secondaryLabel + modelLabel.font = UIFont.preferredFont(forTextStyle: .caption2) + modelLabel.numberOfLines = 1 + baseModelText = sttByline + modelLabelConstraints = [ + modelLabel.trailingAnchor.constraint(equalTo: textView.trailingAnchor, constant: -2), + modelLabel.topAnchor.constraint(equalTo: textView.bottomAnchor, constant: 2), + self.contentView.bottomAnchor.constraint(equalTo: modelLabel.bottomAnchor, constant: 6), + ] + NSLayoutConstraint.activate(modelLabelConstraints) + 
} animatingtextView.alpha = 0 textView.alpha = 1 textView.textContainerInset = .init(top: 8, left: 10, bottom: 8, right: 10) @@ -783,6 +1113,61 @@ class MessagingCell: UITableViewCell { self.contentView.addSubview(view) } } + + // MARK: - Swipe-to-reveal timestamp + + /// Adds `timeLabel` to the content view exactly once, pinned just past the + /// right edge so it's off-screen until the chat is swiped left. It rides + /// inside `contentView`, so translating the content view (see + /// `setTimeRevealOffset`) slides the bubble out and this label in together — + /// the whole row moves as one unit, like iMessage. + private func installTimeLabelIfNeeded() { + guard !timeLabelInstalled else { return } + timeLabelInstalled = true + // Content view must not clip, or the off-edge label is invisible even + // once revealed. + contentView.clipsToBounds = false + contentView.addSubview(timeLabel) + // Trailing is pinned past the right edge by (reveal column − inset), so + // at a full swipe the label settles 12pt from the edge, right-aligned — + // wider "older message" stamps grow leftward instead of clipping. Must + // stay in sync with MessagingVC.timeRevealMax. + NSLayoutConstraint.activate([ + timeLabel.trailingAnchor.constraint(equalTo: contentView.trailingAnchor, constant: 72 - 12), + timeLabel.centerYAnchor.constraint(equalTo: contentView.centerYAnchor), + ]) + } + + /// Sets the swipe-reveal time text for this row from its message. + func setTimeLabel(for date: Date) { + installTimeLabelIfNeeded() + timeLabel.text = Calendar.current.isDateInToday(date) + ? Self.timeFormatter.string(from: date) + : Self.dayTimeFormatter.string(from: date) + } + + /// Current swipe-reveal slide offset, retained so `layoutSubviews` can + /// re-assert the transform. 
`UITableViewCell.layoutSubviews` sets + /// `contentView.frame = bounds`, which cancels a translation transform's + /// visible offset — and the nav-orb's display-link animation invalidates + /// the view tree every frame, so that reset fires continuously during a + /// drag. The net effect was the slide only appearing on release (when the + /// explicit spring animation took over). Re-applying after every layout + /// keeps the live drag visible. + private var timeRevealOffset: CGFloat = 0 + + /// Slides the whole row left by `offset` points to reveal the timestamp. + /// Driven by MessagingVC's pan so every visible cell moves in lock-step. + func setTimeRevealOffset(_ offset: CGFloat) { + timeRevealOffset = offset + applyTimeRevealTransform() + } + + private func applyTimeRevealTransform() { + contentView.transform = timeRevealOffset == 0 + ? .identity + : CGAffineTransform(translationX: -timeRevealOffset, y: 0) + } func attributedString(from text: String) -> NSAttributedString { // Delegated to the shared helper so the chat bubble and the @@ -1743,6 +2128,271 @@ class MessagingCell: UITableViewCell { pdfDelegate?.messagingCellDidTapPDFRetry(attachmentId: id) } + // MARK: - Story card + + private func applyStoryAttachment(_ attachment: StoryAttachment, + modelLabelText: String) { + currentStoryAttachmentId = attachment.id + + // Hide every other render path's views; the story card takes the row. 
+ profileImageView.isHidden = true + textView.isHidden = true + animatingtextView.isHidden = true + actionButton.isHidden = true + shimmerLabel.isHidden = true + modelLabel.isHidden = false + attachmentImageView.isHidden = true + attachmentSpinner.stopAnimating() + attachmentSpinner.isHidden = true + attachmentErrorLabel.isHidden = true + downloadButton.isHidden = true + retryButton.isHidden = true + browseCardView.isHidden = true + + if storyCardView.superview == nil { + self.contentView.addSubview(storyCardView) + storyCardView.layer.insertSublayer(storyGradientLayer, at: 0) + storyCardView.addSubview(storyPlayGlyph) + storyCardView.addSubview(storyTitleLabel) + storyCardView.addSubview(storySubtitleLabel) + storyCardView.addSubview(storySpinner) + storyCardView.addSubview(storyRetryButton) + } + if storyTapRecognizer.view !== storyCardView { + storyCardView.addGestureRecognizer(storyTapRecognizer) + } + + storyCardView.isHidden = false + + // Portrait poster — 9:16-ish, sized small so it reads as a card. 
+ let cardWidth: CGFloat = 160 + let cardHeight: CGFloat = 248 + + storyCardConstraints = [ + storyCardView.leadingAnchor.constraint(equalTo: contentView.leadingAnchor, constant: 20), + storyCardView.topAnchor.constraint(equalTo: contentView.topAnchor, constant: 12), + storyCardView.widthAnchor.constraint(equalToConstant: cardWidth), + storyCardView.heightAnchor.constraint(equalToConstant: cardHeight), + + storyPlayGlyph.topAnchor.constraint(equalTo: storyCardView.topAnchor, constant: 16), + storyPlayGlyph.leadingAnchor.constraint(equalTo: storyCardView.leadingAnchor, constant: 16), + + storySpinner.centerXAnchor.constraint(equalTo: storyCardView.centerXAnchor), + storySpinner.centerYAnchor.constraint(equalTo: storyCardView.centerYAnchor), + + storyTitleLabel.leadingAnchor.constraint(equalTo: storyCardView.leadingAnchor, constant: 16), + storyTitleLabel.trailingAnchor.constraint(equalTo: storyCardView.trailingAnchor, constant: -16), + + storySubtitleLabel.leadingAnchor.constraint(equalTo: storyTitleLabel.leadingAnchor), + storySubtitleLabel.trailingAnchor.constraint(equalTo: storyTitleLabel.trailingAnchor), + storySubtitleLabel.topAnchor.constraint(equalTo: storyTitleLabel.bottomAnchor, constant: 4), + storySubtitleLabel.bottomAnchor.constraint(equalTo: storyCardView.bottomAnchor, constant: -16), + + storyRetryButton.leadingAnchor.constraint(equalTo: storyTitleLabel.leadingAnchor), + storyRetryButton.topAnchor.constraint(equalTo: storySubtitleLabel.bottomAnchor, constant: 8), + + modelLabel.leadingAnchor.constraint(equalTo: storyCardView.leadingAnchor), + modelLabel.topAnchor.constraint(equalTo: storyCardView.bottomAnchor, constant: 6), + contentView.bottomAnchor.constraint(greaterThanOrEqualTo: modelLabel.bottomAnchor, constant: 12), + ] + NSLayoutConstraint.activate(storyCardConstraints) + + storyTitleLabel.text = attachment.title + + switch attachment.status { + case .generating: + storyGradientLayer.isHidden = false + storyPlayGlyph.isHidden = true + 
storySpinner.isHidden = false + storySpinner.startAnimating() + storySubtitleLabel.text = "Creating story…" + storyRetryButton.isHidden = true + storyTapRecognizer.isEnabled = false + case .ready: + storyGradientLayer.isHidden = false + storySpinner.stopAnimating() + storySpinner.isHidden = true + storyPlayGlyph.isHidden = false + storySubtitleLabel.text = "Tap to view" + storyRetryButton.isHidden = true + storyTapRecognizer.isEnabled = true + case .failed: + storyGradientLayer.isHidden = true + storySpinner.stopAnimating() + storySpinner.isHidden = true + storyPlayGlyph.isHidden = true + storyTitleLabel.textColor = .label + storySubtitleLabel.textColor = .secondaryLabel + storySubtitleLabel.text = attachment.failureReason ?? "Couldn't generate story" + storyRetryButton.isHidden = false + storyTapRecognizer.isEnabled = false + } + + baseModelText = modelLabelText + modelLabel.text = modelLabelText + modelLabel.textColor = .secondaryLabel + modelLabel.font = UIFont.preferredFont(forTextStyle: .caption2) + modelLabel.numberOfLines = 1 + ttsIndicator.stopAnimating() + ttsIndicator.isHidden = true + setNeedsLayout() + } + + @objc private func handleStoryTap() { + guard let id = currentStoryAttachmentId else { return } + storyDelegate?.messagingCellDidTapStory(attachmentId: id) + } + + @objc private func handleStoryRetryTap() { + guard let id = currentStoryAttachmentId else { return } + storyDelegate?.messagingCellDidTapStoryRetry(attachmentId: id) + } + + // MARK: - Browse preview card + + private func applyBrowseAttachment(_ attachment: BrowseAttachment, + modelLabelText: String) { + currentBrowseAttachmentId = attachment.id + + // Hide every other render path's views; the browse card takes the row. 
+ profileImageView.isHidden = true + textView.isHidden = true + animatingtextView.isHidden = true + actionButton.isHidden = true + shimmerLabel.isHidden = true + modelLabel.isHidden = false + attachmentImageView.isHidden = true + attachmentSpinner.stopAnimating() + attachmentSpinner.isHidden = true + attachmentErrorLabel.isHidden = true + downloadButton.isHidden = true + retryButton.isHidden = true + storyCardView.isHidden = true + + if browseCardView.superview == nil { + self.contentView.addSubview(browseCardView) + browseCardView.addSubview(browseThumbnail) + browseCardView.addSubview(browseSpinner) + browseCardView.addSubview(browseGlyph) + browseCardView.addSubview(browseHostLabel) + browseCardView.addSubview(browsePill) + } + if browseTapRecognizer.view !== browseCardView { + browseCardView.addGestureRecognizer(browseTapRecognizer) + } + browseCardView.isHidden = false + + let cardWidth: CGFloat = 232 + let thumbHeight: CGFloat = 132 + let footerHeight: CGFloat = 40 + + browseCardConstraints = [ + browseCardView.leadingAnchor.constraint(equalTo: contentView.leadingAnchor, constant: 20), + browseCardView.topAnchor.constraint(equalTo: contentView.topAnchor, constant: 12), + browseCardView.widthAnchor.constraint(equalToConstant: cardWidth), + + browseThumbnail.topAnchor.constraint(equalTo: browseCardView.topAnchor), + browseThumbnail.leadingAnchor.constraint(equalTo: browseCardView.leadingAnchor), + browseThumbnail.trailingAnchor.constraint(equalTo: browseCardView.trailingAnchor), + browseThumbnail.heightAnchor.constraint(equalToConstant: thumbHeight), + + browseSpinner.centerXAnchor.constraint(equalTo: browseThumbnail.centerXAnchor), + browseSpinner.centerYAnchor.constraint(equalTo: browseThumbnail.centerYAnchor), + + browseGlyph.leadingAnchor.constraint(equalTo: browseCardView.leadingAnchor, constant: 12), + browseGlyph.topAnchor.constraint(equalTo: browseThumbnail.bottomAnchor, constant: 12), + + browseHostLabel.leadingAnchor.constraint(equalTo: 
browseGlyph.trailingAnchor, constant: 6), + browseHostLabel.centerYAnchor.constraint(equalTo: browseGlyph.centerYAnchor), + + browsePill.leadingAnchor.constraint(greaterThanOrEqualTo: browseHostLabel.trailingAnchor, constant: 8), + browsePill.trailingAnchor.constraint(equalTo: browseCardView.trailingAnchor, constant: -12), + browsePill.centerYAnchor.constraint(equalTo: browseGlyph.centerYAnchor), + + browseCardView.bottomAnchor.constraint(equalTo: browseThumbnail.bottomAnchor, constant: footerHeight), + + modelLabel.leadingAnchor.constraint(equalTo: browseCardView.leadingAnchor), + modelLabel.topAnchor.constraint(equalTo: browseCardView.bottomAnchor, constant: 6), + contentView.bottomAnchor.constraint(greaterThanOrEqualTo: modelLabel.bottomAnchor, constant: 12), + ] + NSLayoutConstraint.activate(browseCardConstraints) + + browseHostLabel.text = attachment.displayHost + + // Thumbnail — latest captured screenshot, if any yet. + if let path = attachment.latestThumbnailPath, + let img = UIImage(contentsOfFile: path) { + browseThumbnail.image = img + } + + // Status pill. 
+ browsePill.text = attachment.pillText.uppercased() + browsePill.textInsets = UIEdgeInsets(top: 3, left: 7, bottom: 3, right: 7) + switch attachment.status { + case .navigating: + browsePill.backgroundColor = .systemBlue + browseSpinner.isHidden = browseThumbnail.image != nil + if browseThumbnail.image == nil { browseSpinner.startAnimating() } else { browseSpinner.stopAnimating() } + browseTapRecognizer.isEnabled = true + case .reading: + browsePill.backgroundColor = .systemIndigo + browseSpinner.stopAnimating() + browseSpinner.isHidden = true + browseTapRecognizer.isEnabled = true + case .done: + browsePill.backgroundColor = .systemGreen + browseSpinner.stopAnimating() + browseSpinner.isHidden = true + browseTapRecognizer.isEnabled = true + case .failed: + browsePill.text = "FAILED" + browsePill.backgroundColor = .systemRed + browseSpinner.stopAnimating() + browseSpinner.isHidden = true + browseHostLabel.text = attachment.failureReason ?? attachment.displayHost + browseTapRecognizer.isEnabled = true + } + + baseModelText = modelLabelText + modelLabel.text = modelLabelText + modelLabel.textColor = .secondaryLabel + modelLabel.font = UIFont.preferredFont(forTextStyle: .caption2) + modelLabel.numberOfLines = 1 + ttsIndicator.stopAnimating() + ttsIndicator.isHidden = true + setNeedsLayout() + } + + @objc private func handleBrowseTap() { + guard let id = currentBrowseAttachmentId else { return } + browseDelegate?.messagingCellDidTapBrowse(attachmentId: id) + } + + override func layoutSubviews() { + // While a swipe-reveal slide is active, clear the transform BEFORE + // super lays out. `UITableViewCell.layoutSubviews` sets + // `contentView.frame = bounds`; if the transform is non-identity at + // that moment, UIKit shifts the layer's center to compensate, leaving + // the transform property set but the visible offset cancelled (which + // is why the slide only appeared on release, when the explicit + // animation took over). 
Laying out with an identity transform keeps + // the frame clean, then we re-apply the slide on top of it. Only do + // this mid-slide so the release spring animation (offset == 0) runs + // untouched. + if timeRevealOffset != 0 { + contentView.transform = .identity + } + super.layoutSubviews() + if timeRevealOffset != 0 { + applyTimeRevealTransform() + } + // CALayer frames aren't driven by Auto Layout — keep the story card's + // gradient sized to the card whenever the card is on screen. + if !storyCardView.isHidden { + storyGradientLayer.frame = storyCardView.bounds + } + } + private func configureRoundButton(_ button: UIButton, systemImage: String, accessibility: String, @@ -1938,7 +2588,7 @@ class MessagingCell: UITableViewCell { return } currentAttachmentId = attachment.id - currentAttachmentFileURL = attachment.fileURL + currentAttachmentFileURL = attachment.resolvedFileURL profileImageView.isHidden = true animatingtextView.isHidden = true @@ -2038,7 +2688,7 @@ class MessagingCell: UITableViewCell { // page 1; images get loaded directly. Either way `currentAttachmentId` // is the gate against stale callbacks on a recycled cell. let id = attachment.id - let url = attachment.fileURL + let url = attachment.resolvedFileURL let kind = attachment.kind let pdfSize = CGSize(width: imageSide, height: pdfHeight) DispatchQueue.global(qos: .userInitiated).async { [weak self] in @@ -2076,7 +2726,7 @@ class MessagingCell: UITableViewCell { accompanyingText: String, role: String) { currentAttachmentId = attachment.id - currentAttachmentFileURL = attachment.fileURL + currentAttachmentFileURL = attachment.resolvedFileURL profileImageView.isHidden = true animatingtextView.isHidden = true @@ -2174,6 +2824,18 @@ class MessagingCell: UITableViewCell { presentQuickLook(for: url) } + /// Share the raw text content of the previewed file. + private func handleFilePreviewCardShare() { + guard let url = currentAttachmentFileURL, + let data = try? 
Data(contentsOf: url), + let text = String(data: data, encoding: .utf8), + let presenter = parentViewController else { return } + let vc = UIActivityViewController(activityItems: [text], applicationActivities: nil) + vc.popoverPresentationController?.sourceView = filePreviewCard + vc.popoverPresentationController?.sourceRect = filePreviewCard.bounds + presenter.present(vc, animated: true) + } + /// Render page 1 of a PDF as a UIImage sized to fit the bubble. Returns /// nil for malformed or empty PDFs — caller handles the placeholder. private static func renderPDFThumbnail(at url: URL, size: CGSize) -> UIImage? { @@ -2277,11 +2939,366 @@ class MessagingCell: UITableViewCell { ttsIndicator.isHidden = true } + // MARK: - Inline image-gallery rendering (web image search) + + /// Lay out the web image-search results as a horizontally-scrolling strip + /// of square thumbnails on the assistant side. Thumbnails load lazily from + /// their remote URLs (URLSession + shared URLCache); a recycled cell's + /// stale loads are dropped via `galleryLoadToken`. Tapping a thumbnail + /// opens the full-resolution image in a zoomable viewer. 
+ private func applyImageGalleryAttachment(_ attachment: ImageGalleryAttachment, + modelLabelText: String) { + currentGalleryAttachmentId = attachment.id + + profileImageView.isHidden = true + textView.isHidden = true + animatingtextView.isHidden = true + actionButton.isHidden = true + shimmerLabel.isHidden = true + modelLabel.isHidden = false + + if galleryScrollView.superview == nil { + self.addViews(views: [galleryTitleLabel, galleryScrollView]) + galleryScrollView.addSubview(galleryStack) + NSLayoutConstraint.activate([ + galleryStack.leadingAnchor.constraint(equalTo: galleryScrollView.contentLayoutGuide.leadingAnchor), + galleryStack.trailingAnchor.constraint(equalTo: galleryScrollView.contentLayoutGuide.trailingAnchor), + galleryStack.topAnchor.constraint(equalTo: galleryScrollView.contentLayoutGuide.topAnchor), + galleryStack.bottomAnchor.constraint(equalTo: galleryScrollView.contentLayoutGuide.bottomAnchor), + galleryStack.heightAnchor.constraint(equalTo: galleryScrollView.frameLayoutGuide.heightAnchor), + ]) + } + + let query = attachment.query.trimmingCharacters(in: .whitespacesAndNewlines) + let hasTitle = !query.isEmpty + if hasTitle { + galleryTitleLabel.text = query + galleryTitleLabel.isHidden = false + } else { + galleryTitleLabel.isHidden = true + galleryTitleLabel.text = nil + } + galleryScrollView.isHidden = false + + // Rebuild tiles. A new render token invalidates any in-flight loads + // from a prior configuration of this (recycled) cell. 
+ galleryLoadToken &+= 1 + let token = galleryLoadToken + galleryOriginalURLs.removeAll() + for tile in galleryStack.arrangedSubviews { + galleryStack.removeArrangedSubview(tile) + tile.removeFromSuperview() + } + + let side = MessagingCell.galleryThumbSide + for (index, item) in attachment.items.enumerated() { + let tile = makeGalleryTile(side: side, tag: index) + galleryStack.addArrangedSubview(tile) + if let original = URL(string: item.originalURL) { + galleryOriginalURLs[index] = original + } + if let thumbURL = URL(string: item.thumbnailURL) { + loadGalleryThumbnail(thumbURL, into: tile, token: token) + } + } + + galleryConstraints = [ + galleryScrollView.leadingAnchor.constraint(equalTo: contentView.leadingAnchor, constant: 20), + galleryScrollView.trailingAnchor.constraint(equalTo: contentView.trailingAnchor, constant: -20), + galleryScrollView.heightAnchor.constraint(equalToConstant: side), + + modelLabel.leadingAnchor.constraint(equalTo: galleryScrollView.leadingAnchor), + modelLabel.topAnchor.constraint(equalTo: galleryScrollView.bottomAnchor, constant: 6), + contentView.bottomAnchor.constraint(greaterThanOrEqualTo: modelLabel.bottomAnchor, constant: 12), + ] + if hasTitle { + galleryConstraints.append(contentsOf: [ + galleryTitleLabel.leadingAnchor.constraint(equalTo: contentView.leadingAnchor, constant: 20), + galleryTitleLabel.trailingAnchor.constraint(lessThanOrEqualTo: contentView.trailingAnchor, constant: -20), + galleryTitleLabel.topAnchor.constraint(equalTo: contentView.topAnchor, constant: 12), + galleryScrollView.topAnchor.constraint(equalTo: galleryTitleLabel.bottomAnchor, constant: 8), + ]) + } else { + galleryConstraints.append( + galleryScrollView.topAnchor.constraint(equalTo: contentView.topAnchor, constant: 12) + ) + } + NSLayoutConstraint.activate(galleryConstraints) + + baseModelText = modelLabelText + modelLabel.text = modelLabelText + modelLabel.textColor = .secondaryLabel + modelLabel.font = UIFont.preferredFont(forTextStyle: 
.caption2) + modelLabel.numberOfLines = 1 + ttsIndicator.stopAnimating() + ttsIndicator.isHidden = true + } + + /// Build one square thumbnail tile (an image view + spinner) tagged with + /// its index so the tap handler can resolve the full-resolution URL. + private func makeGalleryTile(side: CGFloat, tag: Int) -> UIImageView { + let iv = UIImageView() + iv.tag = tag + iv.translatesAutoresizingMaskIntoConstraints = false + iv.contentMode = .scaleAspectFill + iv.clipsToBounds = true + iv.layer.cornerRadius = 12 + iv.layer.borderWidth = 1 + iv.layer.borderColor = UIColor.systemFill.cgColor + iv.backgroundColor = .secondarySystemBackground + iv.isUserInteractionEnabled = true + NSLayoutConstraint.activate([ + iv.widthAnchor.constraint(equalToConstant: side), + iv.heightAnchor.constraint(equalToConstant: side), + ]) + let spinner = UIActivityIndicatorView(style: .medium) + spinner.translatesAutoresizingMaskIntoConstraints = false + spinner.color = .secondaryLabel + spinner.startAnimating() + iv.addSubview(spinner) + NSLayoutConstraint.activate([ + spinner.centerXAnchor.constraint(equalTo: iv.centerXAnchor), + spinner.centerYAnchor.constraint(equalTo: iv.centerYAnchor), + ]) + iv.addGestureRecognizer( + UITapGestureRecognizer(target: self, action: #selector(handleGalleryThumbTap(_:))) + ) + return iv + } + + /// Fetch a thumbnail off the main thread and paint it only if this cell is + /// still showing the same gallery render (token match). + private func loadGalleryThumbnail(_ url: URL, into tile: UIImageView, token: Int) { + URLSession.shared.dataTask(with: url) { [weak self, weak tile] data, _, _ in + guard let data = data, let image = UIImage(data: data) else { return } + DispatchQueue.main.async { + guard let self = self, let tile = tile, + self.galleryLoadToken == token else { return } + // Drop the spinner once the image lands. + for sub in tile.subviews where sub is UIActivityIndicatorView { + (sub as? 
UIActivityIndicatorView)?.stopAnimating() + sub.removeFromSuperview() + } + tile.image = image + } + }.resume() + } + + @objc private func handleGalleryThumbTap(_ recognizer: UITapGestureRecognizer) { + guard let tile = recognizer.view as? UIImageView, + galleryOriginalURLs[tile.tag] != nil, + let presenter = parentViewController else { return } + // Build the ordered list of full-resolution URLs so the viewer can + // page (swipe) between all results, starting at the tapped one. + let ordered = galleryOriginalURLs.keys.sorted() + let urls = ordered.compactMap { galleryOriginalURLs[$0] } + let startIndex = ordered.firstIndex(of: tile.tag) ?? 0 + let viewer = RemoteImageGalleryViewerController(imageURLs: urls, + startIndex: startIndex, + startPlaceholder: tile.image) + viewer.modalPresentationStyle = .fullScreen + presenter.present(viewer, animated: true) + } + required init?(coder: NSCoder) { fatalError() } } +/// Marker subclass for the image-search gallery's horizontal scroller. Lets +/// `MessagingVC`'s swipe-to-reveal-timestamp pan ignore touches that land +/// inside a gallery, so horizontally swiping the thumbnails scrolls them +/// instead of dragging the whole transcript to reveal timestamps. +final class ImageGalleryScrollView: UIScrollView {} + +// MARK: - Remote full-screen image viewer (swipeable gallery) + +/// Full-screen viewer that pages (swipes) between the image-search results, +/// each page pinch-to-zoomable. Opens on the tapped image and shows a +/// "n of N" position indicator. Tap (when not zoomed) or Done dismisses. +private final class RemoteImageGalleryViewerController: UIPageViewController, + UIPageViewControllerDataSource, + UIPageViewControllerDelegate { + private let imageURLs: [URL] + private var currentIndex: Int + private let startPlaceholder: UIImage? + private let counterLabel = UILabel() + + init(imageURLs: [URL], startIndex: Int, startPlaceholder: UIImage?) 
{ + self.imageURLs = imageURLs + self.currentIndex = min(max(0, startIndex), max(0, imageURLs.count - 1)) + self.startPlaceholder = startPlaceholder + super.init(transitionStyle: .scroll, + navigationOrientation: .horizontal, + options: [.interPageSpacing: 16]) + } + + required init?(coder: NSCoder) { fatalError() } + + override func viewDidLoad() { + super.viewDidLoad() + view.backgroundColor = .black + dataSource = self + delegate = self + + let first = makePage(index: currentIndex, placeholder: startPlaceholder) + setViewControllers([first], direction: .forward, animated: false) + + let done = UIButton(type: .system) + done.translatesAutoresizingMaskIntoConstraints = false + done.setTitle("Done", for: .normal) + done.setTitleColor(.white, for: .normal) + done.titleLabel?.font = UIFont.systemFont(ofSize: 17, weight: .semibold) + done.addTarget(self, action: #selector(dismissSelf), for: .touchUpInside) + view.addSubview(done) + + counterLabel.translatesAutoresizingMaskIntoConstraints = false + counterLabel.textColor = .white + counterLabel.font = UIFont.systemFont(ofSize: 15, weight: .medium) + counterLabel.textAlignment = .center + view.addSubview(counterLabel) + + NSLayoutConstraint.activate([ + done.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 8), + done.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -16), + counterLabel.centerYAnchor.constraint(equalTo: done.centerYAnchor), + counterLabel.centerXAnchor.constraint(equalTo: view.centerXAnchor), + ]) + updateCounter() + } + + private func makePage(index: Int, placeholder: UIImage?) 
-> RemoteImagePageController { + let page = RemoteImagePageController(imageURL: imageURLs[index], + pageIndex: index, + placeholder: placeholder) + page.onTapWhileUnzoomed = { [weak self] in self?.dismissSelf() } + return page + } + + private func updateCounter() { + counterLabel.isHidden = imageURLs.count <= 1 + counterLabel.text = "\(currentIndex + 1) of \(imageURLs.count)" + } + + @objc private func dismissSelf() { dismiss(animated: true) } + + // MARK: UIPageViewControllerDataSource + + func pageViewController(_ pageViewController: UIPageViewController, + viewControllerBefore viewController: UIViewController) -> UIViewController? { + guard let page = viewController as? RemoteImagePageController, + page.pageIndex > 0 else { return nil } + return makePage(index: page.pageIndex - 1, placeholder: nil) + } + + func pageViewController(_ pageViewController: UIPageViewController, + viewControllerAfter viewController: UIViewController) -> UIViewController? { + guard let page = viewController as? RemoteImagePageController, + page.pageIndex < imageURLs.count - 1 else { return nil } + return makePage(index: page.pageIndex + 1, placeholder: nil) + } + + func pageViewController(_ pageViewController: UIPageViewController, + didFinishAnimating finished: Bool, + previousViewControllers: [UIViewController], + transitionCompleted completed: Bool) { + guard completed, + let page = viewControllers?.first as? RemoteImagePageController else { return } + currentIndex = page.pageIndex + updateCounter() + } +} + +/// One pinch-to-zoom page in `RemoteImageGalleryViewerController`. Shows a +/// placeholder thumbnail immediately (when provided), then swaps in the +/// full-resolution image once it downloads. +private final class RemoteImagePageController: UIViewController, UIScrollViewDelegate { + let pageIndex: Int + /// Invoked on a single tap when the image is not zoomed in — the container + /// uses it to dismiss. + var onTapWhileUnzoomed: (() -> Void)? 
+ + private let imageURL: URL + private let placeholder: UIImage? + private let scrollView = UIScrollView() + private let imageView = UIImageView() + private let spinner = UIActivityIndicatorView(style: .large) + + init(imageURL: URL, pageIndex: Int, placeholder: UIImage?) { + self.imageURL = imageURL + self.pageIndex = pageIndex + self.placeholder = placeholder + super.init(nibName: nil, bundle: nil) + } + + required init?(coder: NSCoder) { fatalError() } + + override func viewDidLoad() { + super.viewDidLoad() + view.backgroundColor = .clear + + scrollView.translatesAutoresizingMaskIntoConstraints = false + scrollView.delegate = self + scrollView.minimumZoomScale = 1 + scrollView.maximumZoomScale = 4 + scrollView.showsHorizontalScrollIndicator = false + scrollView.showsVerticalScrollIndicator = false + view.addSubview(scrollView) + + imageView.translatesAutoresizingMaskIntoConstraints = false + imageView.contentMode = .scaleAspectFit + imageView.image = placeholder + scrollView.addSubview(imageView) + + spinner.translatesAutoresizingMaskIntoConstraints = false + spinner.color = .white + spinner.startAnimating() + view.addSubview(spinner) + + NSLayoutConstraint.activate([ + scrollView.topAnchor.constraint(equalTo: view.topAnchor), + scrollView.bottomAnchor.constraint(equalTo: view.bottomAnchor), + scrollView.leadingAnchor.constraint(equalTo: view.leadingAnchor), + scrollView.trailingAnchor.constraint(equalTo: view.trailingAnchor), + + imageView.topAnchor.constraint(equalTo: scrollView.contentLayoutGuide.topAnchor), + imageView.bottomAnchor.constraint(equalTo: scrollView.contentLayoutGuide.bottomAnchor), + imageView.leadingAnchor.constraint(equalTo: scrollView.contentLayoutGuide.leadingAnchor), + imageView.trailingAnchor.constraint(equalTo: scrollView.contentLayoutGuide.trailingAnchor), + imageView.widthAnchor.constraint(equalTo: scrollView.frameLayoutGuide.widthAnchor), + imageView.heightAnchor.constraint(equalTo: scrollView.frameLayoutGuide.heightAnchor), + + 
spinner.centerXAnchor.constraint(equalTo: view.centerXAnchor), + spinner.centerYAnchor.constraint(equalTo: view.centerYAnchor), + ]) + + let tap = UITapGestureRecognizer(target: self, action: #selector(handleTap)) + view.addGestureRecognizer(tap) + + loadFullImage() + } + + private func loadFullImage() { + URLSession.shared.dataTask(with: imageURL) { [weak self] data, _, _ in + DispatchQueue.main.async { + guard let self = self else { return } + self.spinner.stopAnimating() + if let data = data, let image = UIImage(data: data) { + self.imageView.image = image + } + } + }.resume() + } + + func viewForZooming(in scrollView: UIScrollView) -> UIView? { imageView } + + @objc private func handleTap() { + // Don't dismiss while zoomed in — let the tap interact with the image. + guard scrollView.zoomScale <= scrollView.minimumZoomScale else { return } + onTapWhileUnzoomed?() + } +} + // MARK: - Map annotation + delegate /// MKPointAnnotation subclass that carries the underlying MapPlace so the @@ -2451,6 +3468,10 @@ final class FilePreviewCardView: UIView { /// QuickLook). var onTap: (() -> Void)? + /// Invoked when the user taps the Share button on the card. The host + /// reads the full file content and presents the share sheet. + var onShare: (() -> Void)? + /// Apply visual state for `attachment`. Call again with a new attachment /// to repurpose a recycled card; call `reset()` before the cell is /// re-rendered with a non-card attachment kind. @@ -2460,7 +3481,7 @@ final class FilePreviewCardView: UIView { // Bytes is best-effort; failure just hides the size suffix rather // than the whole subtitle (we still want the MIME-ish label). let sizeText: String? - if let bytes = (try? attachment.fileURL.resourceValues(forKeys: [.fileSizeKey]).fileSize) { + if let bytes = (try? 
attachment.resolvedFileURL.resourceValues(forKeys: [.fileSizeKey]).fileSize) { sizeText = ByteCountFormatter.string(fromByteCount: Int64(bytes), countStyle: .file) } else { sizeText = nil @@ -2474,6 +3495,8 @@ final class FilePreviewCardView: UIView { badgeLabel.isHidden = false subtitleLabel.text = Self.subtitle("Markdown", size: sizeText) applyMarkdownSnippet(attachment.extractedText ?? "") + shareRow.isHidden = false + shareRowCollapsed.isActive = false case .text: iconView.image = UIImage(systemName: "chevron.left.forwardslash.chevron.right") iconView.tintColor = .secondaryLabel @@ -2486,6 +3509,8 @@ final class FilePreviewCardView: UIView { subtitleLabel.text = Self.subtitle("Text", size: sizeText) } applyCodeSnippet(attachment.extractedText ?? "") + shareRow.isHidden = false + shareRowCollapsed.isActive = false case .generic: iconView.image = UIImage(systemName: "doc") iconView.tintColor = .secondaryLabel @@ -2504,6 +3529,8 @@ final class FilePreviewCardView: UIView { snippetLabel.attributedText = nil snippetLabel.text = nil snippetLabel.isHidden = true + shareRow.isHidden = true + shareRowCollapsed.isActive = true case .image, .pdf: // Shouldn't happen — the cell never routes image/PDF through // this view — but render a sensible placeholder if it does. 
@@ -2512,6 +3539,8 @@ final class FilePreviewCardView: UIView { badgeLabel.isHidden = true subtitleLabel.text = Self.subtitle(attachment.mimeType, size: sizeText) snippetLabel.isHidden = true + shareRow.isHidden = true + shareRowCollapsed.isActive = true } } @@ -2527,6 +3556,8 @@ final class FilePreviewCardView: UIView { snippetLabel.text = nil snippetLabel.isHidden = false iconView.image = nil + shareRow.isHidden = true + shareRowCollapsed.isActive = true } // MARK: - Subviews @@ -2537,6 +3568,20 @@ final class FilePreviewCardView: UIView { private let subtitleLabel = UILabel() private let snippetLabel = UILabel() private let headerStack = UIStackView() + private let shareRow = UIView() + private let shareRowSeparator = UIView() + private lazy var shareRowCollapsed: NSLayoutConstraint = shareRow.heightAnchor.constraint(equalToConstant: 0) + private let shareButton: UIButton = { + var config = UIButton.Configuration.plain() + config.image = UIImage(systemName: "square.and.arrow.up") + config.preferredSymbolConfigurationForImage = UIImage.SymbolConfiguration( + pointSize: 13, weight: .medium) + config.baseForegroundColor = .secondaryLabel + config.contentInsets = NSDirectionalEdgeInsets(top: 6, leading: 10, bottom: 6, trailing: 10) + let b = UIButton(configuration: config) + b.accessibilityLabel = "Share" + return b + }() // MARK: - Setup @@ -2602,9 +3647,34 @@ final class FilePreviewCardView: UIView { headerStack.addArrangedSubview(titleLabel) headerStack.addArrangedSubview(badgeLabel) + // Share row — separator + button, hidden until configured for a + // text-based kind (markdown/source/text). 
+ shareRow.translatesAutoresizingMaskIntoConstraints = false + shareRow.isHidden = true + + shareRowSeparator.translatesAutoresizingMaskIntoConstraints = false + shareRowSeparator.backgroundColor = .separator + shareRow.addSubview(shareRowSeparator) + + shareButton.translatesAutoresizingMaskIntoConstraints = false + shareButton.addTarget(self, action: #selector(handleShareTap), for: .touchUpInside) + shareRow.addSubview(shareButton) + + NSLayoutConstraint.activate([ + shareRowSeparator.topAnchor.constraint(equalTo: shareRow.topAnchor), + shareRowSeparator.leadingAnchor.constraint(equalTo: shareRow.leadingAnchor), + shareRowSeparator.trailingAnchor.constraint(equalTo: shareRow.trailingAnchor), + shareRowSeparator.heightAnchor.constraint(equalToConstant: 1.0 / UIScreen.main.scale), + + shareButton.topAnchor.constraint(equalTo: shareRowSeparator.bottomAnchor, constant: 2), + shareButton.trailingAnchor.constraint(equalTo: shareRow.trailingAnchor, constant: -4), + shareButton.bottomAnchor.constraint(equalTo: shareRow.bottomAnchor, constant: -2), + ]) + addSubview(headerStack) addSubview(subtitleLabel) addSubview(snippetLabel) + addSubview(shareRow) NSLayoutConstraint.activate([ iconView.widthAnchor.constraint(equalToConstant: 16), @@ -2621,25 +3691,34 @@ final class FilePreviewCardView: UIView { snippetLabel.topAnchor.constraint(equalTo: subtitleLabel.bottomAnchor, constant: 8), snippetLabel.leadingAnchor.constraint(equalTo: leadingAnchor, constant: 12), snippetLabel.trailingAnchor.constraint(equalTo: trailingAnchor, constant: -12), - snippetLabel.bottomAnchor.constraint(lessThanOrEqualTo: bottomAnchor, constant: -12), + + shareRow.topAnchor.constraint(equalTo: snippetLabel.bottomAnchor, constant: 8), + shareRow.leadingAnchor.constraint(equalTo: leadingAnchor), + shareRow.trailingAnchor.constraint(equalTo: trailingAnchor), + shareRow.bottomAnchor.constraint(lessThanOrEqualTo: bottomAnchor), ]) - // Drive the bottom edge off whichever element is the lowest visible - // 
one. snippetLabel-bottom is `lessThanOrEqualTo` above so it - // doesn't *force* height; this constraint pulls the bottom up when - // the snippet collapses, keeping the generic-card chip-sized. - let bottomEqual = bottomAnchor.constraint(equalTo: snippetLabel.bottomAnchor, constant: 12) - bottomEqual.priority = .defaultHigh - bottomEqual.isActive = true + // Drive the bottom edge. When the share row is visible it's the + // lowest element; otherwise snippet or subtitle drives height. + let shareRowBottom = bottomAnchor.constraint(equalTo: shareRow.bottomAnchor) + shareRowBottom.priority = UILayoutPriority(751) + shareRowBottom.isActive = true + let snippetBottom = bottomAnchor.constraint(equalTo: snippetLabel.bottomAnchor, constant: 12) + snippetBottom.priority = .defaultHigh + snippetBottom.isActive = true let subtitleBottomEqual = bottomAnchor.constraint(equalTo: subtitleLabel.bottomAnchor, constant: 12) subtitleBottomEqual.priority = .defaultLow subtitleBottomEqual.isActive = true + shareRow.clipsToBounds = true + shareRowCollapsed.isActive = true + let tap = UITapGestureRecognizer(target: self, action: #selector(handleTap)) addGestureRecognizer(tap) } @objc private func handleTap() { onTap?() } + @objc private func handleShareTap() { onShare?() } override func traitCollectionDidChange(_ previous: UITraitCollection?) { super.traitCollectionDidChange(previous) diff --git a/LoopIOS/MessagingVC.swift b/LoopIOS/MessagingVC.swift index 64a2315..c320ee7 100644 --- a/LoopIOS/MessagingVC.swift +++ b/LoopIOS/MessagingVC.swift @@ -9,6 +9,8 @@ import UIKit import AVFoundation import FoundationModels import QuickLook +import os +import UserNotifications enum AIState: Equatable { @@ -72,12 +74,22 @@ class MessagingVC: UIViewController { var ai_state: AIState = .None let tableView = UITableView() + /// Drives the iMessage-style swipe-left timestamp reveal on the transcript. + /// Configured in the tableView setup; handled by `handleTimeRevealPan`. 
+ private var timeRevealPan: UIPanGestureRecognizer! + /// Maximum left-slide before rubber-banding kicks in — the column width the + /// timestamps settle into while the swipe is held. + private let timeRevealMax: CGFloat = 72 let messageBox = MessageBox() /// Slim pill at the top of the screen showing "N sub-agents running". Tap /// presents `SubAgentInspectorVC`. Collapses to zero height when no /// sub-agents are alive so it doesn't eat layout space. let subAgentStatusBar = SubAgentStatusBarView() + /// Horizontally scrollable top banner that hosts the sub-agent pill and + /// the music mini-player. Scrolls when multiple items coexist. + let topBannerScroll = TopBannerScrollView() + /// Persistent reminder shown after the user skipped the Action Button /// step during onboarding. Same collapse-to-zero behavior as the /// sub-agent pill, stacked just below it. @@ -107,6 +119,19 @@ class MessagingVC: UIViewController { /// Coalesces bursts of deltas into one UI update per run-loop turn. private var streamRenderScheduled = false + /// Log channel for the background-handoff decision path (filter Console by + /// category "handoff" to see why a handoff did/didn't fire on device). + static let handoffLog = Logger(subsystem: "com.bhat.intel", category: "handoff") + + /// Token for the in-flight local inference turn, if any. Set when a local + /// `Cloud.connection.chat` starts; read by the background handoff so it can + /// mark that exact turn abandoned when it's handed off to a runner. + private var currentLocalTurnToken: String? + /// Tokens of local turns handed off to a runner. Their local completion (if + /// it still fires after backgrounding) must be discarded so the conversation + /// doesn't get both the abandoned local reply and the runner's reply. + private var abandonedLocalTurns: Set<String> = [] + /// Light tap fired when the user commits a message — a soft button-press /// confirmation. 
Kept lazy so we don't spin the haptic engine for users /// who never send (cold-launched into a transcript they only read). @@ -129,6 +154,8 @@ When the user asks how you work, what you can do, or how you're built, read `ABO \(SchedulerSkill.systemPromptFragment) +\(VMCronSkill.systemPromptFragment) + \(ExaSkill.systemPromptFragment) \(MuniRealtimeSkill.systemPromptFragment) @@ -171,6 +198,7 @@ When the user asks how you work, what you can do, or how you're built, read `ABO // every assignment so create/switch/reload paths all propagate // without each having to call the pill directly. subAgentStatusBar.conversationId = currentConversationEntity?.id + topBannerScroll.conversationId = currentConversationEntity?.id } } @@ -248,8 +276,12 @@ When the user asks how you work, what you can do, or how you're built, read `ABO /// the same iCloud-KVS key. The setter rebuilds the speaker menu so /// the voice submenu updates to the new provider's voice list. private var ttsProvider: TTSProvider { - get { TTSProviderStore.current } + // Managed builds lock TTS to ElevenLabs Flash v2.5 — the speaker menu + // drops the model picker and every read/write here is pinned, so no + // code path can switch providers. + get { AppFlags.isManaged ? .elevenLabsFlashV25 : TTSProviderStore.current } set { + guard !AppFlags.isManaged else { return } TTSProviderStore.current = newValue muteButton?.menu = buildSpeakerMenu() } @@ -354,6 +386,7 @@ When the user asks how you work, what you can do, or how you're built, read `ABO // with a fileAttachment; image / map skills do the same. 
if $0.imageAttachment != nil { return true } if $0.mapAttachment != nil { return true } + if $0.imageGalleryAttachment != nil { return true } if $0.fileAttachment != nil { return true } // Assistant turns carrying tool calls are now surfaced as a // collapsible "Used N tools" disclosure, so they're kept (they @@ -376,6 +409,7 @@ When the user asks how you work, what you can do, or how you're built, read `ABO && !m.functions.isEmpty && m.content.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty && m.imageAttachment == nil && m.mapAttachment == nil && m.fileAttachment == nil + && m.imageGalleryAttachment == nil } var out: [MessageStruct] = [] for message in messages { @@ -444,6 +478,10 @@ When the user asks how you work, what you can do, or how you're built, read `ABO // Same plumbing for PDFGenerationService — placeholder card on // submit, swap to ready/failed when the WKWebView render finishes. PDFGenerationService.shared.host = self + // And for StoryGenerationService — placeholder story card on submit, + // swap to ready/failed when the HTML story render finishes. + StoryGenerationService.shared.host = self + BrowseGenerationService.shared.host = self // CalendarSkill needs a UI host so it can present // EKEventEditViewController for the user to review proposed events // before they're saved. @@ -556,6 +594,15 @@ When the user asks how you work, what you can do, or how you're built, read `ABO object: nil ) + // iOS 27 App Intents inject user messages via notification so the + // intent doesn't depend on finding MessagingVC in the responder chain. + NotificationCenter.default.addObserver( + self, + selector: #selector(handleIntentMessage(_:)), + name: .loopIntentMessageReceived, + object: nil + ) + // Settings ▸ Model writes to TTSProviderStore from outside our setter. // Rebuild the speaker menu so the checkmark + voice submenu reflect // the new pick without waiting for the user to reopen this VC. 
@@ -608,6 +655,27 @@ When the user asks how you work, what you can do, or how you're built, read `ABO object: nil ) + // A handed-off local turn finished on a runner and its reply was written + // into a conversation — reload if it's the one on screen so the message + // appears without the user having to re-open the chat. + NotificationCenter.default.addObserver( + self, + selector: #selector(handleRunnerTurnApplied(_:)), + name: .runnerTurnApplied, + object: nil + ) + + // When iCloud sync (or pass-2 hydration) updates the store, re-read + // the current conversation's messages so the UI self-heals after a + // cold restart or cross-device push. Guarded inside the handler to + // avoid clobbering an in-flight agent turn. + NotificationCenter.default.addObserver( + self, + selector: #selector(handleConversationStoreDidChange), + name: .conversationStoreDidChange, + object: nil + ) + // Cover writers that go through iCloudKVSDefaults directly (e.g. the // OnboardingCoordinator's voice-step writes to `audioMuted` and // `ttsProvider`) — without this, picking a voice during onboarding @@ -620,6 +688,14 @@ When the user asks how you work, what you can do, or how you're built, read `ABO object: nil ) + // Card feed: show a pill alert whenever a new card is generated. + NotificationCenter.default.addObserver( + self, + selector: #selector(handleCardAdded(_:)), + name: CardStore.cardAddedNotification, + object: nil + ) + // Pick up any attachments the SceneDelegate stashed during cold-start // before MessagingVC had loaded. Each call to `stageIncomingAttachment` // overwrites the chip, so the last one wins as the visible staging — @@ -637,6 +713,12 @@ When the user asks how you work, what you can do, or how you're built, read `ABO // so the share lands on the chip regardless of which hook fired first. drainAppGroupInbox() + // Honor a conversation-open request stashed by a notification tap that + // landed before this VC existed. 
If the store is already hydrated this + // opens it now; otherwise the id waits and the store-ready path below + // drains it. + consumePendingConversationOpen() + // Do any additional setup after loading the view. } @@ -736,6 +818,9 @@ When the user asks how you work, what you can do, or how you're built, read `ABO guard let self = self else { return } if let token = token { NotificationCenter.default.removeObserver(token) } self.setConversationChromeLoading(false) + // A notification tapped during cold start wins over the default + // restore: open the tapped conversation instead of the last one. + if self.consumePendingConversationOpen() { return } if self.currentConversationEntity == nil { self.loadLastConversation() } @@ -754,6 +839,10 @@ When the user asks how you work, what you can do, or how you're built, read `ABO if let conversation = conversationManager.loadLastConversation() { print("🚀 Found last conversation: \(conversation.title)") currentConversationEntity = conversation + // Tell the store to hydrate this conversation first so the user + // doesn't stare at a blank screen while 50 other conversations + // get parsed ahead of the one they're looking at. + ConversationFileStore.shared.prioritizeHydration(id: conversation.id) loadMessagesFromConversation(conversation) } else { print("🚀 No last conversation found, starting with default message") @@ -768,7 +857,27 @@ When the user asks how you work, what you can do, or how you're built, read `ABO let messageEntities = conversationManager.getMessages(for: conversation) print("🔄 Retrieved \(messageEntities.count) messages from storage") - + + // Defensive: if the store returned zero messages but the conversation + // has a recent updatedAt (implying content exists on disk that hasn't + // been hydrated yet), skip the clear-and-reload to avoid flashing a + // blank screen. The store will post .conversationStoreDidChange once + // hydration finishes, and handleConversationStoreDidChange will retry. 
+ if messageEntities.isEmpty && !conversation.messages.isEmpty { + print("⚠️ Store returned 0 messages but conversation metadata suggests content exists — deferring render until hydration completes") + return + } + if messageEntities.isEmpty && conversation.updatedAt.timeIntervalSinceNow > -86400 * 365 { + // Even if conversation.messages is empty (meta-only stub), a + // conversation updated within the last year likely has content. + // If the store isn't hydrated yet, request hydration and bail. + if !ConversationFileStore.shared.isHydrated(id: conversation.id) { + print("⚠️ Conversation not yet hydrated — requesting async hydration") + ConversationFileStore.shared.requestHydrationIfNeeded(id: conversation.id) + return + } + } + // Clear existing messages except system message messages = [messages[0]] // Keep system message @@ -785,7 +894,9 @@ When the user asks how you work, what you can do, or how you're built, read `ABO DispatchQueue.main.async { self.tableView.reloadData() self.scrollToLastMessage() + self.messagesDidReload() } + messagesDidReload() // For a VM-backed conversation, the assistant turn is written remotely // by the daemon, so the local cache can lag what's on the VM (e.g. a @@ -817,10 +928,57 @@ When the user asks how you work, what you can do, or how you're built, read `ABO // Reload table view DispatchQueue.main.async { self.tableView.reloadData() + self.messagesDidReload() // self.messageBox.textView.becomeFirstResponder() } } + + /// Hook fired after the on-screen message set is rebuilt from scratch — + /// cold-start restore, a conversation switch, or a reset to the empty/new + /// chat. Subclasses override to keep avatar/chrome state in sync with what + /// is actually rendered (the explicit refresh call sites miss async paths + /// like the store-ready restore that populates `messages` long after + /// viewDidLoad). Default is a no-op. Always invoked on the main thread. 
+ func messagesDidReload() {} + /// Open a conversation in response to a notification tap. Unlike a plain + /// `loadConversation` (which only swaps the data), this also surfaces the + /// chat: a tap should land the user *inside* the conversation even if a + /// modal or the large agent orb was covering it. `bringChatToFront` is the + /// overridable hook for that (MainVC dismisses the orb / pops the stack). + func openConversationFromNotification(_ conversation: SimpleConversation) { + bringChatToFront() + loadConversation(conversation) + } + + /// Make this chat surface visible before loading a notification-tapped + /// conversation. Base behavior: drop any presented modal and pop to this + /// VC. Subclasses (MainVC) extend this to also tear down the large agent + /// view that would otherwise cover the chat. + func bringChatToFront() { + presentedViewController?.dismiss(animated: false) + navigationController?.popToViewController(self, animated: false) + } + + /// Drain a conversation-open request stashed by a cold-start notification + /// tap (`AppDelegate.openPrefetchedConversation` couldn't resolve the chat + /// at tap time). Called from `viewDidLoad` and again on store-ready, since + /// the conversation only becomes resolvable once the store hydrates. + /// Holds the id (doesn't `take()`) until the store is ready, so an early + /// `viewDidLoad` call doesn't discard a request the store can't yet + /// resolve. Returns true when it actually opened a conversation, so the + /// store-ready path knows to skip its default "load last conversation". + @discardableResult + func consumePendingConversationOpen() -> Bool { + guard ConversationFileStore.shared.isReady, + let id = PendingConversationOpen.shared.take() else { return false } + // Id is now consumed even if it no longer resolves — a deleted/stale + // conversation shouldn't keep hijacking later launches. 
+ guard let conversation = conversationManager.getConversation(by: id) else { return false } + openConversationFromNotification(conversation) + return true + } + func loadConversation(_ conversation: SimpleConversation) { currentConversationEntity = conversation conversationManager.currentConversation = conversation @@ -921,6 +1079,18 @@ When the user asks how you work, what you can do, or how you're built, read `ABO } } + /// Handles a user message injected by an iOS 27 App Intent (AskLoop, + /// CaptureToLoop, SearchLoop). The notification carries a `"message"` + /// string in `userInfo` which gets piped through the same path as a + /// typed message. + @objc private func handleIntentMessage(_ notification: Notification) { + guard let text = notification.userInfo?["message"] as? String, + !text.isEmpty else { return } + DispatchQueue.main.asyncAfter(deadline: .now() + 0.3) { [weak self] in + self?.didSendMessageText(text) + } + } + /// Posted by TTSProviderStore when Settings ▸ Model picks a new provider. /// We rebuild the speaker menu directly rather than rerouting through the /// `ttsProvider` setter (which would also re-post the same notification @@ -1066,6 +1236,42 @@ When the user asks how you work, what you can do, or how you're built, read `ABO } + /// Called after `loadMessagesFromConversation` finishes populating + /// `self.messages` and reloading the table. Subclasses (MainVC) override + /// to refresh orb visibility so the hero/nav-bar handoff stays correct + /// on every message-load path — not just `loadConversation` and + /// `newMessageSent`. + // MARK: - Store change observation + + /// Re-read the current conversation from the store when iCloud sync or + /// pass-2 hydration updates it. Skips the reload when an agent turn is + /// in flight (streaming partial or thinking) to avoid clobbering live + /// state that hasn't been flushed to disk yet. 
+ @objc private func handleConversationStoreDidChange() { + guard let current = currentConversationEntity else { return } + + // Don't clobber an in-flight agent turn whose data lives only in + // memory (streamingPartial, ai_state). The store will eventually + // have it once the turn completes; we'll pick it up on the next + // notification. + if ai_state != .None || !streamingPartial.isEmpty { return } + + let freshMessages = conversationManager.getMessages(for: current) + let currentCount = messages.count - 1 // minus system message + + // Only reload if the store has *more* content than what's on screen. + // This avoids pointless reloads for unrelated conversations and + // prevents replacing a populated screen with fewer messages during + // a transient cache state. + guard freshMessages.count > currentCount else { return } + + // Re-read the conversation entity so our snapshot is current. + if let fresh = conversationManager.getConversation(by: current.id) { + currentConversationEntity = fresh + loadMessagesFromConversation(fresh, refreshIfRemote: false) + } + } + } @@ -1142,11 +1348,14 @@ extension MessagingVC: MessageBoxDelegate { let reqConvId = conversation.id let context = self.contextMessages(for: reqConvId) self.beginStreamingTurn() + let localTurnToken = UUID().uuidString + self.currentLocalTurnToken = localTurnToken Cloud.connection.chat(messages: context, onPartial: self.streamingPartialHandler(for: reqConvId)) { responseMessage, error in // Fires off-main (URLSession delegate queue); ai_state / self.messages // are read by the table on main, so all mutation runs on main to avoid // racing the streaming render (see startReply for the full rationale). 
DispatchQueue.main.async { + if self.discardIfHandedOff(localTurnToken, reqConvId) { return } if self.currentConversationEntity?.id == reqConvId { self.ai_state = .None } @@ -1172,6 +1381,9 @@ extension MessagingVC: MessageBoxDelegate { let viewing = self.currentConversationEntity?.id == reqConvId DispatchQueue.main.async { + ActiveRequestTracker.shared.markIdle(reqConvId) + self.streamingPartial = "" + VoiceLoopCoordinator.shared.setState(.idle) if viewing { self.messages.append(responseMessage) self.messageIdToAnimate = responseMessage.id @@ -1181,6 +1393,25 @@ extension MessagingVC: MessageBoxDelegate { } } + } catch { + print("Apple on-device fallback error (tool loop): \(error)") + DispatchQueue.main.async { + ActiveRequestTracker.shared.markIdle(reqConvId) + self.streamingPartial = "" + VoiceLoopCoordinator.shared.setState(.idle) + let errorMessage = MessageStruct(role: "assistant", content: "Sorry – Apple's on-device model couldn't respond. You can try again or switch models in Settings ▸ Model.") + if let target = self.conversationManager.getConversation(by: reqConvId) { + self.conversationManager.addMessage(errorMessage, to: target) + } + let viewing = self.currentConversationEntity?.id == reqConvId + if viewing { + self.messages.append(errorMessage) + self.messageIdToAnimate = errorMessage.id + self.tableView.reloadData() + self.scrollToLastMessage() + } + EarconPlayer.shared.play(.error) + } } } return @@ -1210,7 +1441,7 @@ extension MessagingVC: MessageBoxDelegate { } } - func didSendMessageText(_ message: String) { + func didSendMessageText(_ message: String, sttEngine: String?) { // Cancel any in-flight TTS immediately — the user just sent a new // message, so continuing to speak the previous response is stale. 
stopSpeaking() @@ -1237,6 +1468,9 @@ extension MessagingVC: MessageBoxDelegate { let stagedAttachment = self.messageBox.pendingAttachment var messageStruct = MessageStruct(role: "user", content: message) messageStruct.fileAttachment = stagedAttachment + // Dictated messages carry the STT engine ("Deepgram STT"/"Apple STT") so the cell can + // show a transcription byline under the user bubble; nil when typed. + messageStruct.sttEngine = sttEngine // Clear the staged attachment immediately so the chip + paperclip // bounce back the moment the send tap registers, even before the @@ -1300,13 +1534,31 @@ extension MessagingVC: MessageBoxDelegate { conversationId: requestConversationId ) + // Reset the anti-loop guard so prior tool-call patterns don't + // bleed into the new user turn. + ToolCallGuard.shared.resetForNewTurn() + + // Kick off background descriptions for any image attachment in this + // conversation that doesn't have one yet. Generating now — at send + // time — means the description is usually ready by the user's next + // turn, at which point the chat clients stop re-sending the raw image + // and inline the text instead. Idempotent (guarded per attachment id), + // so re-running every turn is fine. Remote (OpenClaw) conversations + // already reference images by workspace path and returned above. + VisionSummaryService.shared.ensureSummaries(for: self.messages, conversationId: requestConversationId) + let initialContext = self.chatContextMessages self.beginStreamingTurn() + let localTurnToken = UUID().uuidString + self.currentLocalTurnToken = localTurnToken Cloud.connection.chat(messages: initialContext, onPartial: self.streamingPartialHandler(for: requestConversationId)) { responseMessage, error in // Fires off-main (URLSession delegate queue); ai_state / self.messages // are read by the table on main, so all mutation runs on main to avoid // racing the streaming render (see startReply for the full rationale). 
DispatchQueue.main.async { + // If this turn was handed off to a runner when the app backgrounded, + // discard the local result — the runner's reply will arrive via push. + if self.discardIfHandedOff(localTurnToken, requestConversationId) { return } if self.currentConversationEntity?.id == requestConversationId { self.ai_state = .None } @@ -1329,9 +1581,7 @@ extension MessagingVC: MessageBoxDelegate { let singlePrompt = self.makeSinglePrompt(from: self.contextMessages(for: requestConversationId)) Task { do { - print(singlePrompt) let response = try await session.respond(to: singlePrompt) - print(response) var responseMessage = MessageStruct(role: "assistant", content: response.content, model: "Apple LLM") if compactionTrigger == .hard { responseMessage.content += "\n\n(compacting context in the background)" @@ -1346,6 +1596,9 @@ extension MessagingVC: MessageBoxDelegate { let isStillViewing = self.currentConversationEntity?.id == requestConversationId DispatchQueue.main.async { + ActiveRequestTracker.shared.markIdle(requestConversationId) + self.streamingPartial = "" + VoiceLoopCoordinator.shared.setState(.idle) if isStillViewing { self.messages.append(responseMessage) self.messageIdToAnimate = responseMessage.id @@ -1355,6 +1608,25 @@ extension MessagingVC: MessageBoxDelegate { } } + } catch { + print("Apple on-device fallback error: \(error)") + DispatchQueue.main.async { + ActiveRequestTracker.shared.markIdle(requestConversationId) + self.streamingPartial = "" + VoiceLoopCoordinator.shared.setState(.idle) + let errorMessage = MessageStruct(role: "assistant", content: "Sorry – Apple's on-device model couldn't respond. 
You can try again or switch models in Settings ▸ Model.") + if let target = self.conversationManager.getConversation(by: requestConversationId) { + self.conversationManager.addMessage(errorMessage, to: target) + } + let isStillViewing = self.currentConversationEntity?.id == requestConversationId + if isStillViewing { + self.messages.append(errorMessage) + self.messageIdToAnimate = errorMessage.id + self.tableView.reloadData() + self.scrollToLastMessage() + } + EarconPlayer.shared.play(.error) + } } } return @@ -1393,6 +1665,108 @@ extension MessagingVC: MessageBoxDelegate { stopSpeaking() } + // MARK: - Background handoff to a Loop Runner + + /// Returns true (and cleans up) if the local turn `token` was handed off to a + /// runner while in flight — the caller should then discard its local result. + private func discardIfHandedOff(_ token: String, _ conversationId: String) -> Bool { + guard abandonedLocalTurns.remove(token) != nil else { return false } + if currentConversationEntity?.id == conversationId { ai_state = .None } + ActiveRequestTracker.shared.markIdle(conversationId) + if currentLocalTurnToken == token { currentLocalTurnToken = nil } + return true + } + + /// Plain user/assistant text history for the active conversation, prefixed + /// with the system prompt, in the runner's `{role, content}` shape. Tool + /// calls, function results, and attachments are dropped — the runner speaks + /// plain OpenAI chat messages only. + private func handoffPayload(for conversationId: String) -> [[String: String]] { + var payload: [[String: String]] = [["role": "system", "content": base_system_prompt]] + for m in contextMessages(for: conversationId) { + guard m.role == "user" || m.role == "assistant" else { continue } + guard !m.content.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { continue } + payload.append(["role": m.role, "content": m.content]) + } + return payload + } + + /// Called when the app backgrounds. 
If a local turn is in flight in a local + /// conversation and a runner is reachable, hand the turn off to the runner to + /// finish (it pushes back on completion) and abandon the local result. + /// Returns true only when a handoff was actually submitted — the local turn + /// is abandoned ONLY then, so a failed handoff never drops the only reply. + func handoffInFlightTurnIfEligible() async -> Bool { + // Read VC state + build the payload on main — these touch state the table + // view also reads, so we must not race them from the background Task. We + // also hard-cancel local inference HERE (before awaiting the submit) and + // mark the turn abandoned, so the un-cancellable local request can't + // finish first and "win." If the submit then fails we surface a notice + // (below), since we've already stopped the local turn. + let prep: (token: String, convId: String, messages: [[String: String]])? = await MainActor.run { + guard self.ai_state != .None else { + Self.handoffLog.info("handoff skipped: no turn in flight (ai_state == .None)") + return nil + } + guard let token = self.currentLocalTurnToken else { + Self.handoffLog.info("handoff skipped: no local turn token") + return nil + } + guard let convId = self.activeRequestConversationId else { + Self.handoffLog.info("handoff skipped: no active request conversation") + return nil + } + guard let conv = self.conversationManager.getConversation(by: convId) else { + Self.handoffLog.info("handoff skipped: conversation not found") + return nil + } + guard conv.backendKind != .remote else { + Self.handoffLog.info("handoff skipped: conversation is remote (already runs on a VM)") + return nil + } + let messages = self.handoffPayload(for: convId) + guard messages.count > 1 else { + Self.handoffLog.info("handoff skipped: no user turn to send") + return nil + } + // Stop local inference now and mark the turn abandoned so its + // (cancelled) completion is discarded rather than written. 
+ self.abandonedLocalTurns.insert(token) + let cancelled = LocalInferenceController.shared.cancelActive() + self.ai_state = .None + ActiveRequestTracker.shared.markIdle(convId) + self.tableView.reloadData() + Self.handoffLog.info("handoff eligible — local inference cancelled: \(cancelled, privacy: .public)") + return (token, convId, messages) + } + guard let prep = prep else { return false } + + let userId = LoopRunnerClient.deviceUserId + Self.handoffLog.info("handoff submitting (user_id set: \(!userId.isEmpty, privacy: .public))") + guard let result = await LoopRunnerPoller.shared.submitHandoff( + messages: prep.messages, conversationId: prep.convId, userId: userId) else { + // We already cancelled local — leave a notice so the chat isn't stuck + // with a user message and no reply. + let reason = LoopRunnerPoller.shared.lastHandoffError ?? "couldn't reach your VM" + Self.handoffLog.error("handoff submit failed: \(reason, privacy: .public)") + await MainActor.run { + guard let conv = self.conversationManager.getConversation(by: prep.convId) else { return } + let notice = MessageStruct(role: "assistant", + content: "⚠️ Couldn't continue on your VM: \(reason)") + self.conversationManager.addMessage(notice, to: conv) + if self.currentConversationEntity?.id == prep.convId { + self.currentConversationEntity = self.conversationManager.getConversation(by: prep.convId) + self.loadMessagesFromConversation(conv, refreshIfRemote: false) + } + } + return false + } + + RunnerTurnApplier.recordHandoff(turnId: result.turnId, conversationId: prep.convId, model: result.model) + Self.handoffLog.info("handoff submitted to \(result.runner.nickname, privacy: .public) [\(result.model, privacy: .public)]: turn \(result.turnId, privacy: .public)") + return true + } + /// Resolves the shimmer label copy for a model-emitted function call. /// Skills get first crack via their own statusText(for:); legacy in-VC /// tools and a generic fallback handle the rest. 
@@ -1413,9 +1787,15 @@ extension MessagingVC: MessageBoxDelegate { if let s = TwitterSkill.shared.statusText(for: call) { return s } if let s = SSHSkill.shared.statusText(for: call) { return s } if let s = MuniRealtimeSkill.shared.statusText(for: call) { return s } + if let s = GoogleDriveSkill.shared.statusText(for: call) { return s } + if let s = GoogleGmailSkill.shared.statusText(for: call) { return s } + if let s = GoogleCalendarSkill.shared.statusText(for: call) { return s } #if canImport(HealthKit) && os(iOS) if let s = HealthSkill.shared.statusText(for: call) { return s } #endif + #if os(iOS) + if let s = StorySkill.shared.statusText(for: call) { return s } + #endif if let s = DynamicSkillRegistry.shared.statusText(for: call) { return s } switch call.name { @@ -1803,6 +2183,13 @@ extension MessagingVC: MessageBoxDelegate { } conversationManager.addMessage(message, to: conversation) + // Backgrounded: the user can't see the bubble land or hear the + // TTS (which we skip — see playMessageSynthesizer), so post a local + // notification announcing the reply finished. + if isBackgrounded { + postReplyNotification(for: message, conversationId: conversation.id) + } + if !isViewing { // Response arrived for a chat the user has navigated away from. // Surface a green dot on the hamburger so they know there's @@ -1948,6 +2335,8 @@ extension MessagingVC: MessageBoxDelegate { let reqConvId = conversation.id let context = self.contextMessages(for: reqConvId) self.beginStreamingTurn() + let localTurnToken = UUID().uuidString + self.currentLocalTurnToken = localTurnToken Cloud.connection.chat(messages: context, onPartial: streamingPartialHandler(for: reqConvId)) { [weak self] responseMessage, error in // Fires on URLSession's delegate queue. `ai_state` and // `self.messages` are read by the table view on the main thread @@ -1958,6 +2347,7 @@ extension MessagingVC: MessageBoxDelegate { // updates detected". 
DispatchQueue.main.async { guard let self = self else { return } + if self.discardIfHandedOff(localTurnToken, reqConvId) { return } if self.currentConversationEntity?.id == reqConvId { self.ai_state = .None } @@ -2018,11 +2408,52 @@ extension MessagingVC: MessageBoxDelegate { /// is safe. private static let speechSanitizer = SpeechSanitizer() + /// True when the app is in the background. Read on main (UIApplication's + /// `applicationState` is main-thread-only); all callers here run on main. + private var isBackgrounded: Bool { + UIApplication.shared.applicationState == .background + } + + /// Drop a local notification announcing the assistant's reply finished while + /// the app was backgrounded. Best-effort: if notification permission hasn't + /// been granted we silently bail (asking from a background completion + /// handler would be rude). Mirrors `SubAgentNotifications.deliver`. + private func postReplyNotification(for message: MessageStruct, conversationId: String) { + let body = MessagingVC.speechSanitizer.sanitize(message.content) + let preview = body.isEmpty ? 
message.content : body + guard !preview.isEmpty else { return } + + let center = UNUserNotificationCenter.current() + center.getNotificationSettings { settings in + var granted = settings.authorizationStatus == .authorized + || settings.authorizationStatus == .provisional + if settings.authorizationStatus == .ephemeral { granted = true } + guard granted else { return } + + let content = UNMutableNotificationContent() + content.title = "Loop" + content.body = String(preview.prefix(200)) + content.sound = .default + content.userInfo = ["type": "assistant_reply", "conversation_id": conversationId] + + let request = UNNotificationRequest( + identifier: "loop.reply.\(message.id)", + content: content, + trigger: nil // fire immediately + ) + center.add(request, withCompletionHandler: nil) + } + } + func playMessageSynthesizer(message: MessageStruct) { // Onboarding messages are scripted UI, not assistant speech. Speaking // them would be jarring (and TTS isn't even configured yet at this // point in the flow). Drop the avatar back to idle and return early. - if message.onboardingCard != nil { + // + // Exception: managed onboarding deliberately reads its messages aloud + // (after a voice is picked) so the chosen voice demos itself. The + // `isMuted` check below keeps the pre-pick voice prompt silent. + if message.onboardingCard != nil && !AppFlags.isManaged { VoiceLoopCoordinator.shared.setState(.idle) return } @@ -2036,6 +2467,15 @@ extension MessagingVC: MessageBoxDelegate { return } + // Backgrounded: skip audio. Cloud TTS opens a second network request + // that iOS suspends once the app leaves the foreground, so it usually + // fails — and the user can't hear it anyway. The reply itself is + // surfaced via a local notification from `processMessage`. 
+ if isBackgrounded { + VoiceLoopCoordinator.shared.setState(.idle) + return + } + // Stop any ongoing audio stopSpeaking() @@ -2183,9 +2623,29 @@ extension MessagingVC: MessageBoxDelegate { let coord = VoiceLoopCoordinator.shared if coord.state == .speaking { coord.setState(.idle) + // Deactivate the audio session with .notifyOthersOnDeactivation so + // the system sends "interruption ended — you may resume" to any + // other audio app (Apple Music, Spotify, podcasts) that was paused + // when we activated our .playback session. Without this, system + // media stays paused indefinitely after Loop's TTS finishes. + deactivateAudioSession() } } + /// Deactivate the shared audio session and notify other apps they may + /// resume playback. Called after TTS finishes and after recording ends. + /// Failures are best-effort — the session might already be inactive + /// (e.g. from the recording teardown path) which is harmless. + private func deactivateAudioSession() { + #if !os(macOS) + do { + try AVAudioSession.sharedInstance().setActive(false, options: [.notifyOthersOnDeactivation]) + } catch { + // best-effort; session may already be inactive + } + #endif + } + /// Speak `text` using on-device AVSpeechSynthesizer. No network, no Deepgram. /// Picks the most lifelike voice available on the device (see /// `preferredOfflineVoice`). @@ -2201,7 +2661,7 @@ extension MessagingVC: MessageBoxDelegate { return } do { - try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.mixWithOthers]) + try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.duckOthers]) try AVAudioSession.sharedInstance().setActive(true) } catch { print("Offline TTS: audio session setup failed (\(error)) — speaking anyway") @@ -2379,6 +2839,15 @@ extension MessagingVC { /// tracks the currently selected backend (and any added/renamed backends). 
func updateBackendIndicator() { guard let button = backendIndicatorButton else { return } + // Managed builds (LOOP_FLAG set) pin the backend: show "Managed" and + // drop the picker so it can't be tapped/switched. + if AppFlags.isManaged { + button.setTitle(AppFlags.managedLabel, for: .normal) + button.menu = nil + button.showsMenuAsPrimaryAction = false + button.isUserInteractionEnabled = false + return + } button.setTitle(ExecutionBackendStore.shared.selectedBackend.displayName, for: .normal) button.menu = buildBackendMenu() } @@ -2474,10 +2943,17 @@ extension MessagingVC { // hardcoded curated list per voiceOptions. let voiceMenu = buildVoiceMenu(for: activeProvider) - // When muted, hide everything below the toggle — they aren't doing anything. - let children: [UIMenuElement] = isMuted - ? [muteAction] - : [muteAction, speedMenu, providerMenu, voiceMenu] + // When muted, hide everything below the toggle — they aren't doing + // anything. Managed builds drop the provider (model) picker; TTS is + // pinned to ElevenLabs Flash v2.5. 
+ let children: [UIMenuElement] + if isMuted { + children = [muteAction] + } else if AppFlags.isManaged { + children = [muteAction, speedMenu, voiceMenu] + } else { + children = [muteAction, speedMenu, providerMenu, voiceMenu] + } return UIMenu(title: "", children: children) } @@ -2569,12 +3045,13 @@ extension MessagingVC { backendButton.showsMenuAsPrimaryAction = true self.backendIndicatorButton = backendButton - let views: [UIView] = [tableView, messageBox, subAgentStatusBar, actionButtonReminderBar, backendButton] + let views: [UIView] = [tableView, messageBox, subAgentStatusBar, topBannerScroll, actionButtonReminderBar, backendButton] for view in views { view.translatesAutoresizingMaskIntoConstraints = false self.view.addSubview(view) } subAgentStatusBar.delegate = self + topBannerScroll.bannerDelegate = self actionButtonReminderBar.delegate = self bottomConstraint = messageBox.bottomAnchor.constraint(equalTo: backendButton.topAnchor) NSLayoutConstraint.activate([ @@ -2589,8 +3066,15 @@ extension MessagingVC { subAgentStatusBar.topAnchor.constraint(equalTo: self.view.safeAreaLayoutGuide.topAnchor), subAgentStatusBar.trailingAnchor.constraint(equalTo: self.view.trailingAnchor), + // Music mini-player banner sits below the sub-agent status bar. + // Scrolls horizontally when both the sub-agent pill and music pill + // coexist (the sub-agent pill remains in its fixed position above). 
+ topBannerScroll.leadingAnchor.constraint(equalTo: self.view.leadingAnchor), + topBannerScroll.topAnchor.constraint(equalTo: subAgentStatusBar.bottomAnchor), + topBannerScroll.trailingAnchor.constraint(equalTo: self.view.trailingAnchor), + actionButtonReminderBar.leadingAnchor.constraint(equalTo: self.view.leadingAnchor), - actionButtonReminderBar.topAnchor.constraint(equalTo: subAgentStatusBar.bottomAnchor), + actionButtonReminderBar.topAnchor.constraint(equalTo: topBannerScroll.bottomAnchor), actionButtonReminderBar.trailingAnchor.constraint(equalTo: self.view.trailingAnchor), tableView.leadingAnchor.constraint(equalTo: self.view.leadingAnchor), @@ -2624,6 +3108,14 @@ extension MessagingVC { // Improve scrolling performance tableView.delaysContentTouches = false // tableView.canCancelContentTouches = true + + // iMessage-style swipe-left to reveal each message's posted time. One + // pan on the table drives every visible cell's slide in lock-step (see + // `handleTimeRevealPan`), so the whole transcript moves as one. + timeRevealPan = UIPanGestureRecognizer(target: self, action: #selector(handleTimeRevealPan(_:))) + timeRevealPan.delegate = self + tableView.addGestureRecognizer(timeRevealPan) + NotificationCenter.default.addObserver(self, selector: #selector(keyboardWillShow(_:)), name: UIResponder.keyboardWillShowNotification, object: nil) NotificationCenter.default.addObserver(self, selector: #selector(keyboardWillHide(_:)), name: UIResponder.keyboardWillHideNotification, object: nil) @@ -2990,6 +3482,10 @@ extension MessagingVC: UITableViewDelegate, UITableViewDataSource { // matter — moved up here to keep all three together. 
cell.imageDelegate = self cell.pdfDelegate = self + #if os(iOS) + cell.storyDelegate = self + cell.browseDelegate = self + #endif cell.onboardingDelegate = self configureToolDisclosure(for: cell, message: message) cell.setData(data: message, shouldAnimate: message.id == self.messageIdToAnimate) @@ -3161,10 +3657,64 @@ extension MessagingVC: MessagingCellToolDelegate { } } +// MARK: - Swipe-to-reveal timestamps (iMessage-style) + +extension MessagingVC { + /// Translates every visible cell's content left in lock-step as the user + /// swipes, sliding the bubbles out and the trailing time labels in. Releases + /// spring back to rest. Vertical scrolling is locked for the duration so the + /// reveal stays a clean horizontal motion, like iMessage. + @objc func handleTimeRevealPan(_ pan: UIPanGestureRecognizer) { + switch pan.state { + case .began: + tableView.isScrollEnabled = false + applyTimeReveal(rubberBanded(max(0, -pan.translation(in: tableView).x))) + case .changed: + applyTimeReveal(rubberBanded(max(0, -pan.translation(in: tableView).x))) + case .ended, .cancelled, .failed: + tableView.isScrollEnabled = true + UIView.animate(withDuration: 0.32, delay: 0, + usingSpringWithDamping: 0.82, initialSpringVelocity: 0, + options: [.allowUserInteraction, .beginFromCurrentState]) { + self.applyTimeReveal(0) + } + default: + break + } + } + + /// Applies the same slide offset to all on-screen message cells. + private func applyTimeReveal(_ offset: CGFloat) { + for case let cell as MessagingCell in tableView.visibleCells { + cell.setTimeRevealOffset(offset) + } + } + + /// Eases the slide past `timeRevealMax` so an over-pull feels resistive + /// rather than tracking the finger 1:1. 
+ private func rubberBanded(_ raw: CGFloat) -> CGFloat { + guard raw > timeRevealMax else { return raw } + return timeRevealMax + (raw - timeRevealMax) * 0.18 + } +} + // MARK: - UIGestureRecognizerDelegate extension MessagingVC: UIGestureRecognizerDelegate { + func gestureRecognizerShouldBegin(_ gestureRecognizer: UIGestureRecognizer) -> Bool { + // The timestamp-reveal pan only engages on a predominantly horizontal, + // leftward drag — vertical drags fall through to normal scrolling. + if gestureRecognizer == timeRevealPan { + let v = timeRevealPan.velocity(in: tableView) + return v.x < 0 && abs(v.x) > abs(v.y) + } + return true + } + func gestureRecognizer(_ gestureRecognizer: UIGestureRecognizer, shouldRecognizeSimultaneouslyWith otherGestureRecognizer: UIGestureRecognizer) -> Bool { + // The reveal pan must coexist with the table's own scroll pan so it + // isn't blocked from starting. + if gestureRecognizer == timeRevealPan { return true } // Don't allow simultaneous recognition with the side drawer's pan gesture if let sideDrawerPan = sideDrawer?.panGestureRecognizer, otherGestureRecognizer == sideDrawerPan { return false @@ -3174,6 +3724,18 @@ extension MessagingVC: UIGestureRecognizerDelegate { } func gestureRecognizer(_ gestureRecognizer: UIGestureRecognizer, shouldReceive touch: UITouch) -> Bool { + // The reveal pan accepts touches anywhere on the transcript, EXCEPT + // inside an image-search gallery — a horizontal swipe there should + // scroll the thumbnails, not drag the whole transcript to reveal + // timestamps. + if gestureRecognizer == timeRevealPan { + var v: UIView? 
= touch.view + while let cur = v { + if cur is ImageGalleryScrollView { return false } + v = cur.superview + } + return true + } // Only respond to touches that start near the left edge and when side drawer is not open let location = touch.location(in: view) return location.x <= 20 && sideDrawer == nil @@ -3253,6 +3815,7 @@ extension MessagingVC: AVAudioPlayerDelegate { audioPlayer = nil stopTTSMetering() VoiceLoopCoordinator.shared.setState(.idle) + deactivateAudioSession() } func audioPlayerDecodeErrorDidOccur(_ player: AVAudioPlayer, error: Error?) { @@ -3263,6 +3826,7 @@ extension MessagingVC: AVAudioPlayerDelegate { audioPlayer = nil stopTTSMetering() VoiceLoopCoordinator.shared.setState(.idle) + deactivateAudioSession() } } @@ -3280,6 +3844,7 @@ extension MessagingVC: AVSpeechSynthesizerDelegate { } self.offlineSpeechMessageId = nil VoiceLoopCoordinator.shared.setState(.idle) + self.deactivateAudioSession() } } @@ -3302,7 +3867,7 @@ extension MessagingVC { // Match the existing playback session config so we don't fight the mic // engine if it was just torn down. do { - try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.mixWithOthers]) + try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.duckOthers]) try AVAudioSession.sharedInstance().setActive(true) } catch { print("DeepgramTTS: audio session setup failed (\(error)) — falling back") @@ -3343,6 +3908,7 @@ extension MessagingVC { self.speechBuffer = "" } VoiceLoopCoordinator.shared.setState(.idle) + self.deactivateAudioSession() return } self.speakOffline(text: text, messageId: messageId) @@ -3357,6 +3923,7 @@ extension MessagingVC { } self.deepgramTTS = nil VoiceLoopCoordinator.shared.setState(.idle) + self.deactivateAudioSession() } } // Real per-buffer RMS into the avatar's speaking-mode pulse. 
@@ -3412,7 +3979,7 @@ extension MessagingVC { guard let apiKey = MessagingVC.elevenLabsAPIKey else { return false } do { - try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: []) + try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.duckOthers]) try AVAudioSession.sharedInstance().setActive(true) } catch { print("ElevenLabsTTS: audio session setup failed (\(error)) — falling back") @@ -3479,7 +4046,7 @@ extension MessagingVC { guard let apiKey = MessagingVC.openAIAPIKey else { return false } do { - try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: []) + try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.duckOthers]) try AVAudioSession.sharedInstance().setActive(true) } catch { print("OpenAITTS: audio session setup failed (\(error)) — falling back") @@ -3538,7 +4105,7 @@ extension MessagingVC { private func playMP3Data(_ data: Data, messageId: String, speed: Double, providerLabel: String) { guard self.currentSpeechMessageId == messageId else { return } do { - try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default) + try AVAudioSession.sharedInstance().setCategory(.playback, mode: .default, options: [.duckOthers]) try AVAudioSession.sharedInstance().setActive(true) let player = try AVAudioPlayer(data: data) player.delegate = self @@ -3877,6 +4444,173 @@ extension MessagingVC: MessagingCellPDFDelegate { } } +// MARK: - StorySkillHost + +#if os(iOS) +extension MessagingVC: StorySkillHost { + + func storySkillDidStartGenerating(_ attachment: StoryAttachment) { + // Retry path: a placeholder for this id already exists — flip its + // state back to .generating in place. 
+ if let idx = self.messages.firstIndex(where: { $0.storyAttachment?.id == attachment.id }) { + self.messages[idx].storyAttachment = attachment + DispatchQueue.main.async { + self.reloadStoryRow(attachmentId: attachment.id) + } + return + } + + let placeholder = MessageStruct( + id: "story-\(attachment.id)", + role: "assistant", + content: "", + model: "loop-story", + storyAttachment: attachment + ) + let conversation = ensureCurrentConversation() + conversationManager.addMessage(placeholder, to: conversation) + currentConversationEntity = conversationManager.currentConversation + self.messages.append(placeholder) + DispatchQueue.main.async { + self.tableView.reloadData() + self.scrollToLastMessage() + } + } + + func storySkillDidFinishGenerating(_ attachment: StoryAttachment) { + // Find the placeholder and mutate its attachment in place so the + // card flips spinner → ready without losing scroll position. + guard let idx = self.messages.firstIndex(where: { $0.storyAttachment?.id == attachment.id }) else { + return + } + self.messages[idx].storyAttachment = attachment + DispatchQueue.main.async { + self.reloadStoryRow(attachmentId: attachment.id) + } + } + + /// Reload the row carrying a story attachment, guarding against the + /// table/data-source row-count drift that happens when a render finishes + /// mid-stream (the same drift `renderStreamingPartial` defends against). + /// On any mismatch — or if the row isn't currently in `visible_messages` — + /// fall back to a full reload so we never trip "Invalid batch updates". + private func reloadStoryRow(attachmentId: String) { + let expectedRows = visible_messages.count + (ai_state != .None ? 
1 : 0) + guard tableView.numberOfRows(inSection: 0) == expectedRows, + let visibleIdx = visible_messages.firstIndex(where: { $0.storyAttachment?.id == attachmentId }) + else { + tableView.reloadData() + return + } + tableView.reloadRows(at: [IndexPath(row: visibleIdx, section: 0)], with: .none) + } +} + +// MARK: - MessagingCellStoryDelegate + +extension MessagingVC: MessagingCellStoryDelegate { + + func messagingCellDidTapStory(attachmentId: String) { + guard let attachment = self.messages + .first(where: { $0.storyAttachment?.id == attachmentId })? + .storyAttachment, + attachment.status == .ready, + let url = attachment.fileURL, + FileManager.default.fileExists(atPath: url.path) + else { return } + let player = StoryPlayerVC() + player.storyAttachment = attachment + player.modalPresentationStyle = .fullScreen + present(player, animated: true) + } + + func messagingCellDidTapStoryRetry(attachmentId: String) { + guard let attachment = self.messages + .first(where: { $0.storyAttachment?.id == attachmentId })? + .storyAttachment + else { return } + let convId = conversationManager.currentConversation?.id + StoryGenerationService.shared.submit( + title: attachment.title, + template: attachment.template, + jsonPayload: attachment.jsonPayload, + attachmentId: attachment.id, + conversationId: convId + ) + } +} + +// MARK: - BrowseSkillHost + +extension MessagingVC: BrowseSkillHost { + + func browseSkillDidStart(_ attachment: BrowseAttachment) { + // A placeholder for this id may already exist if the model re-emits; + // update in place rather than dropping a second card. 
+ if let idx = self.messages.firstIndex(where: { $0.browseAttachment?.id == attachment.id }) { + self.messages[idx].browseAttachment = attachment + DispatchQueue.main.async { self.reloadBrowseRow(attachmentId: attachment.id) } + return + } + let placeholder = MessageStruct( + id: "browse-\(attachment.id)", + role: "assistant", + content: "", + model: "loop-browse", + browseAttachment: attachment + ) + let conversation = ensureCurrentConversation() + conversationManager.addMessage(placeholder, to: conversation) + currentConversationEntity = conversationManager.currentConversation + self.messages.append(placeholder) + DispatchQueue.main.async { + self.tableView.reloadData() + self.scrollToLastMessage() + } + } + + func browseSkillDidUpdate(_ attachment: BrowseAttachment) { + guard let idx = self.messages.firstIndex(where: { $0.browseAttachment?.id == attachment.id }) else { return } + self.messages[idx].browseAttachment = attachment + DispatchQueue.main.async { self.reloadBrowseRow(attachmentId: attachment.id) } + } + + func browseSkillDidFinish(_ attachment: BrowseAttachment) { + guard let idx = self.messages.firstIndex(where: { $0.browseAttachment?.id == attachment.id }) else { return } + self.messages[idx].browseAttachment = attachment + DispatchQueue.main.async { self.reloadBrowseRow(attachmentId: attachment.id) } + } + + /// Same row-count-drift guard as `reloadStoryRow` — fall back to a full + /// reload on any mismatch so we never trip "Invalid batch updates". + private func reloadBrowseRow(attachmentId: String) { + let expectedRows = visible_messages.count + (ai_state != .None ? 
1 : 0) + guard tableView.numberOfRows(inSection: 0) == expectedRows, + let visibleIdx = visible_messages.firstIndex(where: { $0.browseAttachment?.id == attachmentId }) + else { + tableView.reloadData() + return + } + tableView.reloadRows(at: [IndexPath(row: visibleIdx, section: 0)], with: .none) + } +} + +// MARK: - MessagingCellBrowseDelegate + +extension MessagingVC: MessagingCellBrowseDelegate { + func messagingCellDidTapBrowse(attachmentId: String) { + guard let attachment = self.messages + .first(where: { $0.browseAttachment?.id == attachmentId })? + .browseAttachment + else { return } + let player = BrowsePlayerVC() + player.attachment = attachment + player.modalPresentationStyle = .fullScreen + present(player, animated: true) + } +} +#endif + // MARK: - SlashCommandHost extension MessagingVC: SlashCommandHost { @@ -4048,6 +4782,14 @@ extension MessagingVC: TwitterSkillHost { } } +// MARK: - Top banner scroll delegate + +extension MessagingVC: TopBannerScrollViewDelegate { + func topBannerHeightDidChange() { + view.layoutIfNeeded() + } +} + // MARK: - Sub-agent runtime hooks extension MessagingVC: SubAgentStatusBarDelegate { @@ -4093,6 +4835,18 @@ extension MessagingVC: SubAgentStatusBarDelegate { loadMessagesFromConversation(refreshed, refreshIfRemote: false) } + /// A handed-off local turn finished on a runner and its reply was written + /// into a conversation. Reload if it's the one on screen. Gated on idle so we + /// never reload mid-stream (which can trip "Invalid batch updates detected"). + @objc func handleRunnerTurnApplied(_ notification: Notification) { + guard ai_state == .None, + let conversationId = notification.userInfo?["conversation_id"] as? 
String, + let current = currentConversationEntity, current.id == conversationId, + let refreshed = conversationManager.getConversation(by: conversationId) else { return } + currentConversationEntity = refreshed + loadMessagesFromConversation(refreshed, refreshIfRemote: false) + } + @objc func handleSubAgentMessage(_ notification: Notification) { guard let conversationId = notification.userInfo?["conversationId"] as? String, let current = currentConversationEntity, @@ -4201,6 +4955,13 @@ extension MessagingVC: OnboardingCoordinatorHost, OnboardingCardDelegate { sendHapticGenerator.impactOccurred() } else if message.role == "assistant" { responseHapticGenerator.impactOccurred() + // Managed onboarding reads its assistant messages aloud so the + // chosen voice demos itself. The voice-picker prompt (message 1) + // stays silent because audio is muted until a voice is selected; + // `playMessageSynthesizer` early-returns while muted. + if AppFlags.isManaged { + playMessageSynthesizer(message: message) + } } DispatchQueue.main.async { [weak self] in self?.scrollOnboardingToBottom() @@ -4387,6 +5148,21 @@ extension MessagingVC: ActionButtonReminderBarDelegate { func actionButtonReminderBarDismissed() { // The bar refreshes itself on dismiss tap; nothing extra to do here. } + + // MARK: - Card Feed Pill Alert + + @objc private func handleCardAdded(_ notification: Notification) { + guard let card = notification.object as? Card else { return } + CardPillAlert.show(in: self, cardId: card.id) { [weak self] cardId in + // Tapping the pill opens the card's detail view. There is no Feed + // tab — cards otherwise surface on the new-chat swipe stack. + guard let self = self else { return } + let latest = CardStore.shared.card(for: cardId) ?? 
card + let detail = CardDetailViewController(card: latest) + let nav = UINavigationController(rootViewController: detail) + self.present(nav, animated: true) + } + } } /// Inline `OnboardingCardDelegate` shim used by the reminder modal — it diff --git a/LoopIOS/MusicMiniPlayer/MusicMiniPlayerView.swift b/LoopIOS/MusicMiniPlayer/MusicMiniPlayerView.swift new file mode 100644 index 0000000..36315d9 --- /dev/null +++ b/LoopIOS/MusicMiniPlayer/MusicMiniPlayerView.swift @@ -0,0 +1,577 @@ +// +// MusicMiniPlayerView.swift +// Loop +// +// Compact music mini-player that lives in the top banner scroll view. +// Two states: minimized pill (track + play/pause) and expanded card +// (full controls, scrubber, album art). Driven by MusicController state. +// + +#if os(iOS) + +import UIKit +import MusicKit +import MediaPlayer + +// MARK: - MusicMiniPlayerDelegate + +protocol MusicMiniPlayerDelegate: AnyObject { + /// Called when the player visibility changes (show/hide) so the parent + /// banner can re-layout. + func musicMiniPlayerVisibilityChanged(_ visible: Bool) + /// Called when the player expands/collapses so the parent can adjust. + func musicMiniPlayerDidChangeState(expanded: Bool) +} + +// MARK: - MusicMiniPlayerView + +final class MusicMiniPlayerView: UIView { + + weak var delegate: MusicMiniPlayerDelegate? 
+ + enum DisplayState { + case hidden + case minimized + case expanded + } + + private(set) var displayState: DisplayState = .hidden + + // MARK: - Minimized pill subviews + + private let pillContainer = UIView() + private let pillArtwork = UIImageView() + private let pillTitleLabel = UILabel() + private let pillArtistLabel = UILabel() + private let pillPlayPauseButton = UIButton(type: .system) + + // MARK: - Expanded card subviews + + private let cardContainer = UIView() + private let cardArtwork = UIImageView() + private let cardTitleLabel = UILabel() + private let cardArtistLabel = UILabel() + private let cardPlayPauseButton = UIButton(type: .system) + private let cardSkipButton = UIButton(type: .system) + private let cardCollapseButton = UIButton(type: .system) + private let cardProgressView = UIProgressView(progressViewStyle: .default) + private let cardDismissButton = UIButton(type: .system) + + // MARK: - State tracking + + private var pollTimer: Timer? + private var lastActiveTime: Date? + private let autoDismissInterval: TimeInterval = 300 // 5 minutes + private var isUserDismissed = false + + // MARK: - Constraints + + private var pillHeightConstraint: NSLayoutConstraint? + private var cardHeightConstraint: NSLayoutConstraint? 
+ + // MARK: - Init + + override init(frame: CGRect) { + super.init(frame: frame) + setupViews() + setupGestures() + observeNotifications() + startPolling() + } + + required init?(coder: NSCoder) { + super.init(coder: coder) + setupViews() + setupGestures() + observeNotifications() + startPolling() + } + + deinit { + pollTimer?.invalidate() + NotificationCenter.default.removeObserver(self) + } + + // MARK: - Setup + + private func setupViews() { + backgroundColor = .clear + clipsToBounds = false + + setupPill() + setupCard() + + // Start hidden + pillContainer.isHidden = true + cardContainer.isHidden = true + pillContainer.alpha = 0 + cardContainer.alpha = 0 + } + + private func setupPill() { + pillContainer.translatesAutoresizingMaskIntoConstraints = false + pillContainer.backgroundColor = UIColor.systemBackground.withAlphaComponent(0.92) + pillContainer.layer.cornerRadius = 18 + pillContainer.layer.borderWidth = 0.5 + pillContainer.layer.borderColor = UIColor.separator.cgColor + pillContainer.layer.shadowColor = UIColor.black.cgColor + pillContainer.layer.shadowOpacity = 0.08 + pillContainer.layer.shadowOffset = CGSize(width: 0, height: 2) + pillContainer.layer.shadowRadius = 6 + addSubview(pillContainer) + + pillArtwork.translatesAutoresizingMaskIntoConstraints = false + pillArtwork.contentMode = .scaleAspectFill + pillArtwork.clipsToBounds = true + pillArtwork.layer.cornerRadius = 4 + pillArtwork.backgroundColor = .systemGray5 + pillArtwork.image = UIImage(systemName: "music.note") + pillArtwork.tintColor = .secondaryLabel + pillContainer.addSubview(pillArtwork) + + pillTitleLabel.translatesAutoresizingMaskIntoConstraints = false + pillTitleLabel.font = .systemFont(ofSize: 12, weight: .semibold) + pillTitleLabel.textColor = .label + pillTitleLabel.lineBreakMode = .byTruncatingTail + pillContainer.addSubview(pillTitleLabel) + + pillArtistLabel.translatesAutoresizingMaskIntoConstraints = false + pillArtistLabel.font = .systemFont(ofSize: 10, weight: .regular) + 
pillArtistLabel.textColor = .secondaryLabel + pillArtistLabel.lineBreakMode = .byTruncatingTail + pillContainer.addSubview(pillArtistLabel) + + pillPlayPauseButton.translatesAutoresizingMaskIntoConstraints = false + pillPlayPauseButton.tintColor = .label + pillPlayPauseButton.setImage(UIImage(systemName: "pause.fill", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 12, weight: .semibold)), for: .normal) + pillPlayPauseButton.addTarget(self, action: #selector(togglePlayPause), for: .touchUpInside) + pillContainer.addSubview(pillPlayPauseButton) + + pillHeightConstraint = pillContainer.heightAnchor.constraint(equalToConstant: 36) + + NSLayoutConstraint.activate([ + pillContainer.leadingAnchor.constraint(equalTo: leadingAnchor), + pillContainer.trailingAnchor.constraint(equalTo: trailingAnchor), + pillContainer.topAnchor.constraint(equalTo: topAnchor), + pillHeightConstraint!, + + pillArtwork.leadingAnchor.constraint(equalTo: pillContainer.leadingAnchor, constant: 8), + pillArtwork.centerYAnchor.constraint(equalTo: pillContainer.centerYAnchor), + pillArtwork.widthAnchor.constraint(equalToConstant: 24), + pillArtwork.heightAnchor.constraint(equalToConstant: 24), + + pillTitleLabel.leadingAnchor.constraint(equalTo: pillArtwork.trailingAnchor, constant: 8), + pillTitleLabel.topAnchor.constraint(equalTo: pillContainer.topAnchor, constant: 5), + pillTitleLabel.trailingAnchor.constraint(lessThanOrEqualTo: pillPlayPauseButton.leadingAnchor, constant: -8), + + pillArtistLabel.leadingAnchor.constraint(equalTo: pillArtwork.trailingAnchor, constant: 8), + pillArtistLabel.topAnchor.constraint(equalTo: pillTitleLabel.bottomAnchor, constant: 1), + pillArtistLabel.trailingAnchor.constraint(lessThanOrEqualTo: pillPlayPauseButton.leadingAnchor, constant: -8), + + pillPlayPauseButton.trailingAnchor.constraint(equalTo: pillContainer.trailingAnchor, constant: -10), + pillPlayPauseButton.centerYAnchor.constraint(equalTo: pillContainer.centerYAnchor), + 
pillPlayPauseButton.widthAnchor.constraint(equalToConstant: 28), + pillPlayPauseButton.heightAnchor.constraint(equalToConstant: 28), + ]) + } + + private func setupCard() { + cardContainer.translatesAutoresizingMaskIntoConstraints = false + cardContainer.backgroundColor = UIColor.systemBackground.withAlphaComponent(0.96) + cardContainer.layer.cornerRadius = 16 + cardContainer.layer.borderWidth = 0.5 + cardContainer.layer.borderColor = UIColor.separator.cgColor + cardContainer.layer.shadowColor = UIColor.black.cgColor + cardContainer.layer.shadowOpacity = 0.12 + cardContainer.layer.shadowOffset = CGSize(width: 0, height: 4) + cardContainer.layer.shadowRadius = 12 + addSubview(cardContainer) + + cardArtwork.translatesAutoresizingMaskIntoConstraints = false + cardArtwork.contentMode = .scaleAspectFill + cardArtwork.clipsToBounds = true + cardArtwork.layer.cornerRadius = 8 + cardArtwork.backgroundColor = .systemGray5 + cardArtwork.image = UIImage(systemName: "music.note") + cardArtwork.tintColor = .secondaryLabel + cardArtwork.isUserInteractionEnabled = true + let artTap = UITapGestureRecognizer(target: self, action: #selector(openInAppleMusic)) + cardArtwork.addGestureRecognizer(artTap) + cardContainer.addSubview(cardArtwork) + + cardTitleLabel.translatesAutoresizingMaskIntoConstraints = false + cardTitleLabel.font = .systemFont(ofSize: 15, weight: .semibold) + cardTitleLabel.textColor = .label + cardTitleLabel.lineBreakMode = .byTruncatingTail + cardTitleLabel.isUserInteractionEnabled = true + let titleTap = UITapGestureRecognizer(target: self, action: #selector(openInAppleMusic)) + cardTitleLabel.addGestureRecognizer(titleTap) + cardContainer.addSubview(cardTitleLabel) + + cardArtistLabel.translatesAutoresizingMaskIntoConstraints = false + cardArtistLabel.font = .systemFont(ofSize: 12, weight: .regular) + cardArtistLabel.textColor = .secondaryLabel + cardArtistLabel.lineBreakMode = .byTruncatingTail + cardContainer.addSubview(cardArtistLabel) + + 
cardProgressView.translatesAutoresizingMaskIntoConstraints = false + cardProgressView.progressTintColor = .systemPink + cardProgressView.trackTintColor = .systemGray5 + cardContainer.addSubview(cardProgressView) + + cardPlayPauseButton.translatesAutoresizingMaskIntoConstraints = false + cardPlayPauseButton.tintColor = .label + cardPlayPauseButton.setImage(UIImage(systemName: "pause.fill", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 22, weight: .bold)), for: .normal) + cardPlayPauseButton.addTarget(self, action: #selector(togglePlayPause), for: .touchUpInside) + cardContainer.addSubview(cardPlayPauseButton) + + cardSkipButton.translatesAutoresizingMaskIntoConstraints = false + cardSkipButton.tintColor = .label + cardSkipButton.setImage(UIImage(systemName: "forward.fill", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 18, weight: .semibold)), for: .normal) + cardSkipButton.addTarget(self, action: #selector(skipTrack), for: .touchUpInside) + cardContainer.addSubview(cardSkipButton) + + cardCollapseButton.translatesAutoresizingMaskIntoConstraints = false + cardCollapseButton.tintColor = .secondaryLabel + cardCollapseButton.setImage(UIImage(systemName: "chevron.down", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 12, weight: .semibold)), for: .normal) + cardCollapseButton.addTarget(self, action: #selector(collapseToMinimized), for: .touchUpInside) + cardContainer.addSubview(cardCollapseButton) + + cardDismissButton.translatesAutoresizingMaskIntoConstraints = false + cardDismissButton.tintColor = .tertiaryLabel + cardDismissButton.setImage(UIImage(systemName: "xmark", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 10, weight: .semibold)), for: .normal) + cardDismissButton.addTarget(self, action: #selector(dismissPlayer), for: .touchUpInside) + cardContainer.addSubview(cardDismissButton) + + cardHeightConstraint = cardContainer.heightAnchor.constraint(equalToConstant: 160) + + NSLayoutConstraint.activate([ + 
cardContainer.leadingAnchor.constraint(equalTo: leadingAnchor), + cardContainer.trailingAnchor.constraint(equalTo: trailingAnchor), + cardContainer.topAnchor.constraint(equalTo: topAnchor), + cardHeightConstraint!, + + cardCollapseButton.topAnchor.constraint(equalTo: cardContainer.topAnchor, constant: 8), + cardCollapseButton.centerXAnchor.constraint(equalTo: cardContainer.centerXAnchor), + + cardDismissButton.topAnchor.constraint(equalTo: cardContainer.topAnchor, constant: 8), + cardDismissButton.trailingAnchor.constraint(equalTo: cardContainer.trailingAnchor, constant: -12), + + cardArtwork.leadingAnchor.constraint(equalTo: cardContainer.leadingAnchor, constant: 16), + cardArtwork.topAnchor.constraint(equalTo: cardCollapseButton.bottomAnchor, constant: 8), + cardArtwork.widthAnchor.constraint(equalToConstant: 72), + cardArtwork.heightAnchor.constraint(equalToConstant: 72), + + cardTitleLabel.leadingAnchor.constraint(equalTo: cardArtwork.trailingAnchor, constant: 12), + cardTitleLabel.topAnchor.constraint(equalTo: cardArtwork.topAnchor, constant: 4), + cardTitleLabel.trailingAnchor.constraint(equalTo: cardContainer.trailingAnchor, constant: -16), + + cardArtistLabel.leadingAnchor.constraint(equalTo: cardArtwork.trailingAnchor, constant: 12), + cardArtistLabel.topAnchor.constraint(equalTo: cardTitleLabel.bottomAnchor, constant: 4), + cardArtistLabel.trailingAnchor.constraint(equalTo: cardContainer.trailingAnchor, constant: -16), + + cardProgressView.leadingAnchor.constraint(equalTo: cardContainer.leadingAnchor, constant: 16), + cardProgressView.trailingAnchor.constraint(equalTo: cardContainer.trailingAnchor, constant: -16), + cardProgressView.topAnchor.constraint(equalTo: cardArtwork.bottomAnchor, constant: 12), + + cardPlayPauseButton.centerXAnchor.constraint(equalTo: cardContainer.centerXAnchor), + cardPlayPauseButton.topAnchor.constraint(equalTo: cardProgressView.bottomAnchor, constant: 8), + cardPlayPauseButton.widthAnchor.constraint(equalToConstant: 44), + 
cardPlayPauseButton.heightAnchor.constraint(equalToConstant: 44), + + cardSkipButton.leadingAnchor.constraint(equalTo: cardPlayPauseButton.trailingAnchor, constant: 24), + cardSkipButton.centerYAnchor.constraint(equalTo: cardPlayPauseButton.centerYAnchor), + cardSkipButton.widthAnchor.constraint(equalToConstant: 36), + cardSkipButton.heightAnchor.constraint(equalToConstant: 36), + ]) + } + + private func setupGestures() { + // Tap pill to expand + let pillTap = UITapGestureRecognizer(target: self, action: #selector(expandFromPill)) + pillContainer.addGestureRecognizer(pillTap) + pillContainer.isUserInteractionEnabled = true + + // Swipe down on card to collapse + let cardSwipeDown = UISwipeGestureRecognizer(target: self, action: #selector(collapseToMinimized)) + cardSwipeDown.direction = .down + cardContainer.addGestureRecognizer(cardSwipeDown) + + // Swipe left/right on pill to dismiss + let pillSwipeLeft = UISwipeGestureRecognizer(target: self, action: #selector(dismissPlayer)) + pillSwipeLeft.direction = .left + pillContainer.addGestureRecognizer(pillSwipeLeft) + + let pillSwipeRight = UISwipeGestureRecognizer(target: self, action: #selector(dismissPlayer)) + pillSwipeRight.direction = .right + pillContainer.addGestureRecognizer(pillSwipeRight) + } + + private func observeNotifications() { + NotificationCenter.default.addObserver( + self, + selector: #selector(voiceStateChanged), + name: .voiceLoopStateDidChange, + object: nil + ) + } + + // MARK: - Polling + + private func startPolling() { + pollTimer = Timer.scheduledTimer(withTimeInterval: 1.0, repeats: true) { [weak self] _ in + Task { @MainActor in + self?.pollMusicState() + } + } + } + + @MainActor + private func pollMusicState() { + let controller = MusicController.shared + let player = ApplicationMusicPlayer.shared + let playbackStatus = player.state.playbackStatus + + let isPlaying = playbackStatus == .playing + let isPaused = playbackStatus == .paused + + if isPlaying { + lastActiveTime = Date() + 
isUserDismissed = false + } + + // Determine visibility + let shouldShow: Bool + if isUserDismissed { + shouldShow = false + } else if isPlaying { + shouldShow = true + } else if isPaused, let lastActive = lastActiveTime { + shouldShow = Date().timeIntervalSince(lastActive) < autoDismissInterval + } else if controller.nowPlaying != nil && isPaused { + // Paused but within window + if lastActiveTime == nil { lastActiveTime = Date() } + shouldShow = true + } else { + shouldShow = false + } + + if shouldShow && displayState == .hidden { + transitionTo(.minimized, animated: true) + } else if !shouldShow && displayState != .hidden { + transitionTo(.hidden, animated: true) + } + + // Update content + if displayState != .hidden { + updateContent(controller: controller, isPlaying: isPlaying) + } + } + + // MARK: - Content Updates + + @MainActor + private func updateContent(controller: MusicController, isPlaying: Bool) { + let np = controller.nowPlaying + + let title = np?.title ?? "Not Playing" + let artist = np?.artist ?? "" + + // Pill + pillTitleLabel.text = title + pillArtistLabel.text = artist + + // Card + cardTitleLabel.text = title + cardArtistLabel.text = artist + + // Play/pause icon + let iconName = isPlaying ? "pause.fill" : "play.fill" + pillPlayPauseButton.setImage(UIImage(systemName: iconName, + withConfiguration: UIImage.SymbolConfiguration(pointSize: 12, weight: .semibold)), for: .normal) + cardPlayPauseButton.setImage(UIImage(systemName: iconName, + withConfiguration: UIImage.SymbolConfiguration(pointSize: 22, weight: .bold)), for: .normal) + + // Progress + let player = ApplicationMusicPlayer.shared + let currentTime = player.playbackTime + let duration = MPNowPlayingInfoCenter.default().nowPlayingInfo?[MPMediaItemPropertyPlaybackDuration] as? TimeInterval ?? 
0 + if duration > 0 { + cardProgressView.progress = Float(currentTime / duration) + } else { + cardProgressView.progress = 0 + } + + // Artwork — use MusicKit's current entry artwork if available + loadArtwork() + } + + private func loadArtwork() { + let player = ApplicationMusicPlayer.shared + guard let entry = player.queue.currentEntry else { return } + + Task { @MainActor in + let artworkURL: URL? + switch entry.item { + case .song(let song): + artworkURL = song.artwork?.url(width: 120, height: 120) + default: + artworkURL = nil + } + + guard let url = artworkURL else { return } + // Load artwork data off main thread, then apply + let loadedImage = await Task.detached { + guard let data = try? Data(contentsOf: url) else { return nil as UIImage? } + return UIImage(data: data) + }.value + if let image = loadedImage { + self.pillArtwork.image = image + self.cardArtwork.image = image + } + } + } + + // MARK: - State Transitions + + func transitionTo(_ state: DisplayState, animated: Bool) { + guard state != displayState else { return } + let oldState = displayState + displayState = state + + let work: () -> Void + switch state { + case .hidden: + work = { + self.pillContainer.alpha = 0 + self.cardContainer.alpha = 0 + } + case .minimized: + pillContainer.isHidden = false + work = { + self.pillContainer.alpha = 1 + self.cardContainer.alpha = 0 + } + case .expanded: + cardContainer.isHidden = false + work = { + self.pillContainer.alpha = 0 + self.cardContainer.alpha = 1 + } + } + + let completion: (Bool) -> Void = { _ in + switch state { + case .hidden: + self.pillContainer.isHidden = true + self.cardContainer.isHidden = true + case .minimized: + self.cardContainer.isHidden = true + case .expanded: + self.pillContainer.isHidden = true + } + + let wasVisible = oldState != .hidden + let isVisible = state != .hidden + if wasVisible != isVisible { + self.delegate?.musicMiniPlayerVisibilityChanged(isVisible) + } + self.delegate?.musicMiniPlayerDidChangeState(expanded: state 
== .expanded) + } + + if animated { + UIView.animate(withDuration: 0.3, delay: 0, + usingSpringWithDamping: 0.85, + initialSpringVelocity: 0.5, + options: [.curveEaseInOut], + animations: work, + completion: completion) + } else { + work() + completion(true) + } + } + + // MARK: - Intrinsic sizing + + var currentHeight: CGFloat { + switch displayState { + case .hidden: return 0 + case .minimized: return 44 + case .expanded: return 168 + } + } + + override var intrinsicContentSize: CGSize { + CGSize(width: UIView.noIntrinsicMetric, height: currentHeight) + } + + // MARK: - Actions + + @objc private func togglePlayPause() { + Task { @MainActor in + let player = ApplicationMusicPlayer.shared + if player.state.playbackStatus == .playing { + MusicController.shared.pause(reason: .userExplicit) + } else { + try? await MusicController.shared.userResume() + } + } + } + + @objc private func skipTrack() { + Task { @MainActor in + try? await MusicController.shared.skip() + } + } + + @objc private func expandFromPill() { + transitionTo(.expanded, animated: true) + } + + @objc private func collapseToMinimized() { + transitionTo(.minimized, animated: true) + } + + @objc private func dismissPlayer() { + isUserDismissed = true + transitionTo(.hidden, animated: true) + } + + @objc private func openInAppleMusic() { + // Deep-link to currently playing track in Apple Music + if let url = URL(string: "music://") { + UIApplication.shared.open(url) + } + } + + // MARK: - Voice state auto-minimize + + @objc private func voiceStateChanged() { + let state = VoiceLoopCoordinator.shared.state + if state == .recording && displayState == .expanded { + transitionTo(.minimized, animated: true) + } + } + + // MARK: - Public API + + /// Call when the user explicitly stopped music via agent command + func handleMusicStopped() { + lastActiveTime = nil + isUserDismissed = false + transitionTo(.hidden, animated: true) + } + + /// Reset user dismiss flag (e.g. 
when new music starts playing) + func resetDismiss() { + isUserDismissed = false + } +} + +#endif diff --git a/LoopIOS/MusicMiniPlayer/TopBannerScrollView.swift b/LoopIOS/MusicMiniPlayer/TopBannerScrollView.swift new file mode 100644 index 0000000..24577e4 --- /dev/null +++ b/LoopIOS/MusicMiniPlayer/TopBannerScrollView.swift @@ -0,0 +1,146 @@ +// +// TopBannerScrollView.swift +// Loop +// +// Horizontally scrollable banner container that hosts the music mini-player. +// Sits below the sub-agent status bar in the chat view. When the music +// player is visible, this banner expands; when hidden, it collapses to +// zero height. The scroll view allows horizontal scrolling if the content +// is wider than the screen (e.g. future additional banner items). +// + +#if os(iOS) + +import UIKit + +protocol TopBannerScrollViewDelegate: AnyObject { + func topBannerHeightDidChange() +} + +final class TopBannerScrollView: UIView { + + weak var bannerDelegate: TopBannerScrollViewDelegate? + + // MARK: - Subviews + + private let scrollView = UIScrollView() + private let contentStack = UIStackView() + + /// The music mini-player. + let musicMiniPlayer = MusicMiniPlayerView() + + /// Height constraint toggled to collapse the entire banner when the + /// music player is hidden. + private var heightConstraint: NSLayoutConstraint! 
+ + // MARK: - Init + + override init(frame: CGRect) { + super.init(frame: frame) + setup() + } + + required init?(coder: NSCoder) { + super.init(coder: coder) + setup() + } + + // MARK: - Setup + + private func setup() { + backgroundColor = .clear + translatesAutoresizingMaskIntoConstraints = false + clipsToBounds = false + + scrollView.translatesAutoresizingMaskIntoConstraints = false + scrollView.showsHorizontalScrollIndicator = false + scrollView.showsVerticalScrollIndicator = false + scrollView.alwaysBounceHorizontal = false + scrollView.clipsToBounds = false + addSubview(scrollView) + + contentStack.translatesAutoresizingMaskIntoConstraints = false + contentStack.axis = .horizontal + contentStack.spacing = 8 + contentStack.alignment = .center + contentStack.distribution = .fill + scrollView.addSubview(contentStack) + + // Configure music mini-player + musicMiniPlayer.translatesAutoresizingMaskIntoConstraints = false + musicMiniPlayer.delegate = self + contentStack.addArrangedSubview(musicMiniPlayer) + + // Music player width fills available space in minimized, expands in expanded + let musicWidth = musicMiniPlayer.widthAnchor.constraint(equalTo: scrollView.frameLayoutGuide.widthAnchor) + musicWidth.priority = .defaultHigh + musicWidth.isActive = true + + heightConstraint = heightAnchor.constraint(equalToConstant: 0) + heightConstraint.isActive = true + + NSLayoutConstraint.activate([ + scrollView.leadingAnchor.constraint(equalTo: leadingAnchor, constant: 12), + scrollView.trailingAnchor.constraint(equalTo: trailingAnchor, constant: -12), + scrollView.topAnchor.constraint(equalTo: topAnchor), + scrollView.bottomAnchor.constraint(equalTo: bottomAnchor), + + contentStack.leadingAnchor.constraint(equalTo: scrollView.contentLayoutGuide.leadingAnchor), + contentStack.trailingAnchor.constraint(equalTo: scrollView.contentLayoutGuide.trailingAnchor), + contentStack.topAnchor.constraint(equalTo: scrollView.contentLayoutGuide.topAnchor), + 
contentStack.bottomAnchor.constraint(equalTo: scrollView.contentLayoutGuide.bottomAnchor), + contentStack.heightAnchor.constraint(equalTo: scrollView.frameLayoutGuide.heightAnchor), + ]) + } + + // MARK: - Layout refresh + + /// Recalculates the banner height based on the music player state. + func refreshLayout(animated: Bool = true) { + let musicVisible = musicMiniPlayer.displayState != .hidden + + let targetHeight: CGFloat + if musicMiniPlayer.displayState == .expanded { + targetHeight = musicMiniPlayer.currentHeight + 8 + } else if musicVisible { + targetHeight = 48 + } else { + targetHeight = 0 + } + + guard heightConstraint.constant != targetHeight else { return } + + let work = { + self.heightConstraint.constant = targetHeight + self.superview?.layoutIfNeeded() + } + + if animated { + UIView.animate(withDuration: 0.25, animations: work) { _ in + self.bannerDelegate?.topBannerHeightDidChange() + } + } else { + work() + bannerDelegate?.topBannerHeightDidChange() + } + } + + /// Conversation scope pass-through (unused here but keeps API consistent). + var conversationId: String? { + didSet { /* Music player is global, not conversation-scoped */ } + } +} + +// MARK: - MusicMiniPlayerDelegate + +extension TopBannerScrollView: MusicMiniPlayerDelegate { + func musicMiniPlayerVisibilityChanged(_ visible: Bool) { + refreshLayout(animated: true) + } + + func musicMiniPlayerDidChangeState(expanded: Bool) { + refreshLayout(animated: true) + } +} + +#endif diff --git a/LoopIOS/Onboarding/OnboardingCoordinator.swift b/LoopIOS/Onboarding/OnboardingCoordinator.swift index d431836..f8d2e00 100644 --- a/LoopIOS/Onboarding/OnboardingCoordinator.swift +++ b/LoopIOS/Onboarding/OnboardingCoordinator.swift @@ -138,6 +138,9 @@ final class OnboardingCoordinator { static let voiceOpenAI = "tts.openai" static let skipTTS = "tts.skip" static let startChatting = "done.start" + /// Managed onboarding: one chip per ElevenLabs voice. 
The voice id is + /// appended after this prefix so `handleChoice` can recover it. + static let managedVoicePrefix = "tts.voice." } private(set) var currentStep: StepID = .greeting @@ -190,7 +193,13 @@ final class OnboardingCoordinator { if hasResumed { return } hasResumed = true - let resumed = StepID(rawValue: OnboardingState.lastStep) ?? .greeting + var resumed = StepID(rawValue: OnboardingState.lastStep) ?? .greeting + // Managed builds run a trimmed 3-step script (voice → Action Button → + // intro): the model/key/integrations steps don't apply, so never + // resume earlier than the voice step. + if AppFlags.isManaged, resumed.rawValue < StepID.ttsOffer.rawValue { + resumed = .ttsOffer + } currentStep = resumed // First launch: seed the self-docs and pick a sensible starting @@ -198,13 +207,20 @@ final class OnboardingCoordinator { // Secrets.xcconfig, iCloud-KVS sync), let `ModelSelectionStore`'s // default logic pick that provider — pinning Apple would hide a // working key and make the greeting inaccurate. Pin Apple only when - // no hosted key exists. - if resumed == .greeting { + // no hosted key exists. (Managed builds skip model setup entirely.) + let isFreshStart = resumed == .greeting || (AppFlags.isManaged && resumed == .ttsOffer) + if isFreshStart { AppSignals.emit("onboarding_started") - if !ModelProvider.hasAnyProviderKey { + if !AppFlags.isManaged, !ModelProvider.hasAnyProviderKey { pinAppleFoundationModel() } AgentHarness.shared.seedSelfDocsIfMissing() + // Managed: keep audio muted through the voice-picker prompt so + // message 1 isn't spoken. Selecting a voice unmutes, so messages + // 2 and 3 read aloud in the chosen voice. + if AppFlags.isManaged { + iCloudKVSDefaults.shared.set(true, forKey: "audioMuted") + } if deferGreetingUntilFirstMessage { // Host wants the greeting to land as a *reply* to the user's // opener, so hold the post. 
`handleUserText` will fire it @@ -408,6 +424,17 @@ final class OnboardingCoordinator { } private func handleChoice(id: String, label: String) { + // Managed onboarding: the voice-picker chips each carry an ElevenLabs + // voice id. Persist the pick (TTS is locked to ElevenLabs Flash v2.5) + // and move on to the Action Button step. + if AppFlags.isManaged, currentStep == .ttsOffer, + id.hasPrefix(ChipId.managedVoicePrefix) { + commitAnswer(echo: label) + selectElevenLabsVoice(String(id.dropFirst(ChipId.managedVoicePrefix.count))) + advance(to: .actionButton) + return + } + switch (currentStep, id) { // Model picks fire from .greeting now (the first step). .modelChoice @@ -707,6 +734,27 @@ final class OnboardingCoordinator { ])) case .ttsOffer: + // Managed builds open here (step 1 of 3) and pick directly from + // the ElevenLabs voice list — TTS is locked to ElevenLabs Flash + // v2.5, so we offer voices rather than providers. + if AppFlags.isManaged { + // Curated shortlist, in display order. Resolved against the + // provider's `voiceOptions` so labels/ids stay in sync. + let orderedVoiceIds = [ + "ZSNL4hPqCnqoMPaI4jGX", // Hannah + "sIivXWc5MTlPIP3kJXhg", // Hayes + "M6ic45wruJGWAxLFEMNK", // Zoe + "hmMWXCj9K7N5mCPcRkfC", // Rory + ] + let options = TTSProvider.elevenLabsFlashV25.voiceOptions + let voiceChips: [OnboardingChoiceOption] = orderedVoiceIds.compactMap { id in + guard let opt = options.first(where: { $0.id == id }) else { return nil } + return OnboardingChoiceOption(id: ChipId.managedVoicePrefix + opt.id, label: opt.label) + } + return assistantMessage( + text: "Nice to meet you! **Let's get set up with this Harness.**\n\n**Pick a voice** for replies. You can change it anytime from the speaker menu.", + card: .suggestions(options: voiceChips)) + } return assistantMessage( text: "**Pick a voice** for replies. 
You can change it anytime from the speaker menu.", card: .suggestions(options: [ @@ -718,15 +766,27 @@ final class OnboardingCoordinator { ])) case .actionButton: + // Managed onboarding uses its own lead-in; both keep the Action + // Button bold and the same walkthrough card. + let actionButtonText = AppFlags.isManaged + ? "Let's bind your iPhone's **Action Button** so you can talk to me from anywhere." + : "One last thing — bind your iPhone's **Action Button** so you can talk to me from anywhere." return assistantMessage( - text: "One last thing — bind your iPhone's **Action Button** so you can talk to me from anywhere.", + text: actionButtonText, card: .actionButtonWalkthrough) case .done: - // Terminal message. We no longer ask the user to name the - // assistant, so this is the single sign-off that hands the - // conversation over to the real chat. No chip — the message bar - // is the obvious next step. + // Terminal message. Managed builds close with Loop's self-intro + // and an open question; the typed answer becomes the user's first + // real message (and seeds what the harness knows about them). + if AppFlags.isManaged { + return assistantMessage( + text: "**Awesome. We're all set!** To introduce myself — I'm **Loop**, designed to be your personal self-improving agent.\n\nThink of me as a living memory you can text or talk to: I remember your projects, manage your calendar, search the web, draft documents, and run tasks in the background — all from one conversation that syncs across your devices. The more we work together, the sharper I get.\n\n**What should I know about you to be most useful?**", + card: .answered) + } + // We no longer ask the user to name the assistant, so this is the + // single sign-off that hands the conversation over to the real + // chat. No chip — the message bar is the obvious next step. return assistantMessage( text: "Awesome. We're all set! I'm excited to be at your service. 
**How can I be helpful?**", card: .answered) @@ -752,6 +812,17 @@ final class OnboardingCoordinator { iCloudKVSDefaults.shared.set(false, forKey: "audioMuted") } + /// Managed onboarding voice pick. TTS is locked to ElevenLabs Flash v2.5 + /// (see `MessagingVC.ttsProvider`), so we persist the chosen voice id + /// against that provider's key and unmute. Writing `ttsProvider` too keeps + /// the stored value coherent even though the managed getter ignores it. + private func selectElevenLabsVoice(_ voiceId: String) { + let provider = TTSProvider.elevenLabsFlashV25 + iCloudKVSDefaults.shared.set(provider.rawValue, forKey: "ttsProvider") + iCloudKVSDefaults.shared.set(voiceId, forKey: "ttsVoice.\(provider.rawValue)") + iCloudKVSDefaults.shared.set(false, forKey: "audioMuted") + } + /// True when a non-empty key is saved for `key`. Used by the TTS step to /// decide whether the user can use a cloud voice immediately or needs to /// paste a key first. diff --git a/LoopIOS/Runner/BackgroundTurnRunner.swift b/LoopIOS/Runner/BackgroundTurnRunner.swift new file mode 100644 index 0000000..505b6b6 --- /dev/null +++ b/LoopIOS/Runner/BackgroundTurnRunner.swift @@ -0,0 +1,101 @@ +// +// BackgroundTurnRunner.swift +// Loop +// +// Self-contained background handoff: when the user backgrounds the app mid-turn, +// we run the turn on their SSH VM as a single detached one-shot — NO server, +// tunnel, port, or deployed binary. One SSH command writes a tiny Python script +// + the request JSON to `~/.loop/` and launches `python3` detached (`nohup`). +// The script calls the model directly and POSTs the reply to the push backend, +// which alerts the device. +// +// This replaces the earlier "deploy + run a Go HTTP runner" path, whose +// multi-handshake install was too slow to finish before the app suspended. The +// one-shot needs only `python3` (stdlib only) on the VM and a single SSH +// round-trip, so it reliably fits the background-handoff window. 
+// +// It also uses the user's *selected* model/provider (not a hardcoded one), so +// the reply is closer to what local inference would have produced. +// + +import Foundation +import os + +final class BackgroundTurnRunner { + + static let shared = BackgroundTurnRunner() + private init() {} + + private static let log = Logger(subsystem: "com.bhat.intel", category: "handoff") + // File layout + model/key selection + the Python agent loop are shared with + // the recurring VM-cron path; see `VMAgentRuntime`. + private static let remoteDir = VMAgentRuntime.remoteDir + private static let pushURL = VMAgentRuntime.pushURL + + enum RunResult { + /// Launched successfully; `model` is the display name that will run on the VM. + case success(model: String) + case failure(String) + } + + /// Launch the turn on the VM. Returns `.success` once the one-shot is + /// confirmed launched (it pushes on its own when the model replies), or + /// `.failure(reason)` with a user-surfaceable message. + func run(messages: [[String: String]], + conversationId: String, + userId: String, + turnId: String, + on config: SSHConfig) async -> RunResult { + guard config.isConfigured else { return .failure("No SSH connection selected.") } + guard !userId.isEmpty else { return .failure("No device push id yet — open the app once with notifications enabled.") } + guard let pc = VMAgentRuntime.providerConfig() else { + return .failure("No cloud model API key set (add one in Settings → Keys).") + } + + let cfg: [String: Any] = [ + "provider": pc.provider, + "model": pc.modelID, + "api_key": pc.key, + "messages": messages, + "user_id": userId, + "conversation_id": conversationId, + "turn_id": turnId, + "push_url": Self.pushURL, + "result_path": "\(Self.remoteDir)/turns/\(turnId).json", + // All the user's cloud keys (by env-var name), so the runner's tools + // can use any connected service dynamically — no per-tool porting. + "env": VMAgentRuntime.exportableKeys(), + ] + guard let cfgData = try? 
JSONSerialization.data(withJSONObject: cfg) else { + return .failure("Could not encode the request.") + } + + let cfgB64 = cfgData.base64EncodedString() + let scriptB64 = Data(VMAgentRuntime.pythonScript.utf8).base64EncodedString() + + // One round-trip: ensure python3, write script + request synchronously, + // then launch python detached so it outlives the SSH channel. + let cmd = """ + command -v python3 >/dev/null 2>&1 || { echo LOOP_NOPYTHON; exit 0; } + mkdir -p \(Self.remoteDir)/turns && printf %s '\(scriptB64)' | base64 -d > \(Self.remoteDir)/oneshot.py && printf %s '\(cfgB64)' | base64 -d > \(Self.remoteDir)/req-\(turnId).json || { echo LOOP_WRITEFAIL; exit 0; } + cd \(Self.remoteDir) && nohup python3 oneshot.py req-\(turnId).json >>oneshot.log 2>&1 String { + /// Start a new turn on the runner. With `async: true` the runner persists the + /// turn, returns `202 {"id":...}` immediately, and finishes in the background + /// (the handoff path). With `async: false` the runner streams SSE — not what + /// this JSON-parsing method expects, so callers should keep `async: true`. + func startTurn(messages: [[String: String]], conversationId: String, userId: String, async: Bool = true) async throws -> String { var request = makeRequest(path: "/turn", method: "POST") - request.httpBody = try JSONSerialization.data( - withJSONObject: ["messages": messages] - ) + let payload: [String: Any] = [ + "messages": messages, + "conversation_id": conversationId, + "user_id": userId, + "async": async, + ] + request.httpBody = try JSONSerialization.data(withJSONObject: payload) request.setValue("application/json", forHTTPHeaderField: "Content-Type") let (data, response) = try await session.data(for: request) try validate(response) @@ -83,6 +90,14 @@ final class LoopRunnerClient { return turnId } + /// The stable device user id the central push backend targets (see + /// `PushTokenBridge` / PUSH_NOTIFICATIONS.md). 
Empty when analytics/push has + /// never run on this device — the runner then skips the completion push and + /// delivery falls back to the foreground poller. + static var deviceUserId: String { + UserDefaults.standard.string(forKey: "loop.analytics.user_id") ?? "" + } + /// Fetch a single turn by id. func getTurn(id: String) async throws -> RunnerTurn { let request = makeRequest(path: "/turn/\(id)") @@ -180,7 +195,27 @@ protocol RunnerPolling { func pollJobs(since: Date) async throws -> (jobs: [RunnerJob], serverTime: Date) } +/// The submit surface used by the background-handoff path. Both transports +/// implement it so a handoff works whether the runner is reached directly, +/// over a tunnel, or via SSH-exec. +protocol RunnerSubmitting { + func startTurn(messages: [[String: String]], conversationId: String, userId: String, async: Bool) async throws -> String +} + +/// Fetch a single turn by id — used by the completion-push tap and foreground +/// reconciliation to read a handed-off turn's final response. +protocol RunnerFetching { + func getTurn(id: String) async throws -> RunnerTurn +} + +/// Combined transport capability — poll + submit + fetch. `makeClient` vends this +/// so the poller can poll, hand off, and fetch without rebuilding the transport. +protocol RunnerTransport: RunnerPolling, RunnerSubmitting, RunnerFetching {} + extension LoopRunnerClient: RunnerPolling {} +extension LoopRunnerClient: RunnerSubmitting {} +extension LoopRunnerClient: RunnerFetching {} +extension LoopRunnerClient: RunnerTransport {} // MARK: - ISO8601 helpers diff --git a/LoopIOS/Runner/LoopRunnerPoller.swift b/LoopIOS/Runner/LoopRunnerPoller.swift index 44d8a75..99ccbbe 100644 --- a/LoopIOS/Runner/LoopRunnerPoller.swift +++ b/LoopIOS/Runner/LoopRunnerPoller.swift @@ -232,8 +232,19 @@ final class LoopRunnerPoller { /// to the `curl`-over-SSH-exec client if the tunnel can't be established /// (e.g. `AllowTcpForwarding no`). 
Non-SSH runners use the direct URLSession /// client. Returns nil if the shared secret or URL is missing. - private func makeClient(for runner: RunnerConfig) async -> RunnerPolling? { - guard let secret = RunnerStore.shared.secret(for: runner.secretRef) else { return nil } + /// The runner used for the handoff + reconciliation paths: the loop-runner on + /// the active Settings → SSH connection (reached over the existing tunnel). + /// There's no separate "Loop Runner" to configure — the active SSH connection + /// IS the runner host. Empty when no SSH connection is set up. + private func effectiveRunners() -> [RunnerConfig] { + guard SSHConfigStore.shared.config.isConfigured else { return [] } + return [RunnerConfig.autoSSHRunner] + } + + private func makeClient(for runner: RunnerConfig) async -> RunnerTransport? { + // No Keychain secret → empty bearer (the auto-SSH fallback runner runs + // with auth disabled behind the tunnel). + let secret = RunnerStore.shared.secret(for: runner.secretRef) ?? "" if let remotePort = runner.sshRemotePort { if let url = await SSHTunnelManager.shared.tunneledBaseURL(remotePort: remotePort) { return LoopRunnerClient(baseURL: url, sharedSecret: secret) @@ -244,6 +255,58 @@ final class LoopRunnerPoller { return LoopRunnerClient(baseURL: url, sharedSecret: secret) } + // MARK: - Background handoff (submit) + + /// Hand an in-flight local turn off to a runner. Picks the most + /// recently-reachable configured runner, submits the messages as an async + /// turn, and returns the runner + new turn id on success (nil if there's no + /// runner or every submit attempt failed). The caller abandons the local + /// turn ONLY on a non-nil result, so a failed handoff never loses the reply. + /// Reason the last handoff submit failed, surfaced in the chat notice. + private(set) var lastHandoffError: String? 
+ + func submitHandoff(messages: [[String: String]], conversationId: String, userId: String) async -> (runner: RunnerConfig, turnId: String, model: String)? { + let sshConfig = SSHConfigStore.shared.config + guard sshConfig.isConfigured else { + lastHandoffError = "No SSH connection selected (Settings → SSH)." + Self.log.info("handoff: no active SSH connection") + return nil + } + + // Self-contained one-shot over SSH — no deployed server needed. + let turnId = UUID().uuidString + let result = await BackgroundTurnRunner.shared.run( + messages: messages, conversationId: conversationId, + userId: userId, turnId: turnId, on: sshConfig) + + switch result { + case .success(let model): + lastHandoffError = nil + return (RunnerConfig.autoSSHRunner, turnId, model) + case .failure(let reason): + lastHandoffError = reason + Self.log.error("handoff failed: \(reason, privacy: .public)") + return nil + } + } + + /// Fetch a single turn for reconciliation (completion-push tap / foreground + /// push). Tries the preferred runner first (when the local notification + /// carries `runner_id`), then any other configured runner — the remote + /// completion push has no runner_id, so we may have to probe. + func fetchTurn(turnId: String, preferredRunnerId: String?) async -> RunnerTurn? { + var runners = effectiveRunners() + if let preferredRunnerId, let idx = runners.firstIndex(where: { $0.id == preferredRunnerId }) { + let r = runners.remove(at: idx) + runners.insert(r, at: 0) + } + for runner in runners { + guard let client = await makeClient(for: runner) else { continue } + if let turn = try? await client.getTurn(id: turnId) { return turn } + } + return nil + } + private func pollRunner(_ runner: RunnerConfig) async { guard let client = await makeClient(for: runner) else { return } @@ -287,6 +350,15 @@ final class LoopRunnerPoller { lock.unlock() guard isNew else { return } + // Handoff turn: reconcile the reply straight into its conversation. 
If we + // applied it, the reply is already in the chat — skip the banner. Returns + // false for interactive turns (no conversation) and duplicates, which + // fall through to the normal notification below. + if let text = turn.finalResponse, !text.isEmpty, + RunnerTurnApplier.applyRunnerTurn(turnId: turn.id, conversationId: turn.conversationId, text: text) { + return + } + let content = UNMutableNotificationContent() content.title = "Loop · \(runner.nickname)" if let error = turn.error, !error.isEmpty { diff --git a/LoopIOS/Runner/LoopRunnerSSHClient.swift b/LoopIOS/Runner/LoopRunnerSSHClient.swift index 6e3ff4c..8a605f5 100644 --- a/LoopIOS/Runner/LoopRunnerSSHClient.swift +++ b/LoopIOS/Runner/LoopRunnerSSHClient.swift @@ -63,6 +63,30 @@ final class LoopRunnerSSHClient { return try LoopRunnerClient.jsonDecoder.decode(RunnerHealthResponse.self, from: data) } + func getTurn(id: String) async throws -> RunnerTurn { + let data = try await get(path: "/turn/\(id)", authed: true, timeout: 15) + return try LoopRunnerClient.jsonDecoder.decode(RunnerTurn.self, from: data) + } + + /// Submit a turn over SSH (handoff path). POSTs the same JSON the URLSession + /// client sends; with `async: true` the runner replies `202 {"id":...}`. + func startTurn(messages: [[String: String]], conversationId: String, userId: String, async: Bool = true) async throws -> String { + let payload: [String: Any] = [ + "messages": messages, + "conversation_id": conversationId, + "user_id": userId, + "async": async, + ] + let bodyData = try JSONSerialization.data(withJSONObject: payload) + let bodyStr = String(data: bodyData, encoding: .utf8) ?? "{}" + let data = try await post(path: "/turn", jsonBody: bodyStr, timeout: 20) + let obj = try JSONSerialization.jsonObject(with: data) as? [String: Any] + guard let turnId = obj?["id"] as? String else { + throw RunnerError.invalidResponse + } + return turnId + } + // MARK: - Internals /// Runs a single GET over SSH via curl and returns the response body. 
Uses @@ -106,6 +130,42 @@ final class LoopRunnerSSHClient { return Data(body.utf8) } + /// Runs a single POST over SSH via curl with a JSON body and returns the + /// response body. Same status-on-last-line trick as `get`. + private func post(path: String, jsonBody: String, timeout: Int) async throws -> Data { + let url = "http://127.0.0.1:\(remotePort)\(path)" + var cmd = "curl -s -m \(timeout) -w '\\n%{http_code}' -X POST" + cmd += " -H " + Self.shellQuote("Authorization: Bearer \(sharedSecret)") + cmd += " -H " + Self.shellQuote("Content-Type: application/json") + cmd += " -d " + Self.shellQuote(jsonBody) + cmd += " " + Self.shellQuote(url) + + let res = try await SSHSkill.shared.runCommand(cmd, timeout: Double(timeout + 5)) + if res.exitCode != 0 { + let detail = res.stderr.trimmingCharacters(in: .whitespacesAndNewlines) + throw RunnerError.transport( + "curl POST over SSH failed (exit \(res.exitCode))" + (detail.isEmpty ? "" : ": \(detail)")) + } + + let out = res.stdout + guard let nl = out.lastIndex(of: "\n") else { + throw RunnerError.transport("empty response from runner over SSH") + } + let codeStr = out[out.index(after: nl)...].trimmingCharacters(in: .whitespacesAndNewlines) + let body = String(out[.. String { @@ -114,3 +174,6 @@ final class LoopRunnerSSHClient { } extension LoopRunnerSSHClient: RunnerPolling {} +extension LoopRunnerSSHClient: RunnerSubmitting {} +extension LoopRunnerSSHClient: RunnerFetching {} +extension LoopRunnerSSHClient: RunnerTransport {} diff --git a/LoopIOS/Runner/PushBridge.swift b/LoopIOS/Runner/PushBridge.swift new file mode 100644 index 0000000..ecc49b1 --- /dev/null +++ b/LoopIOS/Runner/PushBridge.swift @@ -0,0 +1,43 @@ +// +// PushBridge.swift +// Loop +// +// Decoupled seam for registering this device's APNs token with the backend. +// Mirrors `AppSignals`: the public code carries no compile-time dependency on +// the concrete sender. 
A receiver is discovered at runtime by class name via +// the Objective-C runtime, so when the private implementation is absent (public +// clones), `register(...)` is a cheap no-op — nothing is transmitted or stored. +// +// The concrete receiver lives in the gitignored `LoopIOS/Private/` folder +// (`PushTokenBridge`), alongside `AnalyticsBridge`, and POSTs the token to the +// loopharness push backend. See `LoopIOS/Private/PUSH_NOTIFICATIONS.md`. +// + +import Foundation + +/// Optional sink for the APNs device token. A concrete implementation can be +/// provided at runtime; it is discovered by class name via the Objective-C +/// runtime so this file carries no compile-time dependency on it. +@objc protocol PushTokenReceiver: NSObjectProtocol { + /// Hand the hex-encoded APNs device token to the backend. `environment` is + /// the APNs environment this token belongs to ("sandbox" or "production"), + /// so the backend routes pushes to the matching APNs host. + func registerToken(_ token: String, environment: String) +} + +enum PushBridge { + /// Resolved once, lazily. `NSClassFromString` returns nil when no receiver + /// class is compiled into the build, so `receiver` stays nil and every + /// `register` is a no-op. + private static let receiver: PushTokenReceiver? = { + guard let cls = NSClassFromString("PushTokenBridge") as? NSObject.Type, + let instance = cls.init() as? PushTokenReceiver else { return nil } + return instance + }() + + /// Register the device token with the backend. Safe to call from any thread; + /// the receiver is responsible for its own threading. 
+ static func register(token: String, environment: String) { + receiver?.registerToken(token, environment: environment) + } +} diff --git a/LoopIOS/Runner/PushRegistration.swift b/LoopIOS/Runner/PushRegistration.swift index 3a982f3..6a59640 100644 --- a/LoopIOS/Runner/PushRegistration.swift +++ b/LoopIOS/Runner/PushRegistration.swift @@ -2,22 +2,27 @@ // PushRegistration.swift // Loop // -// APNs device-token registration hooks. This is the client half of the planned -// push-on-completion path: it obtains an APNs device token and persists it, so a -// future VM-side sender can wake the app (silent `content-available`) or alert -// the user when a long-running agent turn finishes while the app is backgrounded. +// APNs device-token registration hooks. This is the client half of the +// push-on-completion path: it obtains an APNs device token, persists it, and +// registers it with the loopharness push backend so a VM-side sender (e.g. a +// cron finishing on the VM) can alert the user when a long-running agent turn +// finishes while the app is backgrounded. // -// The VM-side sender is intentionally NOT built yet — `transmitToVM(token:)` is a -// documented stub. Registration is also inert by default: we only call -// `registerForRemoteNotifications()` when the user has ALREADY granted -// notification authorization (the scheduler/runner local-notification flows -// request it), so this adds no new permission prompt and changes no behavior. +// Registration is inert by default: we only call `registerForRemoteNotifications()` +// when the user has ALREADY granted notification authorization (the +// scheduler/runner local-notification flows request it), so this adds no new +// permission prompt. Backend transmission goes through `PushBridge`, whose +// concrete sender lives in the gitignored `LoopIOS/Private/` folder — so public +// clones (no sender compiled in) retain the token locally and post nothing. 
// // Wiring points (AppDelegate): // - didFinishLaunchingWithOptions -> registerIfAuthorized() -// - didRegisterForRemoteNotificationsWithDeviceToken -> store + transmitToVM +// - didRegisterForRemoteNotificationsWithDeviceToken -> store + registerWithBackend // - didFailToRegisterForRemoteNotificationsWithError -> log // +// Call `registerIfAuthorized()` again right after a notification-permission +// grant to obtain the token immediately rather than on the next launch. +// import Foundation import UserNotifications @@ -60,26 +65,70 @@ final class PushRegistration { #endif } - /// Stores the token and (eventually) hands it to the VM. Called from + /// Stores the token and hands it to the backend. Called from /// `didRegisterForRemoteNotificationsWithDeviceToken`. func didRegister(deviceToken: Data) { let hex = deviceToken.map { String(format: "%02x", $0) }.joined() UserDefaults.standard.set(hex, forKey: Self.tokenKey) Self.log.info("APNs device token registered (\(hex.count, privacy: .public) hex chars)") - transmitToVM(token: hex) + registerWithBackend(token: hex) } func didFailToRegister(error: Error) { Self.log.error("APNs registration failed: \(error.localizedDescription, privacy: .public)") } - // MARK: - Stub: transmit to VM (sender deferred) + // MARK: - Backend registration + + /// Hands the device token to `PushBridge`, which (when its private sender is + /// present) upserts it on the loopharness push backend at + /// `POST /loopharness/push/register`. Also emits an app signal for analytics. 
+ private func registerWithBackend(token: String) { + let environment = Self.apnsEnvironment() + PushBridge.register(token: token, environment: environment) + AppSignals.emit("push_token_registered", ["environment": environment]) + Self.log.info("push token handed to bridge (env: \(environment, privacy: .public))") + } + + // MARK: - APNs environment + + /// The backend `environment` string for this build's APNs token: + /// "sandbox" for development APNs, "production" for production APNs. The + /// backend uses it to pick the matching APNs host when sending. + /// + /// Resolved from the embedded provisioning profile's `aps-environment` + /// entitlement when available (development -> sandbox, production -> + /// production). App Store builds carry no embedded profile, so they fall + /// back to the compile-time configuration (Release -> production). Xcode + /// dev builds resolve to sandbox; TestFlight/App Store to production. + static func apnsEnvironment() -> String { + if let aps = apsEnvironmentFromProvisioning() { + return aps == "production" ? "production" : "sandbox" + } + #if DEBUG + return "sandbox" + #else + return "production" + #endif + } - /// Intended contract (NOT yet implemented): POST the device token to the VM - /// over the persistent tunnel (e.g. `POST /device-token` on the Go runner) - /// so a VM-side sender holding the APNs `.p8` key can push on turn/job - /// completion. For now this only logs — there is no VM endpoint or sender. - private func transmitToVM(token: String) { - Self.log.info("transmitToVM: deferred — no VM sender yet (token retained locally)") + /// Parse `aps-environment` out of the embedded mobileprovision, if present. + private static func apsEnvironmentFromProvisioning() -> String? { + guard let url = Bundle.main.url(forResource: "embedded", withExtension: "mobileprovision"), + let data = try? 
Data(contentsOf: url), + let raw = String(data: data, encoding: .ascii), + let start = raw.range(of: "") else { + return nil + } + let plistString = String(raw[start.lowerBound.. 0 ? p : 8080 + } + + /// A synthetic runner that reuses the default Settings → SSH connection as + /// its transport — used when the user hasn't added an explicit Loop Runner + /// but does have an SSH VM. `secretRef` resolves to no Keychain entry, so the + /// client sends an empty bearer (the runner must run with `shared_secret` + /// empty, i.e. auth disabled behind the tunnel). + static var autoSSHRunner: RunnerConfig { + RunnerConfig(id: autoSSHID, + nickname: "My VM", + baseURL: "", + secretRef: "com.loop.runner.secret.\(autoSSHID)", + sshRemotePort: defaultRunnerPort) + } + init(id: String = UUID().uuidString, nickname: String, baseURL: String, @@ -64,6 +88,9 @@ struct RunnerTurn: Codable, Identifiable, Equatable { let error: String? let createdAt: Date let updatedAt: Date + /// The originating conversation for handoff turns (empty/nil for interactive + /// turns). Lets the poller reconcile a completed turn into the right chat. + let conversationId: String? var isCompleted: Bool { status == "completed" || status == "error" } @@ -74,6 +101,7 @@ struct RunnerTurn: Codable, Identifiable, Equatable { case error case createdAt = "created_at" case updatedAt = "updated_at" + case conversationId = "conversation_id" } } diff --git a/LoopIOS/Runner/RunnerProvisioner.swift b/LoopIOS/Runner/RunnerProvisioner.swift new file mode 100644 index 0000000..bfd8f9c --- /dev/null +++ b/LoopIOS/Runner/RunnerProvisioner.swift @@ -0,0 +1,204 @@ +// +// RunnerProvisioner.swift +// Loop +// +// Deploys + starts the Go `loop-runner` on the active SSH VM so background +// handoffs have somewhere to land. The runner is a separate binary from +// OpenClaw; nothing else puts it on the VM. +// +// Strategy (hybrid): the linux binaries ship gzipped inside the app (asset +// catalog Data Sets). 
On first use we detect the VM's arch (`uname -m`), and if +// the right version isn't already installed we stream the gzipped binary over +// the existing SSH connection in base64 chunks (no scp/SFTP available), then +// decompress it to `~/.loop/loop-runner`. `config.json` (model key, no auth) is +// (re)written each time. `ensureRunning` then health-checks and `nohup`-starts +// it on demand — cheap enough to call on every handoff. +// +// Transfer uses only `SSHSkill.runCommand` (command string, no stdin), so each +// chunk is appended via a quoted heredoc. ~25 round-trips for a ~4MB gz — +// fine for a one-time, foreground provision. +// + +import Foundation +import CryptoKit +import os +#if canImport(UIKit) +import UIKit +#elseif canImport(AppKit) +import AppKit +#endif + +enum RunnerProvisionError: Error, LocalizedError { + case archUnsupported(String) + case noBundledBinary(String) + case remote(String) + + var errorDescription: String? { + switch self { + case .archUnsupported(let u): return "Unsupported VM architecture: \(u)" + case .noBundledBinary(let a): return "No bundled runner binary for \(a)" + case .remote(let d): return d + } + } +} + +final class RunnerProvisioner { + + static let shared = RunnerProvisioner() + private init() {} + + private static let log = Logger(subsystem: "com.bhat.intel", category: "runner-provision") + private static let remoteDir = "$HOME/.loop" + /// Loopback port the runner listens on (matches the handoff transport). + static var port: Int { RunnerConfig.defaultRunnerPort } + + /// base64 chunk size (characters). Kept well under SSH channel-request limits. + private static let chunkSize = 180_000 + + private let lock = NSLock() + private var inFlight = false + /// Hosts verified up-to-date this session, so foreground re-checks are cheap. + private var verifiedHosts: Set = [] + + // MARK: - Provision + + /// Ensure the runner binary + config are installed on the host (idempotent, + /// version-checked). 
Foreground use — the first install can take ~10s. + func provisionIfNeeded(on config: SSHConfig) async { + guard config.isConfigured else { return } + + lock.lock() + if inFlight || verifiedHosts.contains(config.host) { lock.unlock(); return } + inFlight = true + lock.unlock() + defer { lock.lock(); inFlight = false; lock.unlock() } + + do { + try await provision(on: config) + lock.lock(); verifiedHosts.insert(config.host); lock.unlock() + } catch { + Self.log.error("provision failed: \(error.localizedDescription, privacy: .public)") + } + } + + private func provision(on config: SSHConfig) async throws { + // 1. Detect arch. + let uname = try await run("uname -m", on: config, timeout: 15) + guard let arch = Self.archToken(uname.stdout) else { + throw RunnerProvisionError.archUnsupported(uname.stdout.trimmingCharacters(in: .whitespacesAndNewlines)) + } + + // 2. Load the matching gzipped binary from the app bundle. + guard let gz = NSDataAsset(name: "loop-runner-linux-\(arch)")?.data else { + throw RunnerProvisionError.noBundledBinary(arch) + } + let version = Self.shortHash(gz) + + // 3. (Re)write config + ensure dir. config.json is cheap and may change + // (model key), so always refresh it. + _ = try await run("mkdir -p \(Self.remoteDir)", on: config, timeout: 15) + try await writeRemoteFile(makeConfigJSON(), to: "\(Self.remoteDir)/config.json", on: config) + + // 4. Skip the binary transfer if the installed version matches. + let installed = (try? await run("cat \(Self.remoteDir)/version 2>/dev/null", on: config, timeout: 15))? + .stdout.trimmingCharacters(in: .whitespacesAndNewlines) + if installed == version { + Self.log.info("runner already up to date (v\(version, privacy: .public)) on \(config.host, privacy: .public)") + return + } + + // 5. Stream the gz over in base64 chunks, then decompress. 
+ Self.log.info("installing runner (\(arch, privacy: .public), v\(version, privacy: .public)) on \(config.host, privacy: .public)") + _ = try await run("rm -f \(Self.remoteDir)/runner.gz.b64", on: config, timeout: 15) + + let b64 = gz.base64EncodedString() + var idx = b64.startIndex + while idx < b64.endIndex { + let end = b64.index(idx, offsetBy: Self.chunkSize, limitedBy: b64.endIndex) ?? b64.endIndex + let chunk = String(b64[idx.. \(Self.remoteDir)/version" + let ar = try await run(assemble, on: config, timeout: 60) + guard ar.exitCode == 0 else { throw RunnerProvisionError.remote("assemble failed: \(ar.stderr)") } + Self.log.info("runner installed on \(config.host, privacy: .public)") + } + + // MARK: - Ensure running + + /// Start the runner if it isn't answering /health. Cheap; safe to call on + /// every handoff. Returns true once health is confirmed. + @discardableResult + func ensureRunning(on config: SSHConfig) async -> Bool { + guard config.isConfigured else { return false } + if await isHealthy(config) { return true } + + // Detached start: nohup + redirected stdio so it survives the SSH channel + // closing. + let start = "cd \(Self.remoteDir) && nohup ./loop-runner -config config.json " + + ">runner.log 2>&1 Bool { + let cmd = "curl -s -m 3 -o /dev/null -w '%{http_code}' 127.0.0.1:\(Self.port)/health 2>/dev/null" + guard let r = try? await run(cmd, on: config, timeout: 10) else { return false } + return r.stdout.contains("200") + } + + // MARK: - Helpers + + private func run(_ command: String, on config: SSHConfig, timeout: Double) async throws -> SSHSkill.CommandResult { + try await SSHSkill.shared.runCommand(command, on: config, timeout: timeout) + } + + /// The runner's config.json. Uses the OpenAI key (the runner calls gpt-4o) + /// and an empty shared_secret (auth disabled behind the private tunnel). 
+ /// `push_send_url` is intentionally omitted so the Go runner uses its own + /// compiled-in default — keeping the backend URL out of this committed file. + private func makeConfigJSON() -> String { + let key = KeyStore.shared.value(for: .openAI) ?? "" + let dict: [String: Any] = [ + "model_api_key": key, + "shared_secret": "", + "listen_port": Self.port, + ] + let data = (try? JSONSerialization.data(withJSONObject: dict, options: [.prettyPrinted])) ?? Data("{}".utf8) + return String(data: data, encoding: .utf8) ?? "{}" + } + + private func writeRemoteFile(_ contents: String, to path: String, on config: SSHConfig) async throws { + // Small file — a single base64 echo is fine and avoids quoting issues. + let b64 = Data(contents.utf8).base64EncodedString() + let r = try await run("printf '%s' '\(b64)' | base64 -d > \(path)", on: config, timeout: 15) + guard r.exitCode == 0 else { throw RunnerProvisionError.remote("write \(path) failed: \(r.stderr)") } + } + + private static func archToken(_ uname: String) -> String? { + switch uname.trimmingCharacters(in: .whitespacesAndNewlines) { + case "x86_64", "amd64": return "amd64" + case "aarch64", "arm64": return "arm64" + default: return nil + } + } + + private static func shortHash(_ data: Data) -> String { + SHA256.hash(data: data).prefix(6).map { String(format: "%02x", $0) }.joined() + } +} diff --git a/LoopIOS/Runner/RunnerTurnApplier.swift b/LoopIOS/Runner/RunnerTurnApplier.swift new file mode 100644 index 0000000..36dc74f --- /dev/null +++ b/LoopIOS/Runner/RunnerTurnApplier.swift @@ -0,0 +1,154 @@ +// +// RunnerTurnApplier.swift +// Loop +// +// Single, exactly-once write point for a completed Loop Runner turn that was +// handed off from a local conversation. Two delivery channels race to deliver +// the same turn: +// +// 1. the completion APNs push (tapped → AppDelegate fetches the turn), and +// 2. the foreground poller (`LoopRunnerPoller`) seeing the completed turn. 
+// +// Both funnel through `applyRunnerTurn`, which dedupes by `turn_id` against a +// persisted set so the conversation gets exactly one assistant message no +// matter which channel wins (or whether the app was killed in between). +// +// Persistence is in UserDefaults and MUST survive app termination: the app may +// be force-quit before the push arrives, then cold-started by the tap. The +// turn→conversation map is the fallback for the poller path, where the runner's +// `/turns` response is the only source of `conversation_id`. +// + +import Foundation +import os + +extension Notification.Name { + /// Posted (main queue) after a handed-off runner turn is written into a + /// conversation, so an open MessagingVC can reload if it's that chat. + static let runnerTurnApplied = Notification.Name("loop.runner.turnApplied") +} + +enum RunnerTurnApplier { + + private static let log = Logger(subsystem: "com.bhat.intel", category: "RunnerTurnApplier") + + private static let appliedKey = "loop.runner.handoff.applied" // [turnId] (ordered) + private static let mapKey = "loop.runner.handoff.map" // ["turnId\tconvId"] (ordered) + private static let cap = 50 + + /// Serializes the read-modify-write of the persisted dedup set + map so two + /// concurrent deliveries can't both pass the `contains` check. + private static let lock = NSLock() + + // MARK: - Handoff mapping + + /// What we remember about a handed-off turn until its reply lands. + struct HandoffRecord { + let conversationId: String + /// Display name of the model that ran on the VM (for the message byline). + let model: String? + } + + /// Remember which conversation + model a handed-off turn used. Called when the + /// background handoff submit succeeds, so the reply can be reconciled into the + /// right chat and stamped with the model that actually ran on the VM. + static func recordHandoff(turnId: String, conversationId: String, model: String?) 
{
+        lock.lock(); defer { lock.unlock() }
+        var map = loadMap()
+        map[turnId] = HandoffRecord(conversationId: conversationId, model: model)
+        saveMap(map)
+    }
+
+    /// Already-applied check (used to suppress a duplicate push banner).
+    static func isApplied(turnId: String) -> Bool {
+        lock.lock(); defer { lock.unlock() }
+        return loadApplied().contains(turnId)
+    }
+
+    /// The conversation a handed-off turn belongs to, from the persisted map.
+    /// Returns nil when no record is stored for `turnId`.
+    static func conversationId(forTurn turnId: String) -> String? {
+        lock.lock(); defer { lock.unlock() }
+        return loadMap()[turnId]?.conversationId
+    }
+
+    // MARK: - Apply
+
+    /// Write a completed runner turn's reply into its conversation, exactly once.
+    /// Returns true if this call performed the write, false if it was a duplicate
+    /// or the conversation couldn't be resolved. `conversationId` from the push /
+    /// poll payload takes precedence; the persisted map is the fallback.
+    /// Empty `text` is rejected up front and never claims the turn.
+    @discardableResult
+    static func applyRunnerTurn(turnId: String, conversationId: String?, text: String) -> Bool {
+        guard !text.isEmpty else { return false }
+
+        // The lock is released by hand on every exit path below (no `defer`),
+        // because it must be dropped before the main-queue dispatch further down.
+        lock.lock()
+        var applied = loadApplied()
+        if applied.contains(turnId) {
+            lock.unlock()
+            return false
+        }
+        let record = loadMap()[turnId]
+        let resolved = (conversationId?.isEmpty == false ? conversationId : nil) ?? record?.conversationId
+        guard let convId = resolved else {
+            lock.unlock()
+            log.error("applyRunnerTurn: no conversation for turn \(turnId, privacy: .public)")
+            return false
+        }
+        guard let conversation = SimpleConversationManager.shared.getConversation(by: convId) else {
+            lock.unlock()
+            log.error("applyRunnerTurn: conversation \(convId, privacy: .public) not found")
+            return false
+        }
+        // Claim the turn before writing so a racing channel sees it as applied.
+        applied.append(turnId)
+        if applied.count > cap { applied.removeFirst(applied.count - cap) }
+        saveApplied(applied)
+        lock.unlock()
+
+        var message = MessageStruct(role: "assistant", content: text)
+        // Stamp the byline with the model that ran + a "VM" indicator so it's
+        // clear this reply was produced in the background on the VM.
+        message.model = record?.model.map { "\($0) · VM" } ?? "VM"
+        // Persistence + any UI snapshot refresh stays on main (the conversation
+        // manager mutates its `currentConversation` snapshot here).
+        DispatchQueue.main.async {
+            SimpleConversationManager.shared.addMessage(message, to: conversation)
+            NotificationCenter.default.post(
+                name: .runnerTurnApplied, object: nil,
+                userInfo: ["conversation_id": convId])
+        }
+        log.info("applied runner turn \(turnId, privacy: .public) → conversation \(convId, privacy: .public)")
+        return true
+    }
+
+    // MARK: - UserDefaults helpers
+
+    /// Turn ids already applied, in the order they were claimed (oldest first).
+    private static func loadApplied() -> [String] {
+        UserDefaults.standard.stringArray(forKey: appliedKey) ?? []
+    }
+
+    /// Persist the applied-turn list; callers cap it before saving.
+    private static func saveApplied(_ ids: [String]) {
+        UserDefaults.standard.set(ids, forKey: appliedKey)
+    }
+
+    /// Decode the persisted rows (`turnId \t conversationId \t model`) into a
+    /// lookup table. Rows with fewer than two fields are skipped; a missing or
+    /// empty third field decodes as `model == nil`.
+    private static func loadMap() -> [String: HandoffRecord] {
+        let rows = UserDefaults.standard.stringArray(forKey: mapKey) ?? []
+        var map: [String: HandoffRecord] = [:]
+        for row in rows {
+            let p = row.components(separatedBy: "\t")
+            guard p.count >= 2 else { continue }
+            let model = (p.count >= 3 && !p[2].isEmpty) ? p[2] : nil
+            map[p[0]] = HandoffRecord(conversationId: p[1], model: model)
+        }
+        return map
+    }
+
+    /// Persist the handoff map as tab-separated rows, keeping at most `cap`
+    /// entries and evicting the oldest first.
+    private static func saveMap(_ map: [String: HandoffRecord]) {
+        // A dictionary has no defined order, so recency is recovered from the
+        // rows already on disk: ids persisted earlier keep their stored order
+        // and ids that are new in `map` are appended after them. Capping with
+        // `suffix` then drops the oldest rows rather than arbitrary ones (which
+        // could otherwise include the turn that was just recorded). An id that
+        // is re-recorded keeps its original position.
+        let storedIds = (UserDefaults.standard.stringArray(forKey: mapKey) ?? [])
+            .compactMap { $0.components(separatedBy: "\t").first }
+        var seen = Set<String>()
+        var orderedIds: [String] = []
+        for id in storedIds {
+            if map[id] != nil && seen.insert(id).inserted { orderedIds.append(id) }
+        }
+        // Sorted only so several brand-new ids land in a deterministic order.
+        for id in map.keys.sorted() {
+            if seen.insert(id).inserted { orderedIds.append(id) }
+        }
+        var rows: [String] = orderedIds.compactMap { id in
+            guard let record = map[id] else { return nil }
+            return "\(id)\t\(record.conversationId)\t\(record.model ?? "")"
+        }
+        if rows.count > cap { rows = Array(rows.suffix(cap)) }
+        UserDefaults.standard.set(rows, forKey: mapKey)
+    }
+}
diff --git a/LoopIOS/Runner/VMAgentRuntime.swift b/LoopIOS/Runner/VMAgentRuntime.swift
new file mode 100644
index 0000000..349ce2f
--- /dev/null
+++ b/LoopIOS/Runner/VMAgentRuntime.swift
@@ -0,0 +1,254 @@
+//
+// VMAgentRuntime.swift
+// Loop
+//
+// Shared building blocks for running a Loop agent turn on the user's VM as a
+// detached, stdlib-only Python one-shot. Two callers use it:
+//
+// - `BackgroundTurnRunner` — a single one-shot when the app is backgrounded
+//   mid-turn (writes `req-.json`, runs once, removes the request).
+// - `VMCronManager` — a recurring cron job (writes a long-lived `req.json`,
+//   `python3 run.py req.json` runs on the crontab schedule, minting a fresh
+//   `turn_id` each firing and appending to `results.ndjson`).
+//
+// The Python script (`pythonScript`) handles both modes off one config:
+// `cron: true` switches it from "write a single result file + delete the
+// request" to "append a results.ndjson line + keep the request" and mints a
+// per-run `turn_id` when none is supplied. Either way it POSTs a `runner_turn`
+// push so the device can surface the reply.
+//
+
+import Foundation
+
+enum VMAgentRuntime {
+
+    /// Where the one-shot writes its files on the VM.
+    static let remoteDir = "$HOME/.loop"
+
+    /// Same endpoint the Go runner defaults to / PUSH_NOTIFICATIONS.md documents.
+    static let pushURL = "https://dev.generalbackend.com/loopharness/push/send"
+
+    /// Pick the model + key for a remote run: the user's selected cloud provider
+    /// if it has a key, else any available cloud key (Apple's on-device model
+    /// can't run on the VM). `label` is the display name for the message byline.
+    /// Fallback order when the selection is unusable: OpenAI, Anthropic, Fireworks.
+    /// Returns nil when no cloud key is stored at all.
+    static func providerConfig() -> (provider: String, modelID: String, key: String, label: String)? {
+        // Treat an empty stored value the same as a missing key.
+        func key(_ k: KeyStore.Key) -> String? {
+            let v = KeyStore.shared.value(for: k)
+            return (v?.isEmpty == false) ? v : nil
+        }
+        let sel = ModelSelectionStore.current
+        switch sel.provider {
+        case .anthropic: if let k = key(.anthropic), let m = sel.apiModelID { return ("anthropic", m, k, sel.displayName) }
+        case .openAI: if let k = key(.openAI), let m = sel.apiModelID { return ("openai", m, k, sel.displayName) }
+        case .fireworks: if let k = key(.fireworks), let m = sel.apiModelID { return ("fireworks", m, k, sel.displayName) }
+        case .apple: break
+        }
+        if let k = key(.openAI) { return ("openai", "gpt-4o", k, "GPT-4o") }
+        if let k = key(.anthropic) { return ("anthropic", "claude-sonnet-4-6", k, "Claude Sonnet 4.6") }
+        if let k = key(.fireworks) { return ("fireworks", "accounts/fireworks/models/kimi-k2p6", k, "Kimi K2.6") }
+        return nil
+    }
+
+    /// Every API key the user has, keyed by its env-var name (KeyStore.Key
+    /// rawValue, e.g. GITHUB_PAT). Shipped to the runner so its tools can use any
+    /// connected cloud service dynamically — whatever the user has set up.
+    /// Keys with no value or an empty value are omitted.
+    static func exportableKeys() -> [String: String] {
+        var env: [String: String] = [:]
+        for key in KeyStore.Key.allCases {
+            if let v = KeyStore.shared.value(for: key), !v.isEmpty {
+                env[key.rawValue] = v
+            }
+        }
+        return env
+    }
+
+    /// Stdlib-only Python agent loop: read request → call the provider with the
+    /// VM-runnable tools (`shell`, `web_fetch`, optional `web_search`) → execute
+    /// tool calls → feed results back until a final answer (max 8 steps) → persist
+    /// the result + POST the push. Handles both OpenAI-style (`tool_calls`) and
+    /// Anthropic-style (`tool_use`) protocols. Errors are reported in the push body.
+    ///
+    /// Modes (off the request JSON):
+    /// - one-shot (default): writes `result_path` then removes the request file.
+    /// - cron (`cron: true`): mints `turn_id = <job_id>-<unix seconds>` when absent,
+    ///   appends a line to `results_path`, and keeps the request file for reuse.
+    static let pythonScript = #"""
+import atexit, json, sys, os, time, urllib.request, urllib.error, subprocess
+
+MAX_STEPS = 8
+REQUEST_PATH = sys.argv[1]
+# Read the request through a context manager so the handle is closed at once.
+with open(REQUEST_PATH) as f:
+    cfg = json.load(f)
+is_cron = bool(cfg.get("cron"))
+# `or {}` also covers an explicit `"env": null` in the request.
+ENV = cfg.get("env") or {}  # {ENV_VAR_NAME: value} — the user's connected API keys
+
+def remove_request():
+    # The one-shot request file holds the keys, so it's removed after use. A cron
+    # request is reused on every firing, so it's kept (written chmod 600).
+    # Registered with atexit so the file is also removed when the run dies on an
+    # uncaught error (e.g. a missing required config key), not only on success.
+    if not is_cron:
+        try:
+            os.remove(REQUEST_PATH)
+        except Exception:
+            pass
+
+atexit.register(remove_request)
+
+def run_tool(name, args):
+    # Execute one tool call and return its (truncated) text result. Any failure
+    # is returned as a "tool error: ..." string so the model can react to it.
+    try:
+        if name == "shell":
+            out = subprocess.run(["bash", "-lc", args.get("command", "")],
+                                 capture_output=True, text=True, timeout=60,
+                                 env={**os.environ, **ENV})
+            r = out.stdout or ""
+            if out.stderr:
+                r += "\n[stderr]\n" + out.stderr
+            return r[:6000] or "(no output)"
+        if name == "web_fetch":
+            req = urllib.request.Request(args.get("url", ""), headers={"User-Agent": "Loop/1.0"})
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                return resp.read().decode("utf-8", "ignore")[:6000]
+        if name == "web_search":
+            n = int(args.get("num_results", 5) or 5)
+            body = {"query": args.get("query", ""), "numResults": n,
+                    "contents": {"text": {"maxCharacters": 800}}}
+            req = urllib.request.Request("https://api.exa.ai/search",
+                                         data=json.dumps(body).encode("utf-8"),
+                                         headers={"x-api-key": ENV.get("EXA_API_KEY", ""),
+                                                  "Content-Type": "application/json"}, method="POST")
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                data = json.loads(resp.read().decode("utf-8"))
+            hits = ["%s\n%s\n%s" % (r.get("title", ""), r.get("url", ""), (r.get("text") or "")[:500])
+                    for r in data.get("results", [])[:n]]
+            return "\n\n".join(hits) or "(no results)"
+        return "unknown tool: " + str(name)
+    except Exception as e:
+        return "tool error: " + str(e)[:300]
+
+def tool_specs():
+    # (name, description, JSON-schema) triples; web_search only with an Exa key.
+    specs = [
+        ("shell", "Run a bash command on this machine (your VM). Connected API keys are available as environment variables (use curl).",
+         {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}),
+        ("web_fetch", "HTTP GET a URL and return the body (truncated).",
+         {"type": "object", "properties": {"url": {"type": "string"}}, "required": ["url"]}),
+    ]
+    if ENV.get("EXA_API_KEY"):
+        specs.append(("web_search", "Search the web (Exa); returns top results with snippets.",
+                      {"type": "object", "properties": {"query": {"type": "string"},
+                                                        "num_results": {"type": "integer"}}, "required": ["query"]}))
+    return specs
+
+def tools_openai():
+    return [{"type": "function", "function": {"name": n, "description": d, "parameters": p}}
+            for (n, d, p) in tool_specs()]
+
+def tools_anthropic():
+    return [{"name": n, "description": d, "input_schema": p} for (n, d, p) in tool_specs()]
+
+def note():
+    # System-prompt suffix describing the background environment and its tools.
+    refs = {
+        "GITHUB_PAT": "GitHub: curl https://api.github.com/... -H \"Authorization: Bearer $GITHUB_PAT\"",
+        "NOTION_INTEGRATION_TOKEN": "Notion: curl https://api.notion.com/v1/... -H \"Authorization: Bearer $NOTION_INTEGRATION_TOKEN\" -H \"Notion-Version: 2022-06-28\"",
+        "SLACK_USER_TOKEN": "Slack: curl https://slack.com/api/ -H \"Authorization: Bearer $SLACK_USER_TOKEN\"",
+        "DEVIN_API_KEY": "Devin: curl https://api.devin.ai/v1/... -H \"Authorization: Bearer $DEVIN_API_KEY\"",
+    }
+    lines = [refs[k] for k in refs if ENV.get(k)]
+    names = ", ".join(sorted(ENV.keys()))
+    n = ("\n\n[You are Loop running in the BACKGROUND on the user's VM. Tools: "
+         + ", ".join(s[0] for s in tool_specs())
+         + ". Device-only Loop skills (calendar, files, health, music, location) are NOT available here.")
+    if names:
+        n += " Connected API keys, set as environment variables for the shell tool: " + names + "."
+    if lines:
+        n += " Quick refs: " + " | ".join(lines)
+    n += " Use these to complete the task or answer directly, then stop.]"
+    return n
+
+def http(url, headers, body):
+    # POST `body` as JSON and return the decoded JSON response.
+    req = urllib.request.Request(url, data=json.dumps(body).encode("utf-8"),
+                                 headers=headers, method="POST")
+    with urllib.request.urlopen(req, timeout=180) as r:
+        return json.loads(r.read().decode("utf-8"))
+
+def run_openai(base, key, model, msgs):
+    # Chat-completions tool loop (OpenAI and Fireworks share this protocol).
+    headers = {"Authorization": "Bearer " + key, "Content-Type": "application/json"}
+    tools = tools_openai(); nt = note()
+    if msgs and msgs[0].get("role") == "system":
+        msgs[0]["content"] = (msgs[0].get("content") or "") + nt
+    else:
+        msgs.insert(0, {"role": "system", "content": nt.strip()})
+    for _ in range(MAX_STEPS):
+        resp = http(base, headers, {"model": model, "messages": msgs,
+                                    "tools": tools, "tool_choice": "auto", "max_tokens": 1024})
+        m = resp["choices"][0]["message"]
+        calls = m.get("tool_calls") or []
+        if not calls:
+            return m.get("content") or ""
+        msgs.append({"role": "assistant", "content": m.get("content") or "", "tool_calls": calls})
+        for c in calls:
+            try:
+                a = json.loads(c["function"].get("arguments") or "{}")
+            except Exception:
+                a = {}
+            msgs.append({"role": "tool", "tool_call_id": c["id"],
+                         "content": run_tool(c["function"]["name"], a)})
+    return "(stopped after %d tool steps)" % MAX_STEPS
+
+def run_anthropic(key, model, system, msgs):
+    # Messages-API tool loop; tool results go back as a user turn.
+    headers = {"x-api-key": key, "anthropic-version": "2023-06-01", "content-type": "application/json"}
+    tools = tools_anthropic(); system = (system or "") + note()
+    for _ in range(MAX_STEPS):
+        resp = http("https://api.anthropic.com/v1/messages", headers,
+                    {"model": model, "max_tokens": 1024, "system": system,
+                     "messages": msgs, "tools": tools})
+        content = resp.get("content", [])
+        tool_uses = [b for b in content if b.get("type") == "tool_use"]
+        if not tool_uses:
+            return "".join(b.get("text", "") for b in content if b.get("type") == "text")
+        msgs.append({"role": "assistant", "content": content})
+        results = [{"type": "tool_result", "tool_use_id": tu.get("id"),
+                    "content": run_tool(tu.get("name"), tu.get("input") or {})} for tu in tool_uses]
+        msgs.append({"role": "user", "content": results})
+    return "(stopped after %d tool steps)" % MAX_STEPS
+
+provider = cfg.get("provider", "openai")
+model = cfg["model"]; key = cfg["api_key"]; msgs = cfg.get("messages", [])
+text = ""; err = ""
+
+try:
+    if provider == "anthropic":
+        system = "\n\n".join(m.get("content", "") for m in msgs if m.get("role") == "system")
+        conv = [m for m in msgs if m.get("role") != "system"]
+        text = run_anthropic(key, model, system, conv)
+    else:
+        base = ("https://api.fireworks.ai/inference/v1/chat/completions"
+                if provider == "fireworks"
+                else "https://api.openai.com/v1/chat/completions")
+        text = run_openai(base, key, model, msgs)
+except urllib.error.HTTPError as e:
+    err = "HTTP %s: %s" % (e.code, e.read().decode("utf-8", "ignore")[:200])
+except Exception as e:
+    err = str(e)[:200]
+
+turn_id = cfg.get("turn_id") or (str(cfg.get("job_id", "job")) + "-" + str(int(time.time())))
+conv_id = cfg.get("conversation_id", "")
+
+# Persisting the result is best-effort: the push below is still attempted.
+try:
+    if is_cron:
+        line = json.dumps({"turn_id": turn_id, "conversation_id": conv_id,
+                           "text": text, "error": err, "ts": int(time.time())})
+        with open(cfg["results_path"], "a") as f:
+            f.write(line + "\n")
+    else:
+        # Context manager guarantees the result is flushed and closed.
+        with open(cfg["result_path"], "w") as f:
+            json.dump({"turn_id": turn_id, "conversation_id": conv_id,
+                       "text": text, "error": err}, f)
+except Exception:
+    pass
+
+push = {
+    "user_id": cfg["user_id"],
+    "title": cfg.get("title", "Loop"),
+    "body": ("⚠️ " + err[:150]) if err else (text[:180] or "Done"),
+    "data": {"type": "runner_turn", "turn_id": turn_id,
+             "conversation_id": conv_id, "text": text[:3000]},
+}
+try:
+    http(cfg["push_url"], {"Content-Type": "application/json"}, push)
+except Exception:
+    pass
+"""#
+}
diff --git a/LoopIOS/Runner/VMCronPoller.swift b/LoopIOS/Runner/VMCronPoller.swift
new file mode 100644
index 0000000..3247333
--- /dev/null
+++ b/LoopIOS/Runner/VMCronPoller.swift
@@ -0,0 +1,95 @@
+//
+// VMCronPoller.swift
+// Loop
+//
+// Backstop delivery for VM cron agents. The primary path is the `runner_turn`
+// push each cron firing POSTs; this poller catches runs whose push was dropped
+// (device offline / killed at fire time) by SSH-reading each job's
+// `results.ndjson` past a stored cursor and feeding new lines through
+// `RunnerTurnApplier` — which dedups by `turn_id`, so push + poll can never
+// double-write the same run into the thread.
+//
+// Foreground-only and deliberately gentle (60s): cron runs are infrequent and
+// each tick does one short SSH `cat` per job.
+//
+
+import Foundation
+import os
+
+final class VMCronPoller {
+
+    static let shared = VMCronPoller()
+    private init() {}
+
+    private static let log = Logger(subsystem: "com.bhat.intel", category: "VMCronPoller")
+
+    private let foregroundInterval: TimeInterval = 60
+    private let lock = NSLock()
+    private var foregroundTimer: DispatchSourceTimer?
+    private var pollInFlight = false
+
+    /// Start a one-shot catch-up at launch so a result that landed while the app
+    /// was away surfaces on next open without waiting a full interval.
+    func bootstrap() {
+        pollNow()
+    }
+
+    /// Start the repeating foreground poll (first tick after 3 s, then every
+    /// `foregroundInterval`). A no-op when a timer is already installed.
+    func startForegroundPolling() {
+        lock.lock()
+        guard foregroundTimer == nil else { lock.unlock(); return }
+        let timer = DispatchSource.makeTimerSource(queue: .global(qos: .utility))
+        timer.schedule(deadline: .now() + 3, repeating: foregroundInterval)
+        timer.setEventHandler { [weak self] in self?.pollNow() }
+        foregroundTimer = timer
+        lock.unlock()
+        timer.resume()
+        Self.log.info("VM cron polling started")
+    }
+
+    /// Cancel and drop the foreground timer, if any.
+    func stopForegroundPolling() {
+        lock.lock()
+        foregroundTimer?.cancel()
+        foregroundTimer = nil
+        lock.unlock()
+        Self.log.info("VM cron polling stopped")
+    }
+
+    /// Drain every stored cron job once. Skipped when a poll is already running
+    /// or when there are no jobs; jobs are drained sequentially in one Task.
+    func pollNow() {
+        lock.lock()
+        if pollInFlight { lock.unlock(); return }
+        let jobs = VMCronStore.shared.all()
+        guard !jobs.isEmpty else { lock.unlock(); return }
+        pollInFlight = true
+        lock.unlock()
+
+        Task { [weak self] in
+            guard let self else { return }
+            for job in jobs {
+                await self.drain(job)
+            }
+            self.finishPoll()
+        }
+    }
+
+    /// Clear the in-flight flag once a poll pass has finished. Kept as a
+    /// synchronous helper because `NSLock.lock()` / `unlock()` are unavailable
+    /// from asynchronous contexts, so the Task above must not call them directly.
+    private func finishPoll() {
+        lock.lock(); defer { lock.unlock() }
+        pollInFlight = false
+    }
+
+    /// Apply any unseen result lines for one job and advance its cursor.
+    /// A line's `text` is used as the message body; when it is empty the line's
+    /// `error` is shown instead (prefixed with ⚠️), and lines with neither are
+    /// skipped. Does nothing when the fetch returns nil.
+    private func drain(_ job: VMCronJob) async {
+        guard let result = await VMCronManager.shared.fetchNewResults(for: job) else { return }
+        var applied = false
+        for line in result.lines {
+            let body = !line.text.isEmpty ? line.text
+                : (line.error.isEmpty ? "" : "⚠️ \(line.error)")
+            guard !body.isEmpty else { continue }
+            let convId = line.conversationId.isEmpty ? job.conversationId : line.conversationId
+            if RunnerTurnApplier.applyRunnerTurn(turnId: line.turnId, conversationId: convId, text: body) {
+                applied = true
+            }
+        }
+        // Persist the cursor even when every line was a push-applied duplicate, so
+        // we don't re-read them forever. Re-fetch the record in case it changed.
+        if var fresh = VMCronStore.shared.all().first(where: { $0.id == job.id }) {
+            fresh.resultsCursor = result.newCursor
+            if applied { fresh.lastRunAt = Date() }
+            VMCronStore.shared.upsert(fresh)
+        }
+    }
+}
diff --git a/LoopIOS/SceneDelegate.swift b/LoopIOS/SceneDelegate.swift
index 099be79..791e1aa 100644
--- a/LoopIOS/SceneDelegate.swift
+++ b/LoopIOS/SceneDelegate.swift
@@ -73,6 +73,9 @@ class SceneDelegate: UIResponder, UIWindowSceneDelegate {
         // Start the OpenClaw message poller (watches backends for new messages).
         OpenClawMessagePoller.shared.startForegroundPolling()
 
+        // Start the VM cron backstop poller (catches up missed agent results).
+        VMCronPoller.shared.startForegroundPolling()
+
         // Keep the screen on while Loop is in the foreground. The app is
         // primarily a conversational surface — locking mid-thought breaks
         // the spell. iOS restores the idle timer automatically when the app
@@ -87,6 +90,7 @@ class SceneDelegate: UIResponder, UIWindowSceneDelegate {
         UIApplication.shared.isIdleTimerDisabled = false
         LoopRunnerPoller.shared.stopForegroundPolling()
         OpenClawMessagePoller.shared.stopForegroundPolling()
+        VMCronPoller.shared.stopForegroundPolling()
     }
 
     func sceneWillEnterForeground(_ scene: UIScene) {
@@ -96,8 +100,31 @@ class SceneDelegate: UIResponder, UIWindowSceneDelegate {
 
     func sceneDidEnterBackground(_ scene: UIScene) {
         // Called as the scene transitions from the foreground to the background.
-        // Use this method to save data, release shared resources, and store enough scene-specific state information
-        // to restore the scene back to its current state.
+        //
+        // Background handoff: if a local inference turn is in flight, hand it to a
+        // reachable Loop Runner so it finishes server-side and pushes the user
+        // back with the reply.
We use `sceneDidEnterBackground` (NOT + // `sceneWillResignActive`) on purpose — resign also fires for Control + // Center, the notification shade, incoming calls, and the app-switcher + // peek, where the user hasn't actually left and we must not hand off. + // + // The submit may need to establish an SSH tunnel, which outlives this + // synchronous callback, so it runs inside a background task. The local + // turn is abandoned only if the submit succeeds (handled inside the VC). + guard let messagingVC = findMessagingVC() else { + MessagingVC.handoffLog.info("handoff skipped: no live MessagingVC found") + return + } + let app = UIApplication.shared + var bgTask: UIBackgroundTaskIdentifier = .invalid + bgTask = app.beginBackgroundTask(withName: "loop.handoff") { + if bgTask != .invalid { app.endBackgroundTask(bgTask); bgTask = .invalid } + } + guard bgTask != .invalid else { return } + Task { + _ = await messagingVC.handoffInFlightTurnIfEligible() + if bgTask != .invalid { app.endBackgroundTask(bgTask); bgTask = .invalid } + } } // MARK: - URL Handling diff --git a/LoopIOS/Settings/IntegrationsVC.swift b/LoopIOS/Settings/IntegrationsVC.swift index 785af14..39f1cc6 100644 --- a/LoopIOS/Settings/IntegrationsVC.swift +++ b/LoopIOS/Settings/IntegrationsVC.swift @@ -122,6 +122,7 @@ final class IntegrationsVC: UIViewController { // working integrations down the list. Restore once the OAuth // flow lands. slackIntegration(), + googleWorkspaceIntegration(), githubIntegration(), devinIntegration(), twitterIntegration(), @@ -180,11 +181,7 @@ final class IntegrationsVC: UIViewController { } private func pushGitHubKeyEditor(_ key: KeyStore.Key) { - guard let nav = navigationController else { return } - var stack = nav.viewControllers - stack.append(KeysVC()) - stack.append(KeyEditVC(focusing: key)) - nav.setViewControllers(stack, animated: true) + navigationController?.pushViewController(KeyEditVC(focusing: key), animated: true) } /// Devin coding agent. 
Connection state is "did the user paste BOTH a @@ -255,14 +252,10 @@ final class IntegrationsVC: UIViewController { } private func pushDevinEditor(_ key: KeyStore.Key) { - guard let nav = navigationController else { return } - var stack = nav.viewControllers - stack.append(KeysVC()) - // KeyEditVC now edits a whole service (Devin = API key + org id) in a + // KeyEditVC edits a whole service (Devin = API key + org id) in a // single panel; `focusing:` lands the user on the specific field they // came to set (api key vs. org id) without losing the other field. - stack.append(KeyEditVC(focusing: key)) - nav.setViewControllers(stack, animated: true) + navigationController?.pushViewController(KeyEditVC(focusing: key), animated: true) } /// Notion is token-backed — connection state is "did the user paste an @@ -301,11 +294,7 @@ final class IntegrationsVC: UIViewController { } private func pushNotionKeyEditor() { - guard let nav = navigationController else { return } - var stack = nav.viewControllers - stack.append(KeysVC()) - stack.append(KeyEditVC(focusing: .notionIntegrationToken)) - nav.setViewControllers(stack, animated: true) + navigationController?.pushViewController(KeyEditVC(focusing: .notionIntegrationToken), animated: true) } /// Slack is a personal-only integration in v1 — connection state is just @@ -344,15 +333,11 @@ final class IntegrationsVC: UIViewController { } } - /// Push the Keys list + the specific key editor in one shot so back from - /// the editor lands on Keys (not Integrations). Mirrors the Mac surface, - /// where the Keys window opens with the row pre-selected. + /// Push the key editor directly onto the Integrations stack so Back from + /// the editor returns to Integrations (where the user tapped in), not the + /// generic Keys list. `focusing:` lands them on the specific field. 
private func pushSlackKeyEditor() { - guard let nav = navigationController else { return } - var stack = nav.viewControllers - stack.append(KeysVC()) - stack.append(KeyEditVC(focusing: .slackUserToken)) - nav.setViewControllers(stack, animated: true) + navigationController?.pushViewController(KeyEditVC(focusing: .slackUserToken), animated: true) } // MARK: Twitter @@ -380,23 +365,68 @@ final class IntegrationsVC: UIViewController { preferredStyle: .alert ) alert.addAction(UIAlertAction(title: "Edit Keys", style: .default) { [weak self] _ in - guard let nav = self?.navigationController else { return } - var stack = nav.viewControllers - stack.append(KeysVC()) - stack.append(KeyEditVC(focusing: .xAPIKey)) - nav.setViewControllers(stack, animated: true) + self?.navigationController?.pushViewController(KeyEditVC(focusing: .xAPIKey), animated: true) }) alert.addAction(UIAlertAction(title: "Done", style: .cancel)) present(alert, animated: true) } else { - guard let nav = navigationController else { return } - var stack = nav.viewControllers - stack.append(KeysVC()) - stack.append(KeyEditVC(focusing: .xAPIKey)) - nav.setViewControllers(stack, animated: true) + navigationController?.pushViewController(KeyEditVC(focusing: .xAPIKey), animated: true) } } + // MARK: - Google Workspace + + private func googleWorkspaceIntegration() -> Integration { + let hasToken = !((KeyStore.shared.value(for: .googleWorkspaceAccessToken) ?? "").isEmpty) + return Integration( + title: "Google Workspace", + subtitle: hasToken + ? "Connected \u{00B7} Drive, Gmail, Calendar" + : "Tap to paste your Google OAuth2 access token", + icon: "envelope", + tint: .systemBlue, + status: hasToken ? .connected : .notConnected, + handler: { vc in vc.handleGoogleWorkspaceTap() } + ) + } + + private func handleGoogleWorkspaceTap() { + let hasToken = !((KeyStore.shared.value(for: .googleWorkspaceAccessToken) ?? 
"").isEmpty) + if hasToken { + let alert = UIAlertController( + title: "Google Workspace connected", + message: "Loop can access your Google Drive, Gmail, and Calendar. Services available depend on the scopes granted to your access token.", + preferredStyle: .actionSheet + ) + alert.addAction(UIAlertAction(title: "Edit Access Token", style: .default) { [weak self] _ in + self?.pushGoogleWorkspaceKeyEditor(.googleWorkspaceAccessToken) + }) + alert.addAction(UIAlertAction(title: "Edit Refresh Token", style: .default) { [weak self] _ in + self?.pushGoogleWorkspaceKeyEditor(.googleWorkspaceRefreshToken) + }) + alert.addAction(UIAlertAction(title: "Revoke / Remove", style: .destructive) { [weak self] _ in + KeyStore.shared.setValue(nil, for: .googleWorkspaceAccessToken) + KeyStore.shared.setValue(nil, for: .googleWorkspaceRefreshToken) + KeyStore.shared.setValue(nil, for: .googleWorkspaceClientId) + KeyStore.shared.setValue(nil, for: .googleWorkspaceClientSecret) + self?.rebuildIntegrations() + }) + alert.addAction(UIAlertAction(title: "Done", style: .cancel)) + if let popover = alert.popoverPresentationController { + popover.sourceView = view + popover.sourceRect = CGRect(x: view.bounds.midX, y: view.bounds.midY, width: 0, height: 0) + popover.permittedArrowDirections = [] + } + present(alert, animated: true) + } else { + pushGoogleWorkspaceKeyEditor(.googleWorkspaceAccessToken) + } + } + + private func pushGoogleWorkspaceKeyEditor(_ key: KeyStore.Key) { + navigationController?.pushViewController(KeyEditVC(focusing: key), animated: true) + } + // MARK: - Apple Health private func healthIntegration() -> Integration { diff --git a/LoopIOS/Settings/KeyStore.swift b/LoopIOS/Settings/KeyStore.swift index a1d20a9..80340c6 100644 --- a/LoopIOS/Settings/KeyStore.swift +++ b/LoopIOS/Settings/KeyStore.swift @@ -44,6 +44,13 @@ final class KeyStore { case xAccessToken = "X_ACCESS_TOKEN" case xAccessTokenSecret = "X_ACCESS_TOKEN_SECRET" case sfBayTransit = "SF_BAY_511_API_KEY" + 
case googleWorkspaceAccessToken = "GOOGLE_WORKSPACE_ACCESS_TOKEN" + case googleWorkspaceRefreshToken = "GOOGLE_WORKSPACE_REFRESH_TOKEN" + case googleWorkspaceClientId = "GOOGLE_WORKSPACE_CLIENT_ID" + case googleWorkspaceClientSecret = "GOOGLE_WORKSPACE_CLIENT_SECRET" + case agentMail = "AGENT_MAIL_API_KEY" + case agentMailInbox = "AGENT_MAIL_INBOX" + case serpAPI = "SERPAPI_API_KEY" /// User-facing label shown in Settings. var displayName: String { @@ -69,6 +76,13 @@ final class KeyStore { case .xAccessToken: return "X Access Token" case .xAccessTokenSecret: return "X Access Token Secret" case .sfBayTransit: return "511 SF Bay API Key" + case .googleWorkspaceAccessToken: return "Google Workspace Access Token" + case .googleWorkspaceRefreshToken: return "Google Workspace Refresh Token" + case .googleWorkspaceClientId: return "Google Workspace Client ID" + case .googleWorkspaceClientSecret: return "Google Workspace Client Secret" + case .agentMail: return "AgentMail API Key" + case .agentMailInbox: return "AgentMail Inbox" + case .serpAPI: return "SerpAPI Key" } } @@ -96,6 +110,13 @@ final class KeyStore { case .xAccessToken: return "User-level access token with read+write permission from developer.x.com" case .xAccessTokenSecret: return "User-level access token secret — shown once at generation time" case .sfBayTransit: return "Free API token from 511.org/open-data/token — powers real-time Muni arrival predictions" + case .googleWorkspaceAccessToken: return "OAuth2 access token from Google Cloud Console · powers Drive, Gmail, Calendar" + case .googleWorkspaceRefreshToken: return "Optional. OAuth2 refresh token — lets the app request a new access token when the current one expires" + case .googleWorkspaceClientId: return "Optional. OAuth2 client id from Google Cloud Console — needed for token refresh" + case .googleWorkspaceClientSecret: return "Optional. 
OAuth2 client secret from Google Cloud Console — needed for token refresh" + case .agentMail: return "am_… key from agentmail.to · lets Loop read its inbox and send email (with attachments) on your behalf" + case .agentMailInbox: return "Optional. The inbox to send from, e.g. loop_email@agentmail.to. Leave blank and Loop will reuse your first inbox or create one automatically." + case .serpAPI: return "Free key from serpapi.com → powers web image search (Google Images)" } } } @@ -111,6 +132,9 @@ final class KeyStore { case github, slack, notion, obsidian case twitter case sfBayTransit + case googleWorkspace + case agentMail + case serpAPI /// Row title in the list + window title in the editor. var displayName: String { @@ -129,6 +153,9 @@ final class KeyStore { case .obsidian: return "Obsidian" case .twitter: return "X (Twitter)" case .sfBayTransit: return "511 SF Bay" + case .googleWorkspace: return "Google Workspace" + case .agentMail: return "AgentMail" + case .serpAPI: return "SerpAPI" } } @@ -152,6 +179,9 @@ final class KeyStore { case .obsidian: return "Read and write your Obsidian vault through a self-hosted relay" case .twitter: return "Post tweets to X (Twitter) with OAuth 1.0a" case .sfBayTransit: return "Real-time SF Muni bus/train arrival predictions via the 511 API" + case .googleWorkspace: return "Google Drive, Gmail, and Calendar via OAuth2 access token" + case .agentMail: return "Read Loop's email inbox and send email (with attachments) via agentmail.to" + case .serpAPI: return "Search the web for images (Google Images) and render them inline" } } @@ -176,6 +206,9 @@ final class KeyStore { case .obsidian: return [.obsidianAPI, .obsidianBaseURL, .obsidianVaultName] case .twitter: return [.xAPIKey, .xAPISecret, .xAccessToken, .xAccessTokenSecret] case .sfBayTransit: return [.sfBayTransit] + case .googleWorkspace: return [.googleWorkspaceAccessToken, .googleWorkspaceRefreshToken, .googleWorkspaceClientId, .googleWorkspaceClientSecret] + case 
.agentMail: return [.agentMail, .agentMailInbox] + case .serpAPI: return [.serpAPI] } } @@ -297,7 +330,7 @@ final class KeyStore { // hide and the URL is the whole point. Same for the Devin org id, // which is a `org-…` identifier (not a secret) the user needs to be // able to read back when verifying their setup. - if key == .obsidianBaseURL || key == .obsidianVaultName || key == .githubBaseURL || key == .devinOrgID { + if key == .obsidianBaseURL || key == .obsidianVaultName || key == .githubBaseURL || key == .devinOrgID || key == .agentMailInbox { return raw } let suffixLen = 4 diff --git a/LoopIOS/Settings/SSHConfigStore.swift b/LoopIOS/Settings/SSHConfigStore.swift index 4012781..1083056 100644 --- a/LoopIOS/Settings/SSHConfigStore.swift +++ b/LoopIOS/Settings/SSHConfigStore.swift @@ -3,8 +3,10 @@ // Loop // // Persists the user's SSH connections. Loop supports multiple saved -// connections; the list is ordered and the *first* entry is the default — -// the one the `ssh_client` skill and the Loop Runner transport connect to. +// connections in a user-defined order; one is marked *active* (`selectedID`, +// falling back to the first). The active connection is what `config` returns — +// the one the `ssh_client` skill, the SSH tunnel, and the background handoff +// connect to. // // Non-secret fields (id, name, host, port, username) live in UserDefaults as // a JSON list; the private key and passphrase for each connection live in the @@ -14,6 +16,7 @@ // import Foundation +import os import Security struct SSHConfig: Identifiable, Equatable { @@ -67,6 +70,7 @@ final class SSHConfigStore { // MARK: - Storage keys private static let listKey = "loop.ssh.connections.v2" + private static let selectedKey = "loop.ssh.selectedID" // Legacy single-config keys (migrated on first load). 
private static let legacyHostKey = "loop.ssh.host" @@ -75,21 +79,79 @@ final class SSHConfigStore { private static let legacyKeyAccount = "loop.ssh.privateKey" private static let legacyPassAccount = "loop.ssh.passphrase" + /// Gate for the one-shot seed from build-time `Secrets.xcconfig` values + /// (surfaced via Info.plist `$(SSH_*)` substitution). Set only after a seed + /// actually lands, so the user's later edits/deletes aren't re-clobbered — + /// but a fresh checkout that adds the xcconfig keys still seeds on next run. + private static let seededFromInfoPlistFlag = "loop.ssh.seededFromInfoPlist.v1" + private let defaults = UserDefaults.standard - /// Ordered connections; `connections[0]` is the default. + /// Saved connections, in user-defined display order. private(set) var connections: [SSHConfig] = [] + /// The user-selected active connection. `nil` falls back to the first + /// connection, so existing installs keep their old "first = default" behavior + /// until the user explicitly picks one. + private(set) var selectedID: UUID? + + /// Posted after the connection list changes because of an iCloud sync from + /// another device, so an open list view can refresh. + static let didChangeNotification = Notification.Name("loop.ssh.connectionsDidChange") + private init() { load() + // Reload when SSH settings arrive from another device via iCloud KVS. + NotificationCenter.default.addObserver( + self, selector: #selector(kvsChanged(_:)), + name: iCloudKVSDefaults.didChangeNotification, object: nil) + } + + /// True while `save()` is writing, so the KVS write it performs doesn't + /// re-enter `load()` (which would rebuild `connections` from the keychain + /// mid-save — before the secrets are written — and clobber the in-memory key). + private var isSaving = false + + @objc private func kvsChanged(_ note: Notification) { + guard !isSaving else { return } // ignore our own writes + let keys = (note.userInfo?["keys"] as? [String]) ?? 
[] + guard keys.contains(where: { $0.hasPrefix("loop.ssh.") }) else { return } + load() + NotificationCenter.default.post(name: Self.didChangeNotification, object: nil) + } + + /// The id of the connection that's actually active right now — the explicit + /// selection if still valid, otherwise the first connection. + var effectiveSelectedID: UUID? { + if let selectedID, connections.contains(where: { $0.id == selectedID }) { + return selectedID + } + return connections.first?.id + } + + /// Mark a connection active. Everything that reads `config` (ssh_client + /// skill, SSH tunnel, the background handoff) immediately uses it. + func select(id: UUID) { + guard connections.contains(where: { $0.id == id }) else { return } + selectedID = id + iCloudKVSDefaults.shared.set(id.uuidString, forKey: Self.selectedKey) + } + + /// Reorder the connection list (display order only; does not change which is + /// active — that's `selectedID`). + func move(from: Int, to: Int) { + guard connections.indices.contains(from), to >= 0, to <= connections.count else { return } + let item = connections.remove(at: from) + connections.insert(item, at: min(to, connections.count)) + save() } // MARK: - Default-connection shim (legacy single-config API) - /// The default connection (first in the list). Reads return an empty, - /// unconfigured config when none exist; writes upsert the default. + /// The active connection (the selected one, or the first). Reads return an + /// empty, unconfigured config when none exist; writes upsert the first. var config: SSHConfig { - get { connections.first ?? SSHConfig() } + get { connections.first(where: { $0.id == effectiveSelectedID }) ?? SSHConfig() } set { if var first = connections.first { first.host = newValue.host @@ -114,6 +176,16 @@ final class SSHConfigStore { connections.first { $0.id == id } } + /// True if a non-empty private key is actually stored in the keychain for + /// this connection. 
The editor uses it to confirm a save really persisted + /// (in-memory state always "has" the key, so it can't reveal a write failure). + func privateKeyPersists(id: UUID) -> Bool { + let account = Self.keyAccount(id) + let value = readKeychain(account: account) + Self.log.info("privateKeyPersists id=\(id.uuidString, privacy: .public) account=\(account, privacy: .public) found=\(value != nil, privacy: .public) len=\(value?.count ?? -1, privacy: .public)") + return !(value ?? "").isEmpty + } + /// Inserts a new connection (appended) or updates an existing one in place. func addOrUpdate(_ connection: SSHConfig) { if let idx = connections.firstIndex(where: { $0.id == connection.id }) { @@ -128,6 +200,10 @@ final class SSHConfigStore { guard let idx = connections.firstIndex(where: { $0.id == id }) else { return } let removed = connections.remove(at: idx) deleteSecrets(for: removed.id) + if selectedID == removed.id { + selectedID = nil + iCloudKVSDefaults.shared.set(nil, forKey: Self.selectedKey) + } save() } @@ -149,35 +225,80 @@ final class SSHConfigStore { let username: String } + private func makeConfig(_ meta: Meta) -> SSHConfig { + SSHConfig( + id: meta.id, + name: meta.name, + host: meta.host, + port: meta.port, + username: meta.username, + privateKey: readKeychain(account: Self.keyAccount(meta.id)) ?? "", + passphrase: readKeychain(account: Self.passAccount(meta.id)) ?? "") + } + private func load() { + if let raw = iCloudKVSDefaults.shared.string(forKey: Self.selectedKey) { + selectedID = UUID(uuidString: raw) + } + // Preferred: the connection list synced via iCloud KVS (JSON string). + if let json = iCloudKVSDefaults.shared.string(forKey: Self.listKey), + let data = json.data(using: .utf8), + let metas = try? JSONDecoder().decode([Meta].self, from: data) { + connections = metas.map(makeConfig) + return + } + // Legacy: an older build stored the list as `Data` in local UserDefaults. 
+ // Migrate it up to KVS (and `save()` re-writes the secrets as + // synchronizable Keychain items so they start syncing too). if let data = defaults.data(forKey: Self.listKey), let metas = try? JSONDecoder().decode([Meta].self, from: data) { - connections = metas.map { meta in - SSHConfig( - id: meta.id, - name: meta.name, - host: meta.host, - port: meta.port, - username: meta.username, - privateKey: readKeychain(account: Self.keyAccount(meta.id)) ?? "", - passphrase: readKeychain(account: Self.passAccount(meta.id)) ?? "") - } + connections = metas.map(makeConfig) + save() + defaults.removeObject(forKey: Self.listKey) return } migrateLegacyIfPresent() + // No stored or legacy connections: fall back to a connection baked in + // at build time via Secrets.xcconfig (mirrors KeyStore's Info.plist + // fallback for API keys). + if connections.isEmpty { + seedFromInfoPlistIfNeeded() + } + } + + /// One-time seed of a connection from the build-time `Secrets.xcconfig` + /// values (Info.plist `$(SSH_*)`). Only runs when nothing else is stored, + /// and only marks itself done once a config actually lands — so adding the + /// xcconfig keys to an existing install still takes effect on the next run. + private func seedFromInfoPlistIfNeeded() { + guard !defaults.bool(forKey: Self.seededFromInfoPlistFlag) else { return } + guard let seed = Self.infoPlistConfig() else { return } + connections = [seed] + save() + defaults.set(true, forKey: Self.seededFromInfoPlistFlag) + Self.log.info("Seeded SSH connection from Secrets.xcconfig host=\(seed.host, privacy: .public) user=\(seed.username, privacy: .public)") } private func save() { - let metas = connections.map { - Meta(id: $0.id, name: $0.name, host: $0.host, port: $0.port, username: $0.username) - } - if let data = try? JSONEncoder().encode(metas) { - defaults.set(data, forKey: Self.listKey) - } + isSaving = true + defer { isSaving = false } + + // Write secrets FIRST so anything that reads back (incl. 
a reload) sees + // the real keys. (The KVS write below fires didChangeNotification; the + // isSaving guard suppresses the re-entrant load, but ordering is correct + // regardless.) for c in connections { writeKeychain(account: Self.keyAccount(c.id), value: c.privateKey) writeKeychain(account: Self.passAccount(c.id), value: c.passphrase) } + + let metas = connections.map { + Meta(id: $0.id, name: $0.name, host: $0.host, port: $0.port, username: $0.username) + } + if let data = try? JSONEncoder().encode(metas), + let json = String(data: data, encoding: .utf8) { + iCloudKVSDefaults.shared.set(json, forKey: Self.listKey) + } } /// One-time migration of the old single-connection layout into the list. @@ -212,6 +333,50 @@ final class SSHConfigStore { deleteKeychain(account: Self.legacyPassAccount) } + // MARK: - Build-time config (Secrets.xcconfig → Info.plist) + + /// A non-empty Info.plist string, treating an unexpanded `$(VAR)` + /// placeholder (the xcconfig key was left blank) as missing — same rule + /// KeyStore uses for API keys. + private static func infoPlistString(_ key: String) -> String? { + guard let raw = Bundle.main.object(forInfoDictionaryKey: key) as? String else { return nil } + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty || trimmed.hasPrefix("$(") { return nil } + return trimmed + } + + /// Builds a connection from the `SSH_*` build-time values, or nil when the + /// minimum (host + username) isn't present. Port defaults to 22; the name + /// falls back to the host. + private static func infoPlistConfig() -> SSHConfig? { + guard let host = infoPlistString("SSH_HOST"), + let username = infoPlistString("SSH_USERNAME") else { return nil } + let port = infoPlistString("SSH_PORT").flatMap { Int($0) } ?? 22 + let name = infoPlistString("SSH_NAME") ?? host + let privateKey = infoPlistString("SSH_PRIVATE_KEY_B64").flatMap(decodeBase64Key) ?? "" + let passphrase = infoPlistString("SSH_PASSPHRASE") ?? 
"" + return SSHConfig( + name: name, host: host, port: port, username: username, + privateKey: privateKey, passphrase: passphrase) + } + + /// Decodes a base64-encoded private key. Accepts both standard base64 and + /// base64url (`-`/`_`), so the value can be encoded with `base64 | tr '+/' + /// '-_'` to avoid xcconfig truncating it at a `//` (which it reads as a + /// comment). Tolerates embedded whitespace and missing padding. + private static func decodeBase64Key(_ raw: String) -> String? { + var b64 = raw + .replacingOccurrences(of: "-", with: "+") + .replacingOccurrences(of: "_", with: "/") + .components(separatedBy: .whitespacesAndNewlines).joined() + guard !b64.isEmpty else { return nil } + let remainder = b64.count % 4 + if remainder > 0 { b64 += String(repeating: "=", count: 4 - remainder) } + guard let data = Data(base64Encoded: b64), + let key = String(data: data, encoding: .utf8) else { return nil } + return key + } + private static func keyAccount(_ id: UUID) -> String { "loop.ssh.key.\(id.uuidString)" } private static func passAccount(_ id: UUID) -> String { "loop.ssh.pass.\(id.uuidString)" } @@ -222,35 +387,163 @@ final class SSHConfigStore { // MARK: - Keychain helpers + /// Service namespace for the (best-effort) iCloud-synced copy. + private static let service = "com.bhat.intel.ssh" + + /// OSStatus of the last private-key write attempt (for editor diagnostics). + private(set) var lastKeyWriteStatus: OSStatus = errSecSuccess + + /// Local-first storage for SSH secrets. The PRIMARY copy is a protected file + /// in the app container — reliable regardless of Keychain/iCloud state (the + /// Keychain was silently failing to persist the multi-line private key on + /// some devices, while everything else — stored via iCloud KVS — saved fine). + /// We ALSO best-effort write the Keychain (local item + iCloud-synchronizable + /// mirror) so secrets ride the Keychain / sync across devices when that works. 
+ private func writeKeychain(account: String, value: String) { + guard !value.isEmpty else { + deleteKeychain(account: account) + return + } + // Primary: protected local file. + let savedFile = setFileSecret(account: account, value: value) + + // Best-effort: Keychain (plain local) + iCloud-synchronizable mirror. + let data = Data(value.utf8) + SecItemDelete(plainQuery(account: account) as CFDictionary) + var add = plainQuery(account: account) + add[kSecValueData as String] = data + SecItemAdd(add as CFDictionary, nil) + mirrorToSync(account: account, data: data) + + let keychainReadable = (readData(plainQuery(account: account)) != nil) + let fileReadback = (fileSecret(account: account) != nil) + let persisted = savedFile || keychainReadable + lastKeyWriteStatus = persisted ? errSecSuccess : errSecIO + Self.log.info("writeKeychain account=\(account, privacy: .public) valueLen=\(value.count, privacy: .public) savedFile=\(savedFile, privacy: .public) fileReadback=\(fileReadback, privacy: .public) keychainReadable=\(keychainReadable, privacy: .public) file=\(Self.secretsFileURL.path, privacy: .public)") + if !persisted { + Self.log.error("SSH secret write failed for \(account, privacy: .public)") + } + } + private func readKeychain(account: String) -> String? { - let query: [String: Any] = [ + // 1. Protected local file (primary). + if let s = fileSecret(account: account) { return s } + // 2. Keychain: plain device-local. + if let s = readData(plainQuery(account: account)) { return s } + // 3. Keychain: item synced in from another device (service-scoped, any sync). + if let s = readData([ kSecClass as String: kSecClassGenericPassword, kSecAttrAccount as String: account, - kSecReturnData as String: true, - kSecMatchLimit as String: kSecMatchLimitOne - ] - var result: AnyObject? - let status = SecItemCopyMatching(query as CFDictionary, &result) - guard status == errSecSuccess, let data = result as? 
Data else { return nil } - return String(data: data, encoding: .utf8) + kSecAttrService as String: Self.service, + kSecAttrSynchronizable as String: kSecAttrSynchronizableAny, + kSecUseDataProtectionKeychain as String: true, + ]) { return s } + // 4. Keychain: legacy items from older builds (no service, any sync). + if let s = readData([ + kSecClass as String: kSecClassGenericPassword, + kSecAttrAccount as String: account, + kSecAttrSynchronizable as String: kSecAttrSynchronizableAny, + ]) { return s } + return nil } - private func writeKeychain(account: String, value: String) { - deleteKeychain(account: account) - guard !value.isEmpty else { return } - let add: [String: Any] = [ + private func deleteKeychain(account: String) { + setFileSecret(account: account, value: nil) + SecItemDelete(plainQuery(account: account) as CFDictionary) + SecItemDelete([ kSecClass as String: kSecClassGenericPassword, kSecAttrAccount as String: account, - kSecValueData as String: Data(value.utf8) - ] - SecItemAdd(add as CFDictionary, nil) + kSecAttrService as String: Self.service, + kSecAttrSynchronizable as String: kSecAttrSynchronizableAny, + kSecUseDataProtectionKeychain as String: true, + ] as CFDictionary) + SecItemDelete([ + kSecClass as String: kSecClassGenericPassword, + kSecAttrAccount as String: account, + kSecAttrSynchronizable as String: kSecAttrSynchronizableAny, + ] as CFDictionary) } - private func deleteKeychain(account: String) { - let query: [String: Any] = [ + // MARK: - Local secret file (primary store) + + /// Protected JSON file holding `account -> secret`. Lives in Application + /// Support (app sandbox), written with complete file protection (encrypted + /// at rest while the device is locked). Local-only — never leaves the device. + private static var secretsFileURL: URL { + let base = (try? FileManager.default.url( + for: .applicationSupportDirectory, in: .userDomainMask, appropriateFor: nil, create: true)) + ?? 
FileManager.default.temporaryDirectory + let dir = base.appendingPathComponent("LoopSSH", isDirectory: true) + try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + return dir.appendingPathComponent("secrets.json") + } + + private func loadSecretsFile() -> [String: String] { + guard let data = try? Data(contentsOf: Self.secretsFileURL), + let dict = try? JSONDecoder().decode([String: String].self, from: data) else { + return [:] + } + return dict + } + + private func fileSecret(account: String) -> String? { + let v = loadSecretsFile()[account] + return (v?.isEmpty == false) ? v : nil + } + + @discardableResult + private func setFileSecret(account: String, value: String?) -> Bool { + var dict = loadSecretsFile() + if let value, !value.isEmpty { dict[account] = value } else { dict.removeValue(forKey: account) } + guard let data = try? JSONEncoder().encode(dict) else { return false } + do { + try data.write(to: Self.secretsFileURL, options: [.atomic, .completeFileProtection]) + return true + } catch { + Self.log.error("SSH secrets file write failed: \(error.localizedDescription, privacy: .public)") + return false + } + } + + // MARK: - Keychain primitives + + /// The original device-local layout: class + account only. + private func plainQuery(account: String) -> [String: Any] { + [kSecClass as String: kSecClassGenericPassword, kSecAttrAccount as String: account] + } + + /// Read a non-empty UTF-8 value matching `query`, or nil. + private func readData(_ query: [String: Any]) -> String? { + var q = query + q[kSecReturnData as String] = true + q[kSecMatchLimit as String] = kSecMatchLimitOne + var result: AnyObject? + guard SecItemCopyMatching(q as CFDictionary, &result) == errSecSuccess, + let data = result as? Data, + let s = String(data: data, encoding: .utf8), !s.isEmpty else { + return nil + } + return s + } + + /// Best-effort write into the iCloud-synchronizable bucket for cross-device + /// sync. 
Failures are ignored — the plain local copy is the source of truth. + private func mirrorToSync(account: String, data: Data) { + let q: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, - kSecAttrAccount as String: account + kSecAttrAccount as String: account, + kSecAttrService as String: Self.service, + kSecAttrSynchronizable as String: kCFBooleanTrue!, + kSecUseDataProtectionKeychain as String: true, ] - SecItemDelete(query as CFDictionary) + if SecItemUpdate(q as CFDictionary, [kSecValueData as String: data] as CFDictionary) == errSecSuccess { + return + } + var add = q + add[kSecValueData as String] = data + add[kSecAttrAccessible as String] = kSecAttrAccessibleAfterFirstUnlock + SecItemAdd(add as CFDictionary, nil) } + + private static let log = Logger(subsystem: "com.bhat.intel", category: "SSHConfigStore") } diff --git a/LoopIOS/Settings/SSHConnectionsVC.swift b/LoopIOS/Settings/SSHConnectionsVC.swift index b82df5b..5a0ef98 100644 --- a/LoopIOS/Settings/SSHConnectionsVC.swift +++ b/LoopIOS/Settings/SSHConnectionsVC.swift @@ -27,6 +27,8 @@ final class SSHConnectionsVC: UITableViewController { title = "SSH" navigationItem.rightBarButtonItem = UIBarButtonItem( barButtonSystemItem: .add, target: self, action: #selector(addTapped)) + // Edit button toggles drag-to-reorder mode. + navigationItem.leftBarButtonItem = editButtonItem tableView.register(UITableViewCell.self, forCellReuseIdentifier: "cell") emptyLabel.text = "No SSH connections.\nTap + to add one." @@ -34,6 +36,12 @@ final class SSHConnectionsVC: UITableViewController { emptyLabel.textAlignment = .center emptyLabel.textColor = .secondaryLabel emptyLabel.font = .preferredFont(forTextStyle: .subheadline) + + // Refresh if the list changes from an iCloud sync while this screen is open. 
+ NotificationCenter.default.addObserver( + forName: SSHConfigStore.didChangeNotification, object: nil, queue: .main) { [weak self] _ in + self?.reload() + } } override func viewWillAppear(_ animated: Bool) { @@ -75,34 +83,66 @@ final class SSHConnectionsVC: UITableViewController { } override func tableView(_ tableView: UITableView, titleForFooterInSection section: Int) -> String? { - connections.isEmpty ? nil : "The top connection is the default used for new sessions. Swipe right on another to make it the default." + connections.isEmpty ? nil : "Tap a connection to make it active (✓). The active one is used for new sessions and background handoffs. Tap Edit to reorder; swipe a row to edit, open a Terminal, or delete." } override func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell { let cell = UITableViewCell(style: .subtitle, reuseIdentifier: "cell") let conn = connections[indexPath.row] - let isDefault = indexPath.row == 0 + let isActive = conn.id == SSHConfigStore.shared.effectiveSelectedID cell.textLabel?.text = conn.displayName cell.detailTextLabel?.text = conn.endpointSummary cell.detailTextLabel?.textColor = .secondaryLabel cell.detailTextLabel?.font = .monospacedSystemFont(ofSize: 12, weight: .regular) - cell.accessoryType = .disclosureIndicator - // Leading dot: filled green for the default, hollow for the rest. - let symbol = isDefault ? "circle.fill" : "circle" - cell.imageView?.image = UIImage(systemName: symbol, - withConfiguration: UIImage.SymbolConfiguration(pointSize: 11)) - cell.imageView?.tintColor = isDefault ? .systemGreen : .tertiaryLabel + cell.imageView?.image = UIImage(systemName: "terminal", + withConfiguration: UIImage.SymbolConfiguration(pointSize: 13)) + cell.imageView?.tintColor = .secondaryLabel + + // Checkmark marks the active connection (mirrors Execution Backend). + cell.accessoryType = isActive ? 
.checkmark : .none + cell.showsReorderControl = true return cell } override func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) { tableView.deselectRow(at: indexPath, animated: true) let conn = connections[indexPath.row] - navigationController?.pushViewController(SSHSettingsVC(connection: conn), animated: true) + // Tapping the already-active connection opens its editor; tapping another + // makes it active (a second tap then edits it). + if conn.id == SSHConfigStore.shared.effectiveSelectedID { + navigationController?.pushViewController(SSHSettingsVC(connection: conn), animated: true) + } else { + SSHConfigStore.shared.select(id: conn.id) + reload() + } + } + + // MARK: - Reordering + + override func tableView(_ tableView: UITableView, canMoveRowAt indexPath: IndexPath) -> Bool { + true } + override func tableView(_ tableView: UITableView, + moveRowAt sourceIndexPath: IndexPath, to destinationIndexPath: IndexPath) { + SSHConfigStore.shared.move(from: sourceIndexPath.row, to: destinationIndexPath.row) + connections = SSHConfigStore.shared.connections + } + + // Reorder-only editing mode: no delete circle, no indent (delete stays on swipe). + override func tableView(_ tableView: UITableView, + editingStyleForRowAt indexPath: IndexPath) -> UITableViewCell.EditingStyle { + .none + } + + override func tableView(_ tableView: UITableView, shouldIndentWhileEditingRowAt indexPath: IndexPath) -> Bool { + false + } + + // MARK: - Swipe actions + override func tableView(_ tableView: UITableView, trailingSwipeActionsConfigurationForRowAt indexPath: IndexPath) -> UISwipeActionsConfiguration? 
{ let conn = connections[indexPath.row] @@ -112,24 +152,16 @@ final class SSHConnectionsVC: UITableViewController { self?.reload() done(true) } + let edit = UIContextualAction(style: .normal, title: "Edit") { [weak self] _, _, done in + self?.navigationController?.pushViewController(SSHSettingsVC(connection: conn), animated: true) + done(true) + } + edit.backgroundColor = .systemBlue let terminal = UIContextualAction(style: .normal, title: "Terminal") { [weak self] _, _, done in self?.openTerminal(conn) done(true) } terminal.backgroundColor = .systemGreen - return UISwipeActionsConfiguration(actions: [delete, terminal]) - } - - override func tableView(_ tableView: UITableView, - leadingSwipeActionsConfigurationForRowAt indexPath: IndexPath) -> UISwipeActionsConfiguration? { - guard indexPath.row != 0 else { return nil } // already the default - let conn = connections[indexPath.row] - let makeDefault = UIContextualAction(style: .normal, title: "Default") { [weak self] _, _, done in - SSHConfigStore.shared.makeDefault(id: conn.id) - self?.reload() - done(true) - } - makeDefault.backgroundColor = .systemBlue - return UISwipeActionsConfiguration(actions: [makeDefault]) + return UISwipeActionsConfiguration(actions: [delete, edit, terminal]) } } diff --git a/LoopIOS/Settings/SSHSettingsVC.swift b/LoopIOS/Settings/SSHSettingsVC.swift index 469e69d..7e228c8 100644 --- a/LoopIOS/Settings/SSHSettingsVC.swift +++ b/LoopIOS/Settings/SSHSettingsVC.swift @@ -55,6 +55,14 @@ final class SSHSettingsVC: UIViewController { private var editingConnection: SSHConfig private let isNew: Bool + /// Placeholder shown over the (empty) private-key text view. + private let keyPlaceholder = UILabel() + /// True when the edited connection already has a saved key — we show a masked + /// "saved" placeholder instead of the raw key and keep it unless replaced. + private var savedKeyPresent = false + /// True once the user has typed in the private-key field this session. 
+ private var keyFieldEdited = false + init(connection: SSHConfig?) { self.editingConnection = connection ?? SSHConfig() self.isNew = (connection == nil) @@ -174,6 +182,20 @@ final class SSHSettingsVC: UIViewController { } } + // Placeholder overlay for the private-key text view (UITextView has none). + keyPlaceholder.font = .monospacedSystemFont(ofSize: 13, weight: .regular) + keyPlaceholder.textColor = .placeholderText + keyPlaceholder.numberOfLines = 0 + keyPlaceholder.isUserInteractionEnabled = false + keyPlaceholder.translatesAutoresizingMaskIntoConstraints = false + privateKeyView.addSubview(keyPlaceholder) + NSLayoutConstraint.activate([ + keyPlaceholder.topAnchor.constraint(equalTo: privateKeyView.topAnchor, constant: 8), + keyPlaceholder.leadingAnchor.constraint(equalTo: privateKeyView.leadingAnchor, constant: 6), + keyPlaceholder.trailingAnchor.constraint(equalTo: privateKeyView.trailingAnchor, constant: -6), + ]) + privateKeyView.delegate = self + setupStatusRow() openTerminalButton.addTarget(self, action: #selector(openTerminalTapped), for: .touchUpInside) @@ -244,8 +266,22 @@ final class SSHSettingsVC: UIViewController { hostField.text = cfg.host portField.text = cfg.port == 0 ? "22" : String(cfg.port) usernameField.text = cfg.username - privateKeyView.text = cfg.privateKey passphraseField.text = cfg.passphrase + // Never re-display the stored private key. If one is saved, show a masked + // "saved" placeholder; the field stays empty unless the user types a new + // key (which then replaces it). 
+ savedKeyPresent = !cfg.privateKey.isEmpty + privateKeyView.text = "" + updateKeyPlaceholder() + } + + private func updateKeyPlaceholder() { + keyPlaceholder.isHidden = !privateKeyView.text.isEmpty + if savedKeyPresent && !keyFieldEdited { + keyPlaceholder.text = "•••••••••••••• · saved\nTap to paste a new key (leave blank to keep)" + } else { + keyPlaceholder.text = "Private Key (PEM)" + } } /// Builds an `SSHConfig` from the current field values, preserving the @@ -254,13 +290,17 @@ final class SSHSettingsVC: UIViewController { let port = Int(portField.text ?? "22") ?? 22 let host = hostField.text?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" let name = nameField.text?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "" + let typedKey = privateKeyView.text.trimmingCharacters(in: .whitespacesAndNewlines) + // Keep the saved key when the field was left untouched; only overwrite + // when the user actually typed a new one. + let privateKey = (!keyFieldEdited && savedKeyPresent) ? editingConnection.privateKey : typedKey return SSHConfig( id: editingConnection.id, name: name.isEmpty ? host : name, host: host, port: port, username: usernameField.text?.trimmingCharacters(in: .whitespacesAndNewlines) ?? "", - privateKey: privateKeyView.text.trimmingCharacters(in: .whitespacesAndNewlines), + privateKey: privateKey, passphrase: passphraseField.text ?? "" ) } @@ -285,6 +325,19 @@ final class SSHSettingsVC: UIViewController { return } + // Confirm the private key actually landed in the keychain — surfaces a + // sync/keychain write failure immediately instead of a later empty field. + if !SSHConfigStore.shared.privateKeyPersists(id: config.id) { + let st = SSHConfigStore.shared.lastKeyWriteStatus + setStatus(.failed("Couldn't save the private key to the keychain (status \(st)). Try toggling iCloud Keychain, or report this code.")) + return + } + // The key is saved; reflect that as the masked placeholder going forward. 
+ savedKeyPresent = true + keyFieldEdited = false + privateKeyView.text = "" + updateKeyPlaceholder() + setStatus(.checking) Task { @MainActor in do { @@ -325,6 +378,8 @@ final class SSHSettingsVC: UIViewController { } private static func makeTextView(placeholder: String) -> UITextView { + // (placeholder param retained for call-site clarity; the overlay label + // in setupLayout provides the actual placeholder behavior.) let tv = UITextView() tv.font = .monospacedSystemFont(ofSize: 13, weight: .regular) tv.layer.cornerRadius = 8 @@ -337,3 +392,10 @@ final class SSHSettingsVC: UIViewController { return tv } } + +extension SSHSettingsVC: UITextViewDelegate { + func textViewDidChange(_ textView: UITextView) { + if !textView.text.isEmpty { keyFieldEdited = true } + updateKeyPlaceholder() + } +} diff --git a/LoopIOS/Settings/SettingsVC.swift b/LoopIOS/Settings/SettingsVC.swift index b23ea0b..a6c0eaf 100644 --- a/LoopIOS/Settings/SettingsVC.swift +++ b/LoopIOS/Settings/SettingsVC.swift @@ -41,7 +41,7 @@ final class SettingsVC: UIViewController { private func buildSections() -> [Section] { let main: Section = isOpenClawActive ? openClawMainSection() : localMainSection() - return [ + let sections: [Section] = [ Section(header: "Core", rows: [ Row(title: "Execution Backend", icon: "externaldrive.badge.icloud") { settings in settings.navigationController?.pushViewController(ExecutionBackendVC(), animated: true) @@ -62,6 +62,20 @@ final class SettingsVC: UIViewController { } ]), ] + return AppFlags.isManaged ? hidingManagedRows(sections) : sections + } + + /// Managed builds pin the backend and lock down skills/SSH, so drop those + /// rows (and any section left empty — e.g. "Core") from Settings. Model + /// switching stays available: it routes to the same ModelPickerVC, which + /// gates picks on a configured key, so a managed user can still move + /// between the models their build can actually run. 
+ private func hidingManagedRows(_ sections: [Section]) -> [Section] { + let hidden: Set = ["Execution Backend", "Skills", "SSH"] + return sections.compactMap { section in + let rows = section.rows.filter { !hidden.contains($0.title) } + return rows.isEmpty ? nil : Section(header: section.header, rows: rows) + } } /// Local backend (the app's original on-device settings). @@ -79,6 +93,9 @@ final class SettingsVC: UIViewController { Row(title: "Scheduled", icon: "calendar.badge.clock") { settings in settings.navigationController?.pushViewController(ScheduledTasksVC(), animated: true) }, + Row(title: "VM Agents", icon: "clock.arrow.2.circlepath") { settings in + settings.navigationController?.pushViewController(VMCronTasksVC(), animated: true) + }, Row(title: "Subagents", icon: "hammer") { settings in settings.navigationController?.pushViewController(SubagentsListVC(), animated: true) }, diff --git a/LoopIOS/Settings/VMCronTasksVC.swift b/LoopIOS/Settings/VMCronTasksVC.swift new file mode 100644 index 0000000..6c2406b --- /dev/null +++ b/LoopIOS/Settings/VMCronTasksVC.swift @@ -0,0 +1,261 @@ +// +// VMCronTasksVC.swift +// Loop +// +// Settings → VM Agents. Lists recurring jobs that run on the user's SSH VM via +// cron (see VMCronManager). Create with the "+" button, swipe to delete (which +// also removes the cron entry + files on the VM), tap a row to open its thread. 
#if os(iOS)

import UIKit

/// Settings → VM Agents. Lists the jobs returned by `VMCronManager.shared.list()`,
/// newest first. "+" presents the create form, swiping a row deletes the job,
/// and tapping a row opens the job's conversation.
final class VMCronTasksVC: UIViewController {

    private let tableView = UITableView(frame: .zero, style: .insetGrouped)
    private var jobs: [VMCronJob] = []

    override func viewDidLoad() {
        super.viewDidLoad()
        title = "VM Agents"
        view.backgroundColor = .systemGroupedBackground

        navigationItem.rightBarButtonItem = UIBarButtonItem(
            barButtonSystemItem: .add, target: self, action: #selector(addTapped))

        tableView.translatesAutoresizingMaskIntoConstraints = false
        tableView.dataSource = self
        tableView.delegate = self
        tableView.register(UITableViewCell.self, forCellReuseIdentifier: "row")
        view.addSubview(tableView)
        NSLayoutConstraint.activate([
            tableView.leadingAnchor.constraint(equalTo: view.leadingAnchor),
            tableView.trailingAnchor.constraint(equalTo: view.trailingAnchor),
            tableView.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor),
            tableView.bottomAnchor.constraint(equalTo: view.bottomAnchor),
        ])
        reload()
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        reload()
    }

    /// Re-reads the job list from the manager (newest first) and redraws the table.
    private func reload() {
        jobs = VMCronManager.shared.list().sorted { $0.createdAt > $1.createdAt }
        tableView.reloadData()
    }

    // MARK: - Create

    @objc private func addTapped() {
        let create = VMCronCreateVC { [weak self] in self?.reload() }
        let nav = UINavigationController(rootViewController: create)
        present(nav, animated: true)
    }

    // MARK: - Per-row actions

    /// Loads the job's conversation into the `MessagingVC` found on the
    /// navigation stack and pops back to the root. Does nothing when the
    /// conversation or the messaging controller can't be found.
    private func openThread(for job: VMCronJob) {
        guard let conv = SimpleConversationManager.shared.getConversation(by: job.conversationId),
              let nav = navigationController,
              let messagingVC = nav.viewControllers.first(where: { $0 is MessagingVC }) as? MessagingVC else { return }
        messagingVC.loadConversation(conv)
        nav.popToRootViewController(animated: true)
    }

    /// Removes `job` from the visible list right away, then asks the manager
    /// to delete it in the background (best-effort).
    private func delete(_ job: VMCronJob) {
        // Drop the row from the on-screen copy immediately so it disappears.
        jobs.removeAll { $0.id == job.id }
        tableView.reloadData()
        Task { @MainActor [weak self] in
            _ = await VMCronManager.shared.delete(id: job.id)
            // Resync with the manager once cleanup has finished. Without this,
            // a reload() that ran mid-cleanup (viewWillAppear / onSaved) could
            // bring the row back and leave it on screen indefinitely.
            self?.reload()
        }
    }
}

extension VMCronTasksVC: UITableViewDataSource, UITableViewDelegate {
    func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
        // Always at least one row: the empty-state placeholder.
        return max(jobs.count, 1)
    }

    func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
        let cell = tableView.dequeueReusableCell(withIdentifier: "row", for: indexPath)
        var config = cell.defaultContentConfiguration()
        if jobs.isEmpty {
            config.text = "No VM agents yet."
            config.secondaryText = "Tap + or ask Loop, e.g. \"every 2 hours read Hacker News and send me the top stories\"."
            config.textProperties.color = .secondaryLabel
            cell.contentConfiguration = config
            cell.selectionStyle = .none
            cell.accessoryType = .none
            return cell
        }
        let job = jobs[indexPath.row]
        config.text = job.title
        var secondary = job.humanSchedule
        if let last = job.lastRunAt {
            secondary += " · last run " + Self.relative.localizedString(for: last, relativeTo: Date())
        }
        config.secondaryText = secondary
        config.image = UIImage(systemName: "clock.arrow.2.circlepath")
        cell.contentConfiguration = config
        cell.accessoryType = .disclosureIndicator
        cell.selectionStyle = .default
        return cell
    }

    func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) {
        tableView.deselectRow(at: indexPath, animated: true)
        // The placeholder row is not backed by a job.
        guard !jobs.isEmpty, indexPath.row < jobs.count else { return }
        openThread(for: jobs[indexPath.row])
    }

    func tableView(_ tableView: UITableView,
                   trailingSwipeActionsConfigurationForRowAt indexPath: IndexPath) -> UISwipeActionsConfiguration? {
        guard !jobs.isEmpty, indexPath.row < jobs.count else { return nil }
        let job = jobs[indexPath.row]
        let deleteAction = UIContextualAction(style: .destructive, title: "Delete") { [weak self] _, _, completion in
            self?.delete(job)
            completion(true)
        }
        return UISwipeActionsConfiguration(actions: [deleteAction])
    }

    private static let relative = RelativeDateTimeFormatter()
}

// MARK: - Create form

/// Modal form for a new VM agent: title, schedule (shorthand or cron
/// expression) and prompt. Calls `onSaved` and dismisses itself after
/// `VMCronManager.shared.create` succeeds.
private final class VMCronCreateVC: UIViewController, UITextViewDelegate {

    private let onSaved: () -> Void
    private let titleField = UITextField()
    private let scheduleField = UITextField()
    private let promptView = UITextView()
    private let promptPlaceholder = "What should run each time? e.g. Fetch the Hacker News front page and list the top 5 stories with a one-line summary and link each."
    private var saveButton: UIBarButtonItem!

    init(onSaved: @escaping () -> Void) {
        self.onSaved = onSaved
        super.init(nibName: nil, bundle: nil)
    }
    required init?(coder: NSCoder) { fatalError("init(coder:) has not been implemented") }

    override func viewDidLoad() {
        super.viewDidLoad()
        title = "New VM Agent"
        view.backgroundColor = .systemGroupedBackground

        navigationItem.leftBarButtonItem = UIBarButtonItem(
            barButtonSystemItem: .cancel, target: self, action: #selector(cancelTapped))
        saveButton = UIBarButtonItem(
            barButtonSystemItem: .save, target: self, action: #selector(saveTapped))
        navigationItem.rightBarButtonItem = saveButton

        titleField.placeholder = "Title (e.g. HN top stories)"
        titleField.borderStyle = .roundedRect
        titleField.autocapitalizationType = .sentences

        scheduleField.placeholder = "Schedule: 2h, 30m, 1d, or a cron expr"
        scheduleField.borderStyle = .roundedRect
        scheduleField.autocapitalizationType = .none
        scheduleField.autocorrectionType = .no

        promptView.font = .preferredFont(forTextStyle: .body)
        promptView.layer.cornerRadius = 8
        promptView.layer.borderWidth = 1
        promptView.layer.borderColor = UIColor.separator.cgColor
        promptView.delegate = self
        // UITextView has no placeholder of its own; the placeholder state is
        // marked by the text colour and checked in the delegate / saveTapped.
        promptView.text = promptPlaceholder
        promptView.textColor = .placeholderText

        let scheduleHint = label("Cron times use the VM's local timezone. Shorthand: 30m = every 30 min, 2h = every 2 hours, 1d = daily 9am.")
        let promptLabel = label("Prompt")
        promptLabel.font = .preferredFont(forTextStyle: .headline)

        let stack = UIStackView(arrangedSubviews: [
            titleField, scheduleField, scheduleHint, promptLabel, promptView,
        ])
        stack.axis = .vertical
        stack.spacing = 12
        stack.setCustomSpacing(4, after: scheduleField)
        stack.translatesAutoresizingMaskIntoConstraints = false
        view.addSubview(stack)
        NSLayoutConstraint.activate([
            stack.leadingAnchor.constraint(equalTo: view.layoutMarginsGuide.leadingAnchor),
            stack.trailingAnchor.constraint(equalTo: view.layoutMarginsGuide.trailingAnchor),
            stack.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 16),
            promptView.heightAnchor.constraint(equalToConstant: 160),
        ])
    }

    /// Builds a multi-line footnote-style label in the secondary label colour.
    private func label(_ text: String) -> UILabel {
        let l = UILabel()
        l.text = text
        l.numberOfLines = 0
        l.font = .preferredFont(forTextStyle: .footnote)
        l.textColor = .secondaryLabel
        return l
    }

    func textViewDidBeginEditing(_ textView: UITextView) {
        if textView.textColor == .placeholderText {
            textView.text = ""
            textView.textColor = .label
        }
    }
    func textViewDidEndEditing(_ textView: UITextView) {
        if textView.text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
            textView.text = promptPlaceholder
            textView.textColor = .placeholderText
        }
    }

    @objc private func cancelTapped() { dismiss(animated: true) }

    /// Validates the form, swaps Save for a spinner while the manager creates
    /// the job, then either dismisses (success) or restores Save and shows
    /// the failure reason.
    @objc private func saveTapped() {
        let title = (titleField.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
        let schedule = (scheduleField.text ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
        let prompt = promptView.textColor == .placeholderText
            ? "" : promptView.text.trimmingCharacters(in: .whitespacesAndNewlines)

        guard !title.isEmpty, !prompt.isEmpty else {
            return showError("Please enter a title and a prompt.")
        }
        guard let parsed = VMCronManager.parseSchedule(schedule) else {
            return showError("Couldn't read the schedule. Use shorthand like 2h or 30m, or a 5-field cron expression like 0 */2 * * *.")
        }

        saveButton.isEnabled = false
        let spinner = UIActivityIndicatorView(style: .medium)
        spinner.startAnimating()
        navigationItem.rightBarButtonItem = UIBarButtonItem(customView: spinner)

        Task { [weak self] in
            let result = await VMCronManager.shared.create(
                title: title, prompt: prompt, cronExpr: parsed.cron, humanSchedule: parsed.human)
            await MainActor.run {
                guard let self else { return }
                switch result {
                case .success:
                    self.onSaved()
                    self.dismiss(animated: true)
                case .failure(let reason):
                    self.navigationItem.rightBarButtonItem = self.saveButton
                    self.saveButton.isEnabled = true
                    self.showError(reason)
                }
            }
        }
    }

    private func showError(_ message: String) {
        let alert = UIAlertController(title: "Couldn't create agent", message: message, preferredStyle: .alert)
        alert.addAction(UIAlertAction(title: "OK", style: .default))
        present(alert, animated: true)
    }
}

#endif
/// Thin wrapper around the AgentMail REST API. Methods return raw
/// `[String: Any]` payloads so callers (AgentMailSkill) can shape them
/// however the model wants. Foundation-only so it builds on iOS/macOS alike.
///
/// Every completion handler is invoked on the main queue.
final class AgentMailClient {

    static let shared = AgentMailClient()

    static let baseURL = "https://api.agentmail.to"

    /// Dedicated session so attachment uploads don't stall on the shared
    /// session's defaults. Attachments are base64-inlined in the JSON body,
    /// so give the resource timeout some headroom for a few-MB PDF.
    private let session: URLSession = {
        let config = URLSessionConfiguration.ephemeral
        config.timeoutIntervalForRequest = 30
        config.timeoutIntervalForResource = 120
        return URLSession(configuration: config)
    }()

    /// Characters allowed unescaped inside ONE path segment: the URL path set
    /// minus "/", so an id containing a slash cannot split into extra segments.
    private static let pathSegmentAllowed: CharacterSet = {
        var allowed = CharacterSet.urlPathAllowed
        allowed.remove(charactersIn: "/")
        return allowed
    }()

    private init() {}

    // MARK: - Errors

    enum AgentMailError: Error {
        case notConnected
        case transport
        case malformedResponse
        /// Non-2xx response. `status` is the HTTP code; `message` is the
        /// server's human-readable detail (best-effort parsed).
        case api(status: Int, message: String)

        /// One-liner the skill passes back through the `function` role so the
        /// model has something concrete to relay to the user.
        var hint: String {
            switch self {
            case .notConnected:
                return "AgentMail isn't connected. Ask the user to paste their AgentMail API key in Settings \u{2192} Keys \u{2192} AgentMail."
            case .transport:
                return "Network error talking to api.agentmail.to. Suggest retrying."
            case .malformedResponse:
                return "AgentMail returned an unexpected response shape."
            case .api(let status, _):
                return AgentMailClient.recoveryHint(forStatus: status)
            }
        }

        /// Short stable error code for the JSON payload back to the model.
        var code: String {
            switch self {
            case .notConnected: return "agentmail_not_connected"
            case .transport: return "agentmail_transport_failed"
            case .malformedResponse: return "agentmail_malformed_response"
            case .api(let status, _): return "agentmail_http_\(status)"
            }
        }
    }

    /// Maps an HTTP status to the recovery advice relayed to the model.
    private static func recoveryHint(forStatus status: Int) -> String {
        switch status {
        case 401, 403:
            return "The AgentMail API key is invalid or lacks permission. Ask the user to check it in Settings \u{2192} Keys \u{2192} AgentMail."
        case 404:
            return "AgentMail resource not found. Double-check the inbox or message id."
        case 422:
            return "AgentMail rejected the request as invalid — check the recipient address and required fields."
        case 429:
            return "AgentMail rate-limited the call. Wait a moment and retry."
        default:
            return "AgentMail returned HTTP \(status). See https://docs.agentmail.to for details."
        }
    }

    // MARK: - Inboxes

    /// GET /v0/inboxes — returns the raw payload (expects an `inboxes` array).
    func listInboxes(completion: @escaping (Result<[String: Any], AgentMailError>) -> Void) {
        request(method: "GET", path: "/v0/inboxes", queryItems: nil, body: nil, completion: completion)
    }

    /// POST /v0/inboxes — create an inbox. `username`/`displayName` optional;
    /// AgentMail auto-assigns an address when `username` is nil.
    func createInbox(username: String?,
                     displayName: String?,
                     completion: @escaping (Result<[String: Any], AgentMailError>) -> Void) {
        var body: [String: Any] = [:]
        if let username, !username.isEmpty { body["username"] = username }
        if let displayName, !displayName.isEmpty { body["display_name"] = displayName }
        request(method: "POST", path: "/v0/inboxes", queryItems: nil, body: body, completion: completion)
    }

    // MARK: - Messages

    /// GET /v0/inboxes/{inbox_id}/messages
    func listMessages(inboxId: String,
                      limit: Int?,
                      completion: @escaping (Result<[String: Any], AgentMailError>) -> Void) {
        var items: [URLQueryItem] = []
        if let limit { items.append(URLQueryItem(name: "limit", value: "\(limit)")) }
        request(method: "GET",
                path: "/v0/inboxes/\(pathEscape(inboxId))/messages",
                queryItems: items.isEmpty ? nil : items,
                body: nil,
                completion: completion)
    }

    /// GET /v0/inboxes/{inbox_id}/messages/{message_id}
    func getMessage(inboxId: String,
                    messageId: String,
                    completion: @escaping (Result<[String: Any], AgentMailError>) -> Void) {
        request(method: "GET",
                path: "/v0/inboxes/\(pathEscape(inboxId))/messages/\(pathEscape(messageId))",
                queryItems: nil,
                body: nil,
                completion: completion)
    }

    /// POST /v0/inboxes/{inbox_id}/messages/send. `attachments` are already
    /// shaped per the AgentMail schema (filename / content_type / base64
    /// `content`). Pass an empty array to send a plain message.
    func sendMessage(inboxId: String,
                     to: [String],
                     subject: String?,
                     text: String?,
                     html: String?,
                     cc: [String]?,
                     attachments: [[String: Any]],
                     completion: @escaping (Result<[String: Any], AgentMailError>) -> Void) {
        var body: [String: Any] = ["to": to]
        if let subject { body["subject"] = subject }
        if let text, !text.isEmpty { body["text"] = text }
        if let html, !html.isEmpty { body["html"] = html }
        if let cc, !cc.isEmpty { body["cc"] = cc }
        if !attachments.isEmpty { body["attachments"] = attachments }
        request(method: "POST",
                path: "/v0/inboxes/\(pathEscape(inboxId))/messages/send",
                queryItems: nil,
                body: body,
                completion: completion)
    }

    // MARK: - Request plumbing

    /// Sends one authenticated JSON request and reports the outcome on the
    /// main queue.
    ///
    /// - Parameters:
    ///   - method: HTTP method.
    ///   - path: Path appended to `baseURL`; segments must already be escaped.
    ///   - queryItems: Optional query items appended to the URL.
    ///   - body: Optional JSON object sent as the request body.
    ///   - completion: `.success` with the decoded top-level JSON object (or
    ///     `[:]` when a 2xx body isn't a JSON object); `.failure` otherwise.
    private func request(method: String,
                         path: String,
                         queryItems: [URLQueryItem]?,
                         body: [String: Any]?,
                         completion: @escaping (Result<[String: Any], AgentMailError>) -> Void) {
        guard let key = KeyStore.shared.value(for: .agentMail),
              !key.isEmpty else {
            DispatchQueue.main.async { completion(.failure(.notConnected)) }
            return
        }
        guard var components = URLComponents(string: AgentMailClient.baseURL + path) else {
            DispatchQueue.main.async { completion(.failure(.transport)) }
            return
        }
        if let queryItems, !queryItems.isEmpty {
            components.queryItems = (components.queryItems ?? []) + queryItems
        }
        guard let url = components.url else {
            DispatchQueue.main.async { completion(.failure(.transport)) }
            return
        }

        var req = URLRequest(url: url)
        req.httpMethod = method
        req.setValue("Bearer \(key)", forHTTPHeaderField: "Authorization")
        req.setValue("application/json", forHTTPHeaderField: "Content-Type")
        if let body {
            // JSONSerialization raises an Objective-C exception (not a Swift
            // error) for non-JSON values, so validate first. Fail instead of
            // silently sending an empty POST. Reported as a client-side 400
            // because the error enum has no dedicated case and adding one
            // could break exhaustive switches elsewhere.
            guard JSONSerialization.isValidJSONObject(body),
                  let encoded = try? JSONSerialization.data(withJSONObject: body) else {
                DispatchQueue.main.async {
                    completion(.failure(.api(status: 400,
                                             message: "Request body could not be encoded as JSON.")))
                }
                return
            }
            req.httpBody = encoded
        }

        let task = session.dataTask(with: req) { data, response, error in
            DispatchQueue.main.async {
                if error != nil { completion(.failure(.transport)); return }
                guard let http = response as? HTTPURLResponse, let data else {
                    completion(.failure(.transport)); return
                }
                let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any]
                if (200..<300).contains(http.statusCode) {
                    completion(.success(json ?? [:]))
                } else {
                    let message = AgentMailClient.parseError(json: json, data: data, status: http.statusCode)
                    completion(.failure(.api(status: http.statusCode, message: message)))
                }
            }
        }
        task.resume()
    }

    /// Best-effort extraction of a human-readable error message from an
    /// AgentMail error body. Falls back to the raw string / status code.
    private static func parseError(json: [String: Any]?, data: Data, status: Int) -> String {
        if let message = json?["message"] as? String { return message }
        if let detail = json?["detail"] as? String { return detail }
        if let error = json?["error"] as? String { return error }
        if let error = json?["error"] as? [String: Any],
           let message = error["message"] as? String { return message }
        if let raw = String(data: data, encoding: .utf8), !raw.isEmpty, raw.count < 500 {
            return raw
        }
        return "HTTP \(status)"
    }

    /// Percent-escape a single path segment. Inbox ids are email addresses,
    /// so `@` and `.` survive unescaped, while `/` and anything outside the
    /// URL path set is escaped so the id stays within one segment.
    private func pathEscape(_ segment: String) -> String {
        return segment.addingPercentEncoding(withAllowedCharacters: AgentMailClient.pathSegmentAllowed) ?? segment
    }
}
/// AgentMail tool for the model: one `agent_mail` function with an `action`
/// selector (send_message / list_messages / get_message / list_inboxes).
/// Talks to the API through `AgentMailClient` and answers with function-role
/// messages carrying JSON.
final class AgentMailSkill {
    static let shared = AgentMailSkill()

    /// Per-attachment byte cap. base64 inflates by ~33%, so a 20MB file lands
    /// around 27MB of JSON — generous for a PDF/image without risking OOM.
    private static let maxAttachmentBytes = 20 * 1024 * 1024

    /// Resolved sender inbox id, cached for the session so we don't re-list /
    /// re-create on every send. Guarded by `lock`.
    private var cachedInboxId: String?
    private let lock = NSLock()

    private init() {}

    // MARK: - System prompt

    static let systemPromptFragment: String = """
You can send and read email on the user's behalf through AgentMail (agentmail.to) via the `agent_mail` tool. Mail is sent from the user's own AgentMail inbox (e.g. loop_email@agentmail.to).
- agent_mail (action: "send_message"): send an email. Params: to (required — address or array of addresses), subject, text (the body), cc (optional), html (optional). To attach a file: set attach_latest=true to attach the most recently generated file, optionally with attach_kind ("pdf", "image", "story", or "file") to disambiguate; or pass attachment_paths (array of workspace-relative paths like "pdfs/report.pdf").
- agent_mail (action: "list_messages"): list recent messages in the inbox. Params: limit (default 20, max 100).
- agent_mail (action: "get_message"): read one message. Params: message_id (required).
- agent_mail (action: "list_inboxes"): list the AgentMail inboxes available to this key.

Workflow tips:
- To email a PDF/image/story you just created for the user, call send_message with attach_latest=true and attach_kind set to the matching type. The file must have finished generating first — if a generation is still in progress, wait for it to appear before sending.
- When the user says "email me X", default the recipient to their own address if they've given it; otherwise ask once.
- If a tool returns {"error":"agentmail_not_connected"}, tell the user to paste their AgentMail API key in Settings → Keys → AgentMail. Don't retry until they have.
"""

    // MARK: - Tool schemas

    static let tools: [[String: Any]] = [
        [
            "type": "function",
            "function": [
                "name": "agent_mail",
                "description": "Send and read email via the user's AgentMail (agentmail.to) inbox. Supports sending with attachments — including the file Loop just generated.",
                "parameters": [
                    "type": "object",
                    "properties": [
                        "action": [
                            "type": "string",
                            "enum": ["send_message", "list_messages", "get_message", "list_inboxes"],
                            "description": "The AgentMail action to perform."
                        ],
                        "to": [
                            "type": ["string", "array"],
                            "items": ["type": "string"],
                            "description": "Recipient address or addresses (for send_message)."
                        ],
                        "cc": [
                            "type": ["string", "array"],
                            "items": ["type": "string"],
                            "description": "CC address or addresses (for send_message, optional)."
                        ],
                        "subject": [
                            "type": "string",
                            "description": "Email subject (for send_message)."
                        ],
                        "text": [
                            "type": "string",
                            "description": "Plain-text body of the email (for send_message)."
                        ],
                        "html": [
                            "type": "string",
                            "description": "Optional HTML body (for send_message)."
                        ],
                        "attach_latest": [
                            "type": "boolean",
                            "description": "If true, attach the most recently generated file in the workspace (for send_message)."
                        ],
                        "attach_kind": [
                            "type": "string",
                            "enum": ["pdf", "image", "story", "file"],
                            "description": "Which kind of recently generated file to attach when attach_latest is true. Omit to attach the newest of any kind."
                        ],
                        "attachment_paths": [
                            "type": "array",
                            "items": ["type": "string"],
                            "description": "Explicit workspace-relative file paths to attach (e.g. \"pdfs/report.pdf\")."
                        ],
                        "message_id": [
                            "type": "string",
                            "description": "Message id (for get_message)."
                        ],
                        "limit": [
                            "type": "integer",
                            "description": "Max messages to return for list_messages. Default 20, max 100."
                        ],
                        "inbox": [
                            "type": "string",
                            "description": "Optional inbox id/address to operate on. Defaults to the configured or first inbox."
                        ]
                    ],
                    "required": ["action"]
                ]
            ]
        ]
    ]

    static let toolNames: Set<String> = ["agent_mail"]

    func handles(functionName: String) -> Bool {
        return AgentMailSkill.toolNames.contains(functionName)
    }

    /// Short progress label for the UI while the call runs; nil for calls this
    /// skill doesn't own.
    func statusText(for call: FunctionCallStruct) -> String? {
        guard call.name == "agent_mail" else { return nil }
        switch call.arguments["action"] as? String ?? "" {
        case "send_message": return "sending email"
        case "list_messages": return "reading inbox"
        case "get_message": return "reading email"
        case "list_inboxes": return "listing inboxes"
        default: return "accessing AgentMail"
        }
    }

    // MARK: - Dispatch

    /// Runs one `agent_mail` call and hands the reply message to `completion`.
    func handle(functionCall: FunctionCallStruct,
                completion: @escaping (MessageStruct) -> Void) {
        let args = functionCall.arguments
        guard let action = args["action"] as? String, !action.isEmpty else {
            completion(missingArgs(expected: "action")); return
        }
        let explicitInbox = (args["inbox"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines)

        switch action {
        case "list_inboxes":
            AgentMailClient.shared.listInboxes { [self] result in
                completion(map(result))
            }

        case "list_messages":
            resolveInbox(explicit: explicitInbox) { [self] inboxResult in
                switch inboxResult {
                case .failure(let err): completion(errorMessage(err))
                case .success(let inboxId):
                    // Clamp to the documented 1...100 range; default 20.
                    let limit = min(max(intArg(args["limit"]) ?? 20, 1), 100)
                    AgentMailClient.shared.listMessages(inboxId: inboxId, limit: limit) { result in
                        completion(self.map(result))
                    }
                }
            }

        case "get_message":
            guard let messageId = (args["message_id"] as? String)?
                .trimmingCharacters(in: .whitespacesAndNewlines), !messageId.isEmpty else {
                completion(missingArgs(expected: "message_id")); return
            }
            resolveInbox(explicit: explicitInbox) { [self] inboxResult in
                switch inboxResult {
                case .failure(let err): completion(errorMessage(err))
                case .success(let inboxId):
                    AgentMailClient.shared.getMessage(inboxId: inboxId, messageId: messageId) { result in
                        completion(self.map(result))
                    }
                }
            }

        case "send_message":
            handleSend(args: args, explicitInbox: explicitInbox, completion: completion)

        default:
            completion(functionMessage(payload: [
                "error": "unknown_action",
                "hint": "Valid actions: send_message, list_messages, get_message, list_inboxes"
            ]))
        }
    }

    // MARK: - Send

    /// Handles `send_message`: validates recipients, assembles attachments on
    /// a background queue, resolves the sender inbox, then sends.
    private func handleSend(args: [String: Any],
                            explicitInbox: String?,
                            completion: @escaping (MessageStruct) -> Void) {
        let to = stringList(args["to"])
        guard !to.isEmpty else {
            completion(missingArgs(expected: "to")); return
        }
        let cc = stringList(args["cc"])
        let subject = args["subject"] as? String
        let text = args["text"] as? String ?? args["body"] as? String
        let html = args["html"] as? String

        // Gather attachments off-main — reading + base64-encoding a multi-MB
        // PDF shouldn't block the UI.
        DispatchQueue.global(qos: .userInitiated).async { [self] in
            let attachments: [[String: Any]]
            switch gatherAttachments(args: args) {
            case .failure(let err):
                DispatchQueue.main.async {
                    completion(self.functionMessage(payload: [
                        "error": "attachment_failed",
                        "hint": err.message
                    ]))
                }
                return
            case .success(let built):
                attachments = built
            }

            // Hop back to main before resolving the inbox so the rest of the
            // send runs on the same queue the client delivers its callbacks on.
            DispatchQueue.main.async {
                self.resolveInbox(explicit: explicitInbox) { inboxResult in
                    switch inboxResult {
                    case .failure(let err):
                        completion(self.errorMessage(err))
                    case .success(let inboxId):
                        AgentMailClient.shared.sendMessage(
                            inboxId: inboxId,
                            to: to,
                            subject: subject,
                            text: text,
                            html: html,
                            cc: cc.isEmpty ? nil : cc,
                            attachments: attachments
                        ) { result in
                            switch result {
                            case .failure(let err):
                                completion(self.errorMessage(err))
                            case .success(let dict):
                                completion(self.functionMessage(payload: [
                                    "status": "sent",
                                    "from_inbox": inboxId,
                                    "to": to,
                                    "attachments_count": attachments.count,
                                    "message_id": dict["message_id"] as? String ?? "",
                                    "thread_id": dict["thread_id"] as? String ?? ""
                                ]))
                            }
                        }
                    }
                }
            }
        }
    }

    // MARK: - Inbox resolution

    /// Resolve the inbox id to send/read from. Priority: explicit arg →
    /// configured AGENT_MAIL_INBOX → cached → first existing inbox → create one.
    private func resolveInbox(explicit: String?,
                              completion: @escaping (Result<String, AgentMailClient.AgentMailError>) -> Void) {
        if let explicit, !explicit.isEmpty {
            completion(.success(explicit)); return
        }
        if let configured = KeyStore.shared.value(for: .agentMailInbox)?
            .trimmingCharacters(in: .whitespacesAndNewlines), !configured.isEmpty {
            completion(.success(configured)); return
        }
        lock.lock(); let cached = cachedInboxId; lock.unlock()
        if let cached { completion(.success(cached)); return }

        AgentMailClient.shared.listInboxes { [self] result in
            switch result {
            case .failure(let err):
                completion(.failure(err))
            case .success(let dict):
                if let id = firstInboxId(from: dict) {
                    cache(inboxId: id); completion(.success(id)); return
                }
                // No inbox yet — create a default one.
                AgentMailClient.shared.createInbox(username: nil, displayName: "Loop") { createResult in
                    switch createResult {
                    case .failure(let err):
                        completion(.failure(err))
                    case .success(let created):
                        if let id = (created["inbox_id"] as? String) ?? (created["email"] as? String) {
                            self.cache(inboxId: id); completion(.success(id))
                        } else {
                            completion(.failure(.malformedResponse))
                        }
                    }
                }
            }
        }
    }

    /// Id of the first entry in the payload's `inboxes` array (`inbox_id`,
    /// falling back to `email`), or nil when there is none.
    private func firstInboxId(from dict: [String: Any]) -> String? {
        guard let inboxes = dict["inboxes"] as? [[String: Any]], let first = inboxes.first else {
            return nil
        }
        return (first["inbox_id"] as? String) ?? (first["email"] as? String)
    }

    private func cache(inboxId: String) {
        lock.lock(); cachedInboxId = inboxId; lock.unlock()
    }

    // MARK: - Attachment resolution

    /// Model-relayable reason an attachment couldn't be assembled (missing
    /// file, too large, bad path). Carried as a `Result` failure.
    struct AttachmentError: Error { let message: String }

    /// Build the AgentMail `attachments` array from the call args. Returns a
    /// failure message (model-relayable) if a requested file can't be found or
    /// is too large.
    private func gatherAttachments(args: [String: Any]) -> Result<[[String: Any]], AttachmentError> {
        var urls: [URL] = []

        // Explicit workspace-relative paths. Parsed with stringList so a lone
        // string is honoured like to/cc, instead of being silently ignored
        // and the mail going out with no attachment.
        for path in stringList(args["attachment_paths"]) {
            let cleaned = path.replacingOccurrences(of: "workspace://", with: "")
            guard let url = try? Workspace.shared.resolve(cleaned) else {
                return .failure(AttachmentError(message: "Couldn't resolve attachment path '\(path)' inside the workspace."))
            }
            urls.append(url)
        }

        // Newest generated file of a given kind (or any kind).
        if (args["attach_latest"] as? Bool) == true {
            let kind = (args["attach_kind"] as? String)?.lowercased()
            if let url = latestGeneratedFile(kind: kind) {
                urls.append(url)
            } else {
                let what = kind.map { "\($0) " } ?? ""
                return .failure(AttachmentError(message: "No recently generated \(what)file was found to attach. If a file is still generating, wait for it to finish, then try again."))
            }
        }

        guard !urls.isEmpty else { return .success([]) }

        let limit = AgentMailSkill.maxAttachmentBytes
        func tooLarge(_ url: URL, bytes: Int) -> AttachmentError {
            let mb = bytes / (1024 * 1024)
            return AttachmentError(message: "'\(url.lastPathComponent)' is \(mb)MB — over the \(limit / (1024 * 1024))MB attachment limit.")
        }

        var out: [[String: Any]] = []
        for url in urls {
            try? Workspace.shared.ensureDownloaded(url)
            // Check the on-disk size first so an oversized file is rejected
            // without being loaded into memory.
            if let size = (try? url.resourceValues(forKeys: [.fileSizeKey]))?.fileSize, size > limit {
                return .failure(tooLarge(url, bytes: size))
            }
            guard let data = try? Data(contentsOf: url) else {
                return .failure(AttachmentError(message: "Couldn't read the attachment at '\(url.lastPathComponent)'."))
            }
            // Re-check after reading in case the size metadata was unavailable.
            guard data.count <= limit else {
                return .failure(tooLarge(url, bytes: data.count))
            }
            out.append([
                "filename": url.lastPathComponent,
                "content_type": contentType(for: url),
                "content": data.base64EncodedString()
            ])
        }
        return .success(out)
    }

    /// Find the most recently modified file produced by a generation skill.
    /// Maps each kind to its on-disk home; `nil` kind searches them all and
    /// returns the globally newest. Extensions are matched case-insensitively.
    private func latestGeneratedFile(kind: String?) -> URL? {
        let root = Workspace.shared.rootURL
        let docs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first

        // (directory, predicate the lowercased filename must satisfy)
        var sources: [(dir: URL, accept: (String) -> Bool)] = []
        func addPDF() { sources.append((root.appendingPathComponent("pdfs", isDirectory: true),
                                        { $0.hasSuffix(".pdf") })) }
        func addImage() { sources.append((root.appendingPathComponent("images", isDirectory: true),
                                          { $0.hasSuffix(".png") || $0.hasSuffix(".jpg") || $0.hasSuffix(".jpeg") })) }
        func addStory() { if let docs { sources.append((docs.appendingPathComponent("Stories", isDirectory: true),
                                                        { $0.hasSuffix(".html") })) } }
        func addFile() { sources.append((root.appendingPathComponent("attachments", isDirectory: true),
                                         { _ in true })) }

        switch kind {
        case "pdf": addPDF()
        case "image": addImage()
        case "story": addStory()
        case "file": addFile()
        default: addPDF(); addImage(); addStory(); addFile()
        }

        var newest: (url: URL, date: Date)?
        let fm = FileManager.default
        for source in sources {
            guard let entries = try? fm.contentsOfDirectory(
                at: source.dir,
                includingPropertiesForKeys: [.contentModificationDateKey],
                options: [.skipsHiddenFiles]) else { continue }
            for entry in entries {
                let name = entry.lastPathComponent
                // Skip generated thumbnails (e.g. PDF ".thumb.png").
                if name.contains(".thumb.") { continue }
                // Lowercase so "REPORT.PDF" matches, as contentType(for:) does.
                guard source.accept(name.lowercased()) else { continue }
                let mod = (try? entry.resourceValues(forKeys: [.contentModificationDateKey]))?
                    .contentModificationDate ?? .distantPast
                if let current = newest, mod <= current.date { continue }
                newest = (entry, mod)
            }
        }
        return newest?.url
    }

    /// MIME type for an attachment, chosen from the lowercased file extension.
    private func contentType(for url: URL) -> String {
        switch url.pathExtension.lowercased() {
        case "pdf": return "application/pdf"
        case "png": return "image/png"
        case "jpg", "jpeg": return "image/jpeg"
        case "gif": return "image/gif"
        case "html", "htm": return "text/html"
        case "md", "markdown": return "text/markdown"
        case "txt", "log": return "text/plain"
        case "csv": return "text/csv"
        case "json": return "application/json"
        case "zip": return "application/zip"
        default: return "application/octet-stream"
        }
    }

    // MARK: - Helpers

    /// Map a client Result into a function-role MessageStruct.
    private func map(_ result: Result<[String: Any], AgentMailClient.AgentMailError>) -> MessageStruct {
        switch result {
        case .failure(let err): return errorMessage(err)
        case .success(let dict): return functionMessage(payload: dict)
        }
    }

    private func errorMessage(_ error: AgentMailClient.AgentMailError) -> MessageStruct {
        return functionMessage(payload: ["error": error.code, "hint": error.hint])
    }

    /// Wraps `payload` as a function-role message; falls back to "{}" when the
    /// payload can't be serialized.
    private func functionMessage(payload: Any) -> MessageStruct {
        let json: String
        if let data = try? JSONSerialization.data(withJSONObject: payload, options: []),
           let str = String(data: data, encoding: .utf8) {
            json = str
        } else {
            json = "{}"
        }
        return MessageStruct(role: "function", content: json, name: "agent_mail")
    }

    private func missingArgs(expected: String) -> MessageStruct {
        return MessageStruct(role: "assistant",
                             content: "I need \(expected) to call agent_mail. Please provide it.")
    }

    /// Accept a single string or an array of strings. Entries are trimmed;
    /// empty and non-string entries are dropped.
    private func stringList(_ value: Any?) -> [String] {
        if let s = value as? String {
            let trimmed = s.trimmingCharacters(in: .whitespacesAndNewlines)
            return trimmed.isEmpty ? [] : [trimmed]
        }
        if let arr = value as? [String] {
            return arr.map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }.filter { !$0.isEmpty }
        }
        if let arr = value as? [Any] {
            return arr.compactMap { $0 as? String }
                .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }.filter { !$0.isEmpty }
        }
        return []
    }

    /// Reads an integer argument that may arrive as Int, Double or String.
    /// Doubles are truncated toward zero. NaN, infinite and out-of-range
    /// doubles return nil; a plain `Int(d)` would trap on them.
    private func intArg(_ value: Any?) -> Int? {
        if let i = value as? Int { return i }
        if let d = value as? Double {
            guard d.isFinite else { return nil }
            return Int(exactly: d.rounded(.towardZero))
        }
        if let s = value as? String { return Int(s) }
        return nil
    }
}
+- You need lat/lon to feed another tool that expects coordinates (e.g. a map embed or a nearby search) and only have a textual address. + +Notes: +- This does NOT use the device's location and needs no permission — it geocodes whatever address you pass. +- For the user's *own* current position, use get_current_location instead. +- An ambiguous or unknown address returns an error string; relay it and ask the user to clarify (add city/state/country). +""" + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "geocode_address", + "description": "Convert a street address or place name into latitude/longitude coordinates. Returns the coordinates and a normalized address. Does not require location permission.", + "parameters": [ + "type": "object", + "properties": [ + "address": [ + "type": "string", + "description": "The address or place name to geocode, e.g. \"1600 Amphitheatre Parkway, Mountain View, CA\". Include city/state/country when possible to disambiguate." + ] + ], + "required": ["address"] + ] + ] + ] + ] + + static let toolNames: Set = [ + "geocode_address" + ] + + func handles(functionName: String) -> Bool { + return GeocodingSkill.toolNames.contains(functionName) + } + + func statusText(for call: FunctionCallStruct) -> String? { + switch call.name { + case "geocode_address": + if let address = call.arguments["address"] as? String, !address.isEmpty { + return "geocoding \(address.prefix(40))" + } + return "geocoding address" + default: + return nil + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + switch functionCall.name { + case "geocode_address": + let address = (functionCall.arguments["address"] as? String)? + .trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" + geocode(address: address, completion: completion) + default: + completion(MessageStruct( + role: "assistant", + content: "I don't know how to handle the Geocoding tool '\(functionCall.name)'." + )) + } + } + + // MARK: - Tool handler + + private func geocode(address: String, + completion: @escaping (MessageStruct) -> Void) { + guard !address.isEmpty else { + completion(Self.result("Provide a non-empty `address` to geocode.")) + return + } + + // CLGeocoder is single-shot and retains itself for the duration of the + // request, but we hold a local reference until the callback fires so + // ARC doesn't release it mid-flight. + let geocoder = CLGeocoder() + geocoder.geocodeAddressString(address) { placemarks, error in + if let error = error { + // CLError.geocodeFoundNoResult reads better as "not found". + let detail: String + if (error as? CLError)?.code == .geocodeFoundNoResult { + detail = "no matching location found" + } else { + detail = error.localizedDescription + } + completion(Self.result("Could not geocode \"\(address)\": \(detail). Try adding a city, state, or country to disambiguate.")) + return + } + guard let placemark = placemarks?.first, + let location = placemark.location else { + completion(Self.result("No coordinates found for \"\(address)\". Try adding a city, state, or country to disambiguate.")) + return + } + + var payload: [String: Any] = [ + "status": "ok", + "query": address, + "latitude": location.coordinate.latitude, + "longitude": location.coordinate.longitude, + "formatted_address": Self.formatAddress(placemark) + ] + if let locality = placemark.locality { payload["locality"] = locality } + if let region = placemark.administrativeArea { payload["region"] = region } + if let postal = placemark.postalCode { payload["postal_code"] = postal } + if let country = placemark.country { payload["country"] = country } + if let isoCountry = placemark.isoCountryCode { payload["country_code"] = isoCountry } + + let json = (try? 
JSONSerialization.data(withJSONObject: payload, options: [.sortedKeys])) + .flatMap { String(data: $0, encoding: .utf8) } ?? "{}" + completion(Self.result(json)) + } + } + + // MARK: - Helpers + + private static func result(_ body: String) -> MessageStruct { + return MessageStruct(role: "function", content: body, name: "geocode_address") + } + + /// Builds a single normalized address line from a placemark's components, + /// skipping nils and the redundant `name`-equals-thoroughfare case. + private static func formatAddress(_ placemark: CLPlacemark) -> String { + var parts: [String] = [] + if let name = placemark.name, name != placemark.thoroughfare { + parts.append(name) + } else if let thoroughfare = placemark.thoroughfare { + if let sub = placemark.subThoroughfare { + parts.append("\(sub) \(thoroughfare)") + } else { + parts.append(thoroughfare) + } + } + if let locality = placemark.locality { parts.append(locality) } + if let admin = placemark.administrativeArea { parts.append(admin) } + if let postal = placemark.postalCode { parts.append(postal) } + if let country = placemark.country { parts.append(country) } + + // De-dup consecutive repeats (e.g. name already contains the city). + var unique: [String] = [] + for part in parts where unique.last != part { + unique.append(part) + } + return unique.joined(separator: ", ") + } +} diff --git a/LoopIOS/Skills/GoogleWorkspace/GoogleCalendarSkill.swift b/LoopIOS/Skills/GoogleWorkspace/GoogleCalendarSkill.swift new file mode 100644 index 0000000..dc0cf14 --- /dev/null +++ b/LoopIOS/Skills/GoogleWorkspace/GoogleCalendarSkill.swift @@ -0,0 +1,296 @@ +// +// GoogleCalendarSkill.swift +// Loop +// +// Google Calendar integration via the Calendar v3 REST API. Reads the +// user's access token from KeyStore (Settings → Keys → Google Workspace +// Access Token). Mirrors SlackSkill/NotionSkill's shape: static tool +// schemas, a dispatch method, and structured JSON responses. 
+// +// Note: This is separate from the existing CalendarSkill which uses +// Apple's EventKit. This skill talks directly to Google Calendar's REST +// API for users who want deeper Google Calendar access. +// + +import Foundation + +struct GoogleCalendarSkill { + static let shared = GoogleCalendarSkill() + + // MARK: - System prompt + + static let systemPromptFragment: String = """ +You can access the user's Google Calendar through these tools: +- google_calendar (action: "list_events"): list calendar events. Optional params: calendarId (default "primary"), timeMin (RFC3339), timeMax (RFC3339), maxResults (default 10, max 50). +- google_calendar (action: "create_event"): create a new event. Params: summary (required), start (required, RFC3339 datetime or date), end (required, RFC3339 datetime or date), description (optional), attendees (optional array of email strings), calendarId (optional, default "primary"). + +Workflow tips: +- Use RFC3339 timestamps like "2024-03-15T09:00:00-07:00" or date-only "2024-03-15" for all-day events. +- If a tool returns {"error":"google_not_connected"}, tell the user to paste their Google access token in Settings → Keys → Google Workspace Access Token. +- If a tool returns {"error":"token_expired"}, tell the user their Google token has expired and they need to refresh it. +""" + + // MARK: - Tool schemas + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "google_calendar", + "description": "Interact with Google Calendar. Specify an action and its parameters.", + "parameters": [ + "type": "object", + "properties": [ + "action": [ + "type": "string", + "enum": ["list_events", "create_event"], + "description": "The Calendar action to perform." + ], + "calendarId": [ + "type": "string", + "description": "Calendar id. Defaults to 'primary'." + ], + "timeMin": [ + "type": "string", + "description": "Lower bound for event start time (RFC3339, for list_events). E.g. '2024-03-15T00:00:00Z'." 
+ ], + "timeMax": [ + "type": "string", + "description": "Upper bound for event start time (RFC3339, for list_events)." + ], + "maxResults": [ + "type": "integer", + "description": "Max events to return for list_events. Default 10, max 50." + ], + "summary": [ + "type": "string", + "description": "Event title (for create_event)." + ], + "start": [ + "type": "string", + "description": "Event start time as RFC3339 datetime or date string (for create_event)." + ], + "end": [ + "type": "string", + "description": "Event end time as RFC3339 datetime or date string (for create_event)." + ], + "description": [ + "type": "string", + "description": "Event description (optional, for create_event)." + ], + "attendees": [ + "type": "array", + "items": ["type": "string"], + "description": "Optional array of attendee email addresses (for create_event)." + ] + ], + "required": ["action"] + ] + ] + ] + ] + + static let toolNames: Set = ["google_calendar"] + + func handles(functionName: String) -> Bool { + return GoogleCalendarSkill.toolNames.contains(functionName) + } + + func statusText(for call: FunctionCallStruct) -> String? { + guard call.name == "google_calendar" else { return nil } + let action = call.arguments["action"] as? String ?? "" + switch action { + case "list_events": return "listing Google Calendar events" + case "create_event": + if let summary = call.arguments["summary"] as? String, !summary.isEmpty { + return "creating event: \(summary)" + } + return "creating a calendar event" + default: return "accessing Google Calendar" + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + let args = functionCall.arguments + guard let action = args["action"] as? String, !action.isEmpty else { + completion(missingArgs(expected: "action")); return + } + switch action { + case "list_events": + listEvents(calendarId: args["calendarId"] as? String, + timeMin: args["timeMin"] as? 
String, + timeMax: args["timeMax"] as? String, + maxResults: intArg(args["maxResults"]), + completion: completion) + case "create_event": + guard let summary = args["summary"] as? String, !summary.isEmpty, + let start = args["start"] as? String, !start.isEmpty, + let end = args["end"] as? String, !end.isEmpty else { + completion(missingArgs(expected: "summary, start, end")); return + } + createEvent(calendarId: args["calendarId"] as? String, + summary: summary, + start: start, + end: end, + description: args["description"] as? String, + attendees: args["attendees"] as? [String], + completion: completion) + default: + completion(functionMessage(payload: [ + "error": "unknown_action", + "hint": "Valid actions: list_events, create_event" + ])) + } + } + + // MARK: - Actions + + private func listEvents(calendarId: String?, + timeMin: String?, + timeMax: String?, + maxResults: Int?, + completion: @escaping (MessageStruct) -> Void) { + let calendar = calendarId ?? "primary" + let count = min(max(maxResults ?? 10, 1), 50) + var queryItems = [ + URLQueryItem(name: "maxResults", value: "\(count)"), + URLQueryItem(name: "singleEvents", value: "true"), + URLQueryItem(name: "orderBy", value: "startTime") + ] + if let timeMin, !timeMin.isEmpty { + queryItems.append(URLQueryItem(name: "timeMin", value: timeMin)) + } + if let timeMax, !timeMax.isEmpty { + queryItems.append(URLQueryItem(name: "timeMax", value: timeMax)) + } + + let encodedCalendar = calendar.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? calendar + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/calendar/v3/calendars/\(encodedCalendar)/events", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + let items = (dict["items"] as? [[String: Any]] ?? 
[]).map(trimEvent) + completion(functionMessage(payload: ["events": items])) + } + } + } + + private func createEvent(calendarId: String?, + summary: String, + start: String, + end: String, + description: String?, + attendees: [String]?, + completion: @escaping (MessageStruct) -> Void) { + let calendar = calendarId ?? "primary" + + var body: [String: Any] = ["summary": summary] + + // Determine if datetime or date based on whether value contains "T" + if start.contains("T") { + body["start"] = ["dateTime": start] + body["end"] = ["dateTime": end] + } else { + body["start"] = ["date": start] + body["end"] = ["date": end] + } + + if let description, !description.isEmpty { + body["description"] = description + } + if let attendees, !attendees.isEmpty { + body["attendees"] = attendees.map { ["email": $0] } + } + + let encodedCalendar = calendar.addingPercentEncoding(withAllowedCharacters: .urlPathAllowed) ?? calendar + GoogleWorkspaceClient.shared.post( + url: "https://www.googleapis.com/calendar/v3/calendars/\(encodedCalendar)/events", + body: body + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + completion(functionMessage(payload: [ + "status": "created", + "id": dict["id"] as? String ?? "", + "htmlLink": dict["htmlLink"] as? String ?? "", + "summary": dict["summary"] as? String ?? summary + ])) + } + } + } + + // MARK: - Helpers + + private func trimEvent(_ event: [String: Any]) -> [String: Any] { + var out: [String: Any] = [ + "id": event["id"] as? String ?? "", + "summary": event["summary"] as? String ?? "(no title)" + ] + if let start = event["start"] as? [String: Any] { + out["start"] = start["dateTime"] as? String ?? start["date"] as? String ?? "" + } + if let end = event["end"] as? [String: Any] { + out["end"] = end["dateTime"] as? String ?? end["date"] as? String ?? "" + } + if let htmlLink = event["htmlLink"] as? 
String { + out["htmlLink"] = htmlLink + } + if let description = event["description"] as? String, !description.isEmpty { + out["description"] = description + } + if let attendees = event["attendees"] as? [[String: Any]] { + out["attendees"] = attendees.map { a in + [ + "email": a["email"] as? String ?? "", + "responseStatus": a["responseStatus"] as? String ?? "" + ] + } + } + if let status = event["status"] as? String { + out["status"] = status + } + return out + } + + private func errorMessage(error: GoogleWorkspaceClient.GoogleError) -> MessageStruct { + let payload: [String: Any] = [ + "error": error.code, + "hint": error.hint + ] + return functionMessage(payload: payload) + } + + private func functionMessage(payload: Any) -> MessageStruct { + let json: String + if let data = try? JSONSerialization.data(withJSONObject: payload, options: []), + let str = String(data: data, encoding: .utf8) { + json = str + } else { + json = "{}" + } + return MessageStruct(role: "function", content: json, name: "google_calendar") + } + + private func missingArgs(expected: String) -> MessageStruct { + return MessageStruct( + role: "assistant", + content: "I need \(expected) to call google_calendar. Please provide them." + ) + } + + private func intArg(_ value: Any?) -> Int? { + if let i = value as? Int { return i } + if let d = value as? Double { return Int(d) } + if let s = value as? String { return Int(s) } + return nil + } +} diff --git a/LoopIOS/Skills/GoogleWorkspace/GoogleDriveSkill.swift b/LoopIOS/Skills/GoogleWorkspace/GoogleDriveSkill.swift new file mode 100644 index 0000000..478c3fb --- /dev/null +++ b/LoopIOS/Skills/GoogleWorkspace/GoogleDriveSkill.swift @@ -0,0 +1,344 @@ +// +// GoogleDriveSkill.swift +// Loop +// +// Google Drive integration. Reads the user's access token from KeyStore +// (Settings → Keys → Google Workspace Access Token) and talks directly to +// the Google Drive v3 API. 
Mirrors NotionSkill's shape: static tool schemas, +// a dispatch method, and structured JSON responses. +// + +import Foundation + +struct GoogleDriveSkill { + static let shared = GoogleDriveSkill() + + // MARK: - System prompt + + static let systemPromptFragment: String = """ +You can access the user's Google Drive through these tools: +- google_drive (action: "list_files"): list files. Optional params: q (Drive search query), pageSize (default 20, max 100), fields (partial response field mask). +- google_drive (action: "get_file"): get file metadata by fileId. +- google_drive (action: "read_file"): read file content. For Google Docs/Sheets/Slides, exports as plain text. For other files, returns a download URL. +- google_drive (action: "create_file"): create a new file. Params: name (required), mimeType (required), content (optional text content), parents (optional array of folder ids). + +Workflow tips: +- Use list_files with a q parameter to search (e.g. q: "name contains 'report'" or q: "mimeType = 'application/vnd.google-apps.document'"). +- If a tool returns {"error":"google_not_connected"}, tell the user to paste their Google access token in Settings → Keys → Google Workspace Access Token. +- If a tool returns {"error":"token_expired"}, tell the user their Google token has expired and they need to refresh it. +""" + + // MARK: - Tool schemas + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "google_drive", + "description": "Interact with Google Drive. Specify an action and its parameters.", + "parameters": [ + "type": "object", + "properties": [ + "action": [ + "type": "string", + "enum": ["list_files", "get_file", "read_file", "create_file"], + "description": "The Drive action to perform." + ], + "q": [ + "type": "string", + "description": "Drive search query (for list_files). 
See https://developers.google.com/drive/api/guides/search-files" + ], + "pageSize": [ + "type": "integer", + "description": "Max files to return for list_files. Default 20, max 100." + ], + "fields": [ + "type": "string", + "description": "Partial response field mask (for list_files or get_file)." + ], + "fileId": [ + "type": "string", + "description": "The file id (for get_file, read_file)." + ], + "name": [ + "type": "string", + "description": "File name (for create_file)." + ], + "mimeType": [ + "type": "string", + "description": "MIME type of the file to create (for create_file). Use 'application/vnd.google-apps.document' for Google Docs." + ], + "content": [ + "type": "string", + "description": "Optional text content for the new file (for create_file)." + ], + "parents": [ + "type": "array", + "items": ["type": "string"], + "description": "Optional folder ids to place the file in (for create_file)." + ] + ], + "required": ["action"] + ] + ] + ] + ] + + static let toolNames: Set = ["google_drive"] + + func handles(functionName: String) -> Bool { + return GoogleDriveSkill.toolNames.contains(functionName) + } + + func statusText(for call: FunctionCallStruct) -> String? { + guard call.name == "google_drive" else { return nil } + let action = call.arguments["action"] as? String ?? "" + switch action { + case "list_files": return "listing Google Drive files" + case "get_file": return "reading Drive file metadata" + case "read_file": return "reading Drive file content" + case "create_file": + if let name = call.arguments["name"] as? String, !name.isEmpty { + return "creating Drive file \(name)" + } + return "creating a Drive file" + default: return "accessing Google Drive" + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + let args = functionCall.arguments + guard let action = args["action"] as? 
String, !action.isEmpty else { + completion(missingArgs(expected: "action")); return + } + switch action { + case "list_files": + listFiles(q: args["q"] as? String, + pageSize: intArg(args["pageSize"]), + fields: args["fields"] as? String, + completion: completion) + case "get_file": + guard let fileId = args["fileId"] as? String, !fileId.isEmpty else { + completion(missingArgs(expected: "fileId")); return + } + getFile(fileId: fileId, fields: args["fields"] as? String, completion: completion) + case "read_file": + guard let fileId = args["fileId"] as? String, !fileId.isEmpty else { + completion(missingArgs(expected: "fileId")); return + } + readFile(fileId: fileId, completion: completion) + case "create_file": + guard let name = args["name"] as? String, !name.isEmpty, + let mimeType = args["mimeType"] as? String, !mimeType.isEmpty else { + completion(missingArgs(expected: "name, mimeType")); return + } + createFile(name: name, + mimeType: mimeType, + content: args["content"] as? String, + parents: args["parents"] as? [String], + completion: completion) + default: + completion(functionMessage(payload: [ + "error": "unknown_action", + "hint": "Valid actions: list_files, get_file, read_file, create_file" + ])) + } + } + + // MARK: - Actions + + private func listFiles(q: String?, + pageSize: Int?, + fields: String?, + completion: @escaping (MessageStruct) -> Void) { + let size = min(max(pageSize ?? 20, 1), 100) + var queryItems = [ + URLQueryItem(name: "pageSize", value: "\(size)"), + URLQueryItem(name: "fields", value: fields ?? "files(id,name,mimeType,modifiedTime,size,parents)") + ] + if let q, !q.isEmpty { + queryItems.append(URLQueryItem(name: "q", value: q)) + } + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/drive/v3/files", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + let files = dict["files"] as? [[String: Any]] ?? 
[] + completion(functionMessage(payload: ["files": files])) + } + } + } + + private func getFile(fileId: String, + fields: String?, + completion: @escaping (MessageStruct) -> Void) { + let fieldMask = fields ?? "id,name,mimeType,modifiedTime,size,parents,webViewLink" + let queryItems = [URLQueryItem(name: "fields", value: fieldMask)] + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/drive/v3/files/\(fileId)", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + completion(functionMessage(payload: dict)) + } + } + } + + private func readFile(fileId: String, + completion: @escaping (MessageStruct) -> Void) { + // First get metadata to determine the mimeType + let queryItems = [URLQueryItem(name: "fields", value: "id,name,mimeType")] + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/drive/v3/files/\(fileId)", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let meta): + let mimeType = meta["mimeType"] as? String ?? "" + if mimeType.starts(with: "application/vnd.google-apps.") { + // Google native format — export as text + exportAsText(fileId: fileId, nativeMime: mimeType, completion: completion) + } else { + // Binary/non-native — return a download URL + let downloadURL = "https://www.googleapis.com/drive/v3/files/\(fileId)?alt=media" + completion(functionMessage(payload: [ + "type": "binary", + "mimeType": mimeType, + "downloadUrl": downloadURL, + "hint": "Use this URL with the access token to download the file content." 
+ ])) + } + } + } + } + + private func exportAsText(fileId: String, + nativeMime: String, + completion: @escaping (MessageStruct) -> Void) { + let exportMime: String + switch nativeMime { + case "application/vnd.google-apps.document": + exportMime = "text/plain" + case "application/vnd.google-apps.spreadsheet": + exportMime = "text/csv" + case "application/vnd.google-apps.presentation": + exportMime = "text/plain" + default: + exportMime = "text/plain" + } + let queryItems = [URLQueryItem(name: "mimeType", value: exportMime)] + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/drive/v3/files/\(fileId)/export", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + // If export fails, it may be because the response was plain text (not JSON). + // In that case the client would report malformedResponse — handle gracefully. + completion(errorMessage(error: err)) + case .success(let dict): + // The export endpoint returns raw text, not JSON. If we got JSON + // back it means the client parsed an error. Pass through whatever we got. + completion(functionMessage(payload: [ + "type": "text", + "mimeType": exportMime, + "content": dict + ])) + } + } + } + + private func createFile(name: String, + mimeType: String, + content: String?, + parents: [String]?, + completion: @escaping (MessageStruct) -> Void) { + var metadata: [String: Any] = [ + "name": name, + "mimeType": mimeType + ] + if let parents, !parents.isEmpty { + metadata["parents"] = parents + } + + if let content, !content.isEmpty, + mimeType == "application/vnd.google-apps.document" { + // For Google Docs, create with content using the multipart upload + // Simplified: create empty doc then would need to use Docs API to add content. + // For v1, create the file metadata-only; content param noted in response. 
+ GoogleWorkspaceClient.shared.post( + url: "https://www.googleapis.com/drive/v3/files", + body: metadata + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + var response = dict + response["note"] = "File created. For Google Docs, use the Google Docs API to insert content after creation." + completion(functionMessage(payload: response)) + } + } + } else { + // Metadata-only creation + GoogleWorkspaceClient.shared.post( + url: "https://www.googleapis.com/drive/v3/files", + body: metadata + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + completion(functionMessage(payload: dict)) + } + } + } + } + + // MARK: - Helpers + + private func errorMessage(error: GoogleWorkspaceClient.GoogleError) -> MessageStruct { + let payload: [String: Any] = [ + "error": error.code, + "hint": error.hint + ] + return functionMessage(payload: payload) + } + + private func functionMessage(payload: Any) -> MessageStruct { + let json: String + if let data = try? JSONSerialization.data(withJSONObject: payload, options: []), + let str = String(data: data, encoding: .utf8) { + json = str + } else { + json = "{}" + } + return MessageStruct(role: "function", content: json, name: "google_drive") + } + + private func missingArgs(expected: String) -> MessageStruct { + return MessageStruct( + role: "assistant", + content: "I need \(expected) to call google_drive. Please provide them." + ) + } + + private func intArg(_ value: Any?) -> Int? { + if let i = value as? Int { return i } + if let d = value as? Double { return Int(d) } + if let s = value as? 
String { return Int(s) } + return nil + } +} diff --git a/LoopIOS/Skills/GoogleWorkspace/GoogleGmailSkill.swift b/LoopIOS/Skills/GoogleWorkspace/GoogleGmailSkill.swift new file mode 100644 index 0000000..586e529 --- /dev/null +++ b/LoopIOS/Skills/GoogleWorkspace/GoogleGmailSkill.swift @@ -0,0 +1,304 @@ +// +// GoogleGmailSkill.swift +// Loop +// +// Gmail integration. Reads the user's access token from KeyStore +// (Settings → Keys → Google Workspace Access Token) and talks directly to +// the Gmail v1 API. Mirrors SlackSkill's shape: static tool schemas, +// a dispatch method, and structured JSON responses. +// + +import Foundation + +struct GoogleGmailSkill { + static let shared = GoogleGmailSkill() + + // MARK: - System prompt + + static let systemPromptFragment: String = """ +You can access the user's Gmail through these tools: +- google_gmail (action: "search_messages"): search for messages. Params: q (Gmail search query, required), maxResults (default 10, max 50). +- google_gmail (action: "get_message"): get a specific message by id. Params: id (required), format (optional: "full", "metadata", "minimal" — default "full"). +- google_gmail (action: "send_message"): send an email. Params: to (required), subject (required), body (required), attachments (optional, not supported in v1). + +Workflow tips: +- Use Gmail search syntax in q: "from:alice@example.com", "subject:meeting", "is:unread", "newer_than:2d", etc. +- Message ids come from search_messages results — chain calls when needed. +- If a tool returns {"error":"google_not_connected"}, tell the user to paste their Google access token in Settings → Keys → Google Workspace Access Token. +- If a tool returns {"error":"token_expired"}, tell the user their Google token has expired and they need to refresh it. +""" + + // MARK: - Tool schemas + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "google_gmail", + "description": "Interact with Gmail. 
Specify an action and its parameters.", + "parameters": [ + "type": "object", + "properties": [ + "action": [ + "type": "string", + "enum": ["search_messages", "get_message", "send_message"], + "description": "The Gmail action to perform." + ], + "q": [ + "type": "string", + "description": "Gmail search query (for search_messages). Supports Gmail search syntax." + ], + "maxResults": [ + "type": "integer", + "description": "Max messages to return for search_messages. Default 10, max 50." + ], + "id": [ + "type": "string", + "description": "Message id (for get_message)." + ], + "format": [ + "type": "string", + "enum": ["full", "metadata", "minimal"], + "description": "Response format for get_message. Default 'full'." + ], + "to": [ + "type": "string", + "description": "Recipient email address (for send_message)." + ], + "subject": [ + "type": "string", + "description": "Email subject (for send_message)." + ], + "body": [ + "type": "string", + "description": "Email body text (for send_message)." + ] + ], + "required": ["action"] + ] + ] + ] + ] + + static let toolNames: Set = ["google_gmail"] + + func handles(functionName: String) -> Bool { + return GoogleGmailSkill.toolNames.contains(functionName) + } + + func statusText(for call: FunctionCallStruct) -> String? { + guard call.name == "google_gmail" else { return nil } + let action = call.arguments["action"] as? String ?? "" + switch action { + case "search_messages": return "searching Gmail" + case "get_message": return "reading Gmail message" + case "send_message": return "sending email" + default: return "accessing Gmail" + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + let args = functionCall.arguments + guard let action = args["action"] as? String, !action.isEmpty else { + completion(missingArgs(expected: "action")); return + } + switch action { + case "search_messages": + guard let q = args["q"] as? 
String, !q.isEmpty else { + completion(missingArgs(expected: "q")); return + } + searchMessages(q: q, maxResults: intArg(args["maxResults"]), completion: completion) + case "get_message": + guard let id = args["id"] as? String, !id.isEmpty else { + completion(missingArgs(expected: "id")); return + } + getMessage(id: id, format: args["format"] as? String, completion: completion) + case "send_message": + guard let to = args["to"] as? String, !to.isEmpty, + let subject = args["subject"] as? String, + let body = args["body"] as? String else { + completion(missingArgs(expected: "to, subject, body")); return + } + sendMessage(to: to, subject: subject, body: body, completion: completion) + default: + completion(functionMessage(payload: [ + "error": "unknown_action", + "hint": "Valid actions: search_messages, get_message, send_message" + ])) + } + } + + // MARK: - Actions + + /// Lists messages matching the Gmail query `q`. `maxResults` defaults to 10 + /// and is clamped to 1...50; the reply relays the response's `messages` + /// array and `resultSizeEstimate`. + private func searchMessages(q: String, + maxResults: Int?, + completion: @escaping (MessageStruct) -> Void) { + let count = min(max(maxResults ?? 10, 1), 50) + let queryItems = [ + URLQueryItem(name: "q", value: q), + URLQueryItem(name: "maxResults", value: "\(count)") + ] + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/gmail/v1/users/me/messages", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + let messages = dict["messages"] as? [[String: Any]] ?? [] + let resultSizeEstimate = dict["resultSizeEstimate"] as? Int ?? 0 + completion(functionMessage(payload: [ + "messages": messages, + "resultSizeEstimate": resultSizeEstimate + ])) + } + } + } + + /// Fetches one message by id (`format` defaults to "full") and trims the + /// response to id, threadId, snippet, the From/To/Subject/Date/Cc headers, + /// a decoded body when one is found, and labelIds. + private func getMessage(id: String, + format: String?, + completion: @escaping (MessageStruct) -> Void) { + let fmt = format ??
"full" + let queryItems = [URLQueryItem(name: "format", value: fmt)] + // `id` is a model-supplied argument spliced into the URL path, so escape + // everything except alphanumerics: a "/", "?" or "#" in it must not + // retarget the request at another endpoint or add query parameters. + let safeId = id.addingPercentEncoding(withAllowedCharacters: .alphanumerics) ?? id + GoogleWorkspaceClient.shared.get( + url: "https://www.googleapis.com/gmail/v1/users/me/messages/\(safeId)", + queryItems: queryItems + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + // Trim to useful fields for the model + var trimmed: [String: Any] = [ + "id": dict["id"] as? String ?? "", + "threadId": dict["threadId"] as? String ?? "", + "snippet": dict["snippet"] as? String ?? "" + ] + if let payload = dict["payload"] as? [String: Any] { + if let headers = payload["headers"] as? [[String: Any]] { + let relevant = headers.filter { h in + let name = (h["name"] as? String ?? "").lowercased() + return ["from", "to", "subject", "date", "cc"].contains(name) + } + trimmed["headers"] = relevant + } + // Extract plain text body if available + if let body = payload["body"] as? [String: Any], + let data = body["data"] as? String { + trimmed["body"] = decodeBase64URL(data) + } else if let parts = payload["parts"] as? [[String: Any]] { + for part in parts { + let mime = part["mimeType"] as? String ?? "" + if mime == "text/plain", + let body = part["body"] as? [String: Any], + let data = body["data"] as? String { + trimmed["body"] = decodeBase64URL(data) + break + } + } + } + } + if let labels = dict["labelIds"] as? [String] { + trimmed["labelIds"] = labels + } + completion(functionMessage(payload: trimmed)) + } + } + } + + /// Sends a plain-text email: builds a minimal RFC 2822 message, base64url- + /// encodes it, and posts it to `users/me/messages/send`. + private func sendMessage(to: String, + subject: String, + body: String, + completion: @escaping (MessageStruct) -> Void) { + // Construct a simple RFC 2822 message. `to` and `subject` arrive as + // model-supplied tool arguments, so strip line breaks from them first: + // a raw CR/LF inside a header value would let a crafted argument inject + // extra headers (e.g. Bcc) or end the header section early. + let safeTo = to.components(separatedBy: .newlines).joined(separator: " ") + let safeSubject = subject.components(separatedBy: .newlines).joined(separator: " ") + let message = [ + "To: \(safeTo)", + "Subject: \(safeSubject)", + "Content-Type: text/plain; charset=utf-8", + "", + body + ].joined(separator: "\r\n") + + guard let messageData = message.data(using: .utf8) else { + completion(functionMessage(payload: [ + "error": "encoding_failed", + "hint": "Failed to encode the email message."
+ ])) + return + } + + // Gmail API expects base64url-encoded raw message + let encoded = messageData.base64EncodedString() + .replacingOccurrences(of: "+", with: "-") + .replacingOccurrences(of: "/", with: "_") + .replacingOccurrences(of: "=", with: "") + + let requestBody: [String: Any] = ["raw": encoded] + GoogleWorkspaceClient.shared.post( + url: "https://www.googleapis.com/gmail/v1/users/me/messages/send", + body: requestBody + ) { [self] result in + switch result { + case .failure(let err): + completion(errorMessage(error: err)) + case .success(let dict): + completion(functionMessage(payload: [ + "status": "sent", + "id": dict["id"] as? String ?? "", + "threadId": dict["threadId"] as? String ?? "" + ])) + } + } + } + + // MARK: - Helpers + + private func decodeBase64URL(_ input: String) -> String { + var base64 = input + .replacingOccurrences(of: "-", with: "+") + .replacingOccurrences(of: "_", with: "/") + let remainder = base64.count % 4 + if remainder > 0 { + base64 += String(repeating: "=", count: 4 - remainder) + } + guard let data = Data(base64Encoded: base64) else { return input } + return String(data: data, encoding: .utf8) ?? input + } + + private func errorMessage(error: GoogleWorkspaceClient.GoogleError) -> MessageStruct { + let payload: [String: Any] = [ + "error": error.code, + "hint": error.hint + ] + return functionMessage(payload: payload) + } + + private func functionMessage(payload: Any) -> MessageStruct { + let json: String + if let data = try? JSONSerialization.data(withJSONObject: payload, options: []), + let str = String(data: data, encoding: .utf8) { + json = str + } else { + json = "{}" + } + return MessageStruct(role: "function", content: json, name: "google_gmail") + } + + private func missingArgs(expected: String) -> MessageStruct { + return MessageStruct( + role: "assistant", + content: "I need \(expected) to call google_gmail. Please provide them." + ) + } + + private func intArg(_ value: Any?) -> Int? { + if let i = value as? 
Int { return i } + // Int(d) traps on NaN, infinity, or values outside Int's range, so use + // the failable exact conversion after truncating toward zero instead; + // unrepresentable values yield nil. + if let d = value as? Double { return Int(exactly: d.rounded(.towardZero)) } + if let s = value as? String { return Int(s) } + return nil + } +} diff --git a/LoopIOS/Skills/GoogleWorkspace/GoogleWorkspaceClient.swift b/LoopIOS/Skills/GoogleWorkspace/GoogleWorkspaceClient.swift new file mode 100644 index 0000000..dd43585 --- /dev/null +++ b/LoopIOS/Skills/GoogleWorkspace/GoogleWorkspaceClient.swift @@ -0,0 +1,204 @@ +// +// GoogleWorkspaceClient.swift +// Loop +// +// Shared networking client for all Google Workspace APIs (Drive, Gmail, +// Calendar). Reads the user's access token from KeyStore and injects +// `Authorization: Bearer <token>` into every request. Mirrors +// NotionClient's shape: one private request helper, typed errors, a hint +// string per error code that skills can relay to the model. +// + +import Foundation + +/// Thin wrapper around Google Workspace REST APIs. Methods return raw +/// `[String: Any]` payloads so callers can shape them however the model wants. +final class GoogleWorkspaceClient { + + static let shared = GoogleWorkspaceClient() + + /// Dedicated URLSession so request/resource timeouts don't depend on the + /// shared session config. Matches NotionClient's pattern. + private let session: URLSession = { + let config = URLSessionConfiguration.ephemeral + config.timeoutIntervalForRequest = 30 + config.timeoutIntervalForResource = 60 + return URLSession(configuration: config) + }() + + private init() {} + + // MARK: - Errors + + enum GoogleError: Error { + case notConnected + case transport + case malformedResponse + /// Non-2xx response. `status` is the HTTP code; `code` is Google's + /// machine-readable error reason; `message` is their human detail. + case api(status: Int, code: String, message: String) + + /// One-liner the skill can pass back through `function` role so the + /// model has something concrete to relay to the user. + var hint: String { + switch self { + case .notConnected: + return "Google Workspace isn't connected.
Ask the user to paste their access token in Settings \u{2192} Keys \u{2192} Google Workspace Access Token." + case .transport: + return "Network error talking to googleapis.com. Suggest retrying." + case .malformedResponse: + return "Google returned an unexpected response shape." + case .api(let status, _, _): + return GoogleWorkspaceClient.recoveryHint(forStatus: status) + } + } + + /// Short stable error code for the JSON payload back to the model. + var code: String { + switch self { + case .notConnected: return "google_not_connected" + case .transport: return "google_transport_failed" + case .malformedResponse: return "google_malformed_response" + case .api(_, let c, _): return c + } + } + } + + private static func recoveryHint(forStatus status: Int) -> String { + switch status { + case 401: + return "The Google access token is invalid or expired. Ask the user to refresh it and paste the new token in Settings \u{2192} Keys \u{2192} Google Workspace Access Token." + case 403: + return "The token doesn't have permission for this resource. The user may need to grant additional scopes." + case 404: + return "Resource not found. Double-check the id." + case 429: + return "Google rate-limited the call. Wait a moment and retry." + default: + return "Google API returned HTTP \(status). See https://developers.google.com/workspace for details." + } + } + + // MARK: - Public request interface + + /// Perform a GET request against a Google API endpoint. + func get(url urlString: String, + queryItems: [URLQueryItem]? = nil, + completion: @escaping (Result<[String: Any], GoogleError>) -> Void) { + request(method: "GET", urlString: urlString, queryItems: queryItems, body: nil, completion: completion) + } + + /// Perform a POST request against a Google API endpoint. 
+ func post(url urlString: String, + body: [String: Any]?, + completion: @escaping (Result<[String: Any], GoogleError>) -> Void) { + request(method: "POST", urlString: urlString, queryItems: nil, body: body, completion: completion) + } + + /// Perform a POST request with raw Data body (used for MIME messages). + func postRaw(url urlString: String, + data: Data, + contentType: String, + completion: @escaping (Result<[String: Any], GoogleError>) -> Void) { + guard let token = KeyStore.shared.value(for: .googleWorkspaceAccessToken), + !token.isEmpty else { + DispatchQueue.main.async { completion(.failure(.notConnected)) } + return + } + guard let url = URL(string: urlString) else { + DispatchQueue.main.async { completion(.failure(.transport)) } + return + } + var req = URLRequest(url: url) + req.httpMethod = "POST" + req.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + req.setValue(contentType, forHTTPHeaderField: "Content-Type") + req.httpBody = data + + let task = session.dataTask(with: req) { data, response, error in + DispatchQueue.main.async { + if error != nil { completion(.failure(.transport)); return } + guard let http = response as? HTTPURLResponse, + let data else { completion(.failure(.transport)); return } + let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any] + if (200..<300).contains(http.statusCode) { + completion(.success(json ?? 
[:])) + } else { + let (code, message) = GoogleWorkspaceClient.parseError(json: json, status: http.statusCode) + completion(.failure(.api(status: http.statusCode, code: code, message: message))) + } + } + } + task.resume() + } + + // MARK: - Private request plumbing + + private func request(method: String, + urlString: String, + queryItems: [URLQueryItem]?, + body: [String: Any]?, + completion: @escaping (Result<[String: Any], GoogleError>) -> Void) { + guard let token = KeyStore.shared.value(for: .googleWorkspaceAccessToken), + !token.isEmpty else { + DispatchQueue.main.async { completion(.failure(.notConnected)) } + return + } + + guard var components = URLComponents(string: urlString) else { + DispatchQueue.main.async { completion(.failure(.transport)) } + return + } + if let queryItems, !queryItems.isEmpty { + var existing = components.queryItems ?? [] + existing.append(contentsOf: queryItems) + components.queryItems = existing + } + guard let url = components.url else { + DispatchQueue.main.async { completion(.failure(.transport)) } + return + } + + var req = URLRequest(url: url) + req.httpMethod = method + req.setValue("Bearer \(token)", forHTTPHeaderField: "Authorization") + req.setValue("application/json", forHTTPHeaderField: "Content-Type") + if let body { + req.httpBody = try? JSONSerialization.data(withJSONObject: body) + } + + let task = session.dataTask(with: req) { data, response, error in + DispatchQueue.main.async { + if error != nil { completion(.failure(.transport)); return } + guard let http = response as? HTTPURLResponse, + let data else { completion(.failure(.transport)); return } + let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any] + if (200..<300).contains(http.statusCode) { + completion(.success(json ?? 
[:])) + } else { + let (code, message) = GoogleWorkspaceClient.parseError(json: json, status: http.statusCode) + completion(.failure(.api(status: http.statusCode, code: code, message: message))) + } + } + } + task.resume() + } + + /// Parse a Google API error response into a code + message pair. + private static func parseError(json: [String: Any]?, status: Int) -> (code: String, message: String) { + if let errorDict = json?["error"] as? [String: Any] { + let message = (errorDict["message"] as? String) ?? "HTTP \(status)" + if let errors = errorDict["errors"] as? [[String: Any]], + let first = errors.first, + let reason = first["reason"] as? String { + if status == 401 { return (code: "token_expired", message: message) } + return (code: reason, message: message) + } + let code = (errorDict["status"] as? String) ?? "http_\(status)" + if status == 401 { return (code: "token_expired", message: message) } + return (code: code, message: message) + } + if status == 401 { return (code: "token_expired", message: "Token expired or invalid") } + return (code: "http_\(status)", message: "HTTP \(status)") + } +} diff --git a/LoopIOS/Skills/Image/ImageSkill.swift b/LoopIOS/Skills/Image/ImageSkill.swift index e636c45..b30c3b0 100644 --- a/LoopIOS/Skills/Image/ImageSkill.swift +++ b/LoopIOS/Skills/Image/ImageSkill.swift @@ -38,10 +38,15 @@ final class ImageSkill { static let shared = ImageSkill() static let systemPromptFragment: String = """ -You can generate images inline in chat using the generate_image tool. +You can generate ORIGINAL, invented images inline in chat using the generate_image tool. This is an AI art generator (gpt-image), NOT a way to find real photos. -When to call: -- The user describes an image idea ("draw me…", "show me…", "mockup of…", "moodboard…"). +CRITICAL — generate_image is expensive. 
Do NOT use it to find or show real, existing things: +- If the user wants to see real photos/pictures/images of an actual place, person, product, animal, event, or anything that exists in the world ("find me images of Ocean Beach", "show me photos of the Eiffel Tower", "what does a quokka look like"), use image_search instead. NEVER call generate_image for these. +- Only use generate_image when the user wants something invented or artistic that does not exist yet: a drawing, mockup, concept art, logo, moodboard, wallpaper, or an explicit "generate/draw/paint/illustrate" request. +- If you're unsure whether the user wants real photos or invented art, prefer image_search (it's far cheaper) or ask. + +When to call generate_image: +- The user describes an invented image idea ("draw me…", "mockup of…", "moodboard…", "illustrate…"). - The user asks to iterate on a previously-generated image ("make it darker", "same scene but cinematic", "remove the background"). In that case, look at the prior generate_image call's prompt and write a new full prompt that incorporates the change — do not pass a delta, the tool always takes a full prompt. Rules: diff --git a/LoopIOS/Skills/ImageSearch/SerpImageSearchSkill.swift b/LoopIOS/Skills/ImageSearch/SerpImageSearchSkill.swift new file mode 100644 index 0000000..ef85018 --- /dev/null +++ b/LoopIOS/Skills/ImageSearch/SerpImageSearchSkill.swift @@ -0,0 +1,269 @@ +// +// SerpImageSearchSkill.swift +// Loop +// +// Lets Loop search the web for images and render them inline as a thumbnail +// gallery. Calls SerpAPI's Google Images engine +// (https://serpapi.com/images-results) directly from the device using the +// SERPAPI_API_KEY in the KeyStore, so this skill does not depend on the +// backend. +// +// Synchronous, like MapsSkill: the search returns all image URLs in one call, +// so the skill returns a `role:"function"` result message that carries an +// `ImageGalleryAttachment`. 
The chat cell renders that as a horizontal +// thumbnail strip; thumbnails load lazily and tapping one opens the full +// image. No host protocol / generating lifecycle. +// + +import Foundation + +struct SerpImageSearchSkill { + static let shared = SerpImageSearchSkill() + + private static let baseURL = "https://serpapi.com/search.json" + + /// Hard cap so a runaway `num_results` can't flood the gallery. + private static let maxResults = 10 + private static let defaultResults = 6 + + static let systemPromptFragment: String = """ +You can search the web for REAL photos and render them inline with this tool: +- image_search: pass a `query` (e.g. "Alamo Square park", "golden retriever puppy", "mid-century modern living room") and optional `num_results` (default 6, max 10). Returns Google Images results and renders them as a thumbnail gallery in the chat; the user can tap a thumbnail to open the full image. + +This is the DEFAULT and ONLY correct tool whenever the user wants to see real, existing images/photos/pictures of anything in the world ("find me images of…", "show me photos of…", "what does X look like", "pull up pics of…", "get me images of…"). It is much cheaper than generating images. + +Hard rules: +- For real/existing subjects, ALWAYS call image_search. NEVER call generate_image for these — generating an AI picture of a real place/person/thing is wrong and expensive. +- Only use generate_image (not this tool) when the user explicitly wants an *invented* or artistic image that doesn't exist yet (a drawing, mockup, concept, logo, moodboard). +- One image_search call per request renders the whole gallery — do not loop or call it once per image, and do not fall back to exa_search/fetch_url to scrape image URLs. If image_search returns an error, tell the user it failed (and why) rather than generating images or scraping pages. 
+ +After a successful call: add a short one-liner ("Here are a few shots of Ocean Beach.") — the gallery shows the images, so don't list them out. +""" + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "image_search", + "description": "Search the web for real images (Google Images via SerpAPI) and render them inline as a tappable thumbnail gallery in the chat. Use when the user wants to see real photos/pictures of something. For invented/AI-generated images use generate_image instead.", + "parameters": [ + "type": "object", + "properties": [ + "query": [ + "type": "string", + "description": "What to find images of, in natural language (e.g. \"Alamo Square park\")." + ], + "num_results": [ + "type": "integer", + "description": "How many images to show (default 6, max 10)." + ] + ], + "required": ["query"] + ] + ] + ] + ] + + static let toolNames: Set = [ + "image_search" + ] + + func handles(functionName: String) -> Bool { + return SerpImageSearchSkill.toolNames.contains(functionName) + } + + func statusText(for call: FunctionCallStruct) -> String? { + switch call.name { + case "image_search": + if let q = (call.arguments["query"] as? String)? + .trimmingCharacters(in: .whitespacesAndNewlines), !q.isEmpty { + return "finding images of \(q)" + } + return "searching for images" + default: + return nil + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + // Short-circuit before any network call if no SerpAPI key is set — + // return a function-role message that prompts the model to explain + // the situation and offer to store one securely. + if SerpImageSearchSkill.apiKey == nil { + completion(SerpImageSearchSkill.noApiKeyMessage(for: functionCall.name)) + return + } + switch functionCall.name { + case "image_search": + guard let query = (functionCall.arguments["query"] as? String)? 
+ .trimmingCharacters(in: .whitespacesAndNewlines), + !query.isEmpty else { + completion(MessageStruct( + role: "function", + content: "I need a `query` to call image_search.", + name: "image_search" + )) + return + } + let requested = intArg(functionCall.arguments["num_results"]) ?? SerpImageSearchSkill.defaultResults + let n = max(1, min(SerpImageSearchSkill.maxResults, requested)) + imageSearch(query: query, + numResults: n, + conversationId: functionCall.conversationId, + completion: completion) + default: + completion(MessageStruct( + role: "assistant", + content: "I don't know how to handle the image-search tool '\(functionCall.name)'." + )) + } + } + + // MARK: - Tool handler + + private func imageSearch(query: String, + numResults: Int, + conversationId: String?, + completion: @escaping (MessageStruct) -> Void) { + guard let apiKey = SerpImageSearchSkill.apiKey else { + completion(SerpImageSearchSkill.noApiKeyMessage(for: "image_search")) + return + } + var components = URLComponents(string: SerpImageSearchSkill.baseURL) + components?.queryItems = [ + URLQueryItem(name: "engine", value: "google_images"), + URLQueryItem(name: "q", value: query), + URLQueryItem(name: "ijn", value: "0"), + URLQueryItem(name: "api_key", value: apiKey) + ] + guard let url = components?.url else { + completion(SerpImageSearchSkill.errorMessage("couldn't build the request URL")) + return + } + + var request = URLRequest(url: url) + request.httpMethod = "GET" + request.timeoutInterval = 30 + + URLSession.shared.dataTask(with: request) { data, response, error in + let status = (response as? HTTPURLResponse)?.statusCode ?? 0 + if let error = error { + NSLog("[image_search] transport error: \(error.localizedDescription)") + completion(SerpImageSearchSkill.errorMessage("Image search failed (network): \(error.localizedDescription)")) + return + } + guard let data = data, + let json = try? JSONSerialization.jsonObject(with: data) as? 
[String: Any] else { + let snippet = data.flatMap { String(data: $0.prefix(300), encoding: .utf8) } ?? "" + NSLog("[image_search] non-JSON response (status \(status)): \(snippet)") + completion(SerpImageSearchSkill.errorMessage("Image search returned an unexpected response (status \(status)).")) + return + } + // SerpAPI reports failures both via HTTP status and an `error` + // field on a 200 — surface either so we don't silently fall back. + if status >= 400 || json["error"] != nil { + let msg = (json["error"] as? String) ?? "Image search failed (status \(status))." + NSLog("[image_search] API error (status \(status)): \(msg)") + completion(SerpImageSearchSkill.errorMessage(msg)) + return + } + + let raw = (json["images_results"] as? [[String: Any]]) ?? [] + NSLog("[image_search] query=\"\(query)\" status=\(status) images_results=\(raw.count)") + let items: [ImageGalleryAttachment.Item] = raw.prefix(numResults).compactMap { r in + // `original` is the full-res image; `thumbnail` the small grid + // image. Need at least one usable image URL to render a tile. + let original = (r["original"] as? String) ?? (r["thumbnail"] as? String) + let thumb = (r["thumbnail"] as? String) ?? original + guard let originalURL = original, let thumbURL = thumb, + !originalURL.isEmpty, !thumbURL.isEmpty else { return nil } + let link = (r["link"] as? String) + let title = (r["title"] as? String) + return ImageGalleryAttachment.Item( + thumbnailURL: thumbURL, + originalURL: originalURL, + sourceLink: (link?.isEmpty == false) ? link : nil, + title: (title?.isEmpty == false) ? 
title : nil + ) + } + + guard !items.isEmpty else { + completion(MessageStruct( + role: "function", + content: "No images found for \"\(query)\".", + name: "image_search" + )) + return + } + + let attachment = ImageGalleryAttachment( + query: query, + items: items, + conversationId: conversationId + ) + + // Short body for the model — the user-visible surface is the + // rendered gallery; we just confirm what landed plus the sources + // so the model can attribute/caption without restating the list. + var lines = ["Showed \(items.count) image\(items.count == 1 ? "" : "s") for \"\(query)\" in a gallery. Sources:"] + for (i, item) in items.enumerated() { + let label = item.title ?? URL(string: item.sourceLink ?? "")?.host ?? "image" + let src = item.sourceLink ?? item.originalURL + lines.append("\(i + 1). \(SerpImageSearchSkill.truncate(label, to: 80)) — \(src)") + } + + completion(MessageStruct( + role: "function", + content: lines.joined(separator: "\n"), + name: "image_search", + imageGalleryAttachment: attachment + )) + }.resume() + } + + // MARK: - Helpers + + private static var apiKey: String? { + return KeyStore.shared.value(for: .serpAPI) + } + + /// Coerces a loosely-typed tool argument to Int, or nil when it cannot be + /// represented as one. + private func intArg(_ value: Any?) -> Int? { + if let i = value as? Int { return i } + // Int(d) traps on NaN, infinity, or values outside Int's range, so use + // the failable exact conversion after truncating toward zero instead. + if let d = value as? Double { return Int(exactly: d.rounded(.towardZero)) } + if let n = value as? NSNumber { return n.intValue } + if let s = value as? String { return Int(s) } + return nil + } + + /// Returns `s` cut to at most `max` characters. + // NOTE(review): in this copy the text from the end of the slice expression + // through the `errorMessage` signature was lost (everything between a "<" + // and the next ">" is missing). Both are restored minimally from their call + // sites; confirm against the repository whether the original appended an + // ellipsis after truncating. + private static func truncate(_ s: String, to max: Int) -> String { + if s.count <= max { return s } + let idx = s.index(s.startIndex, offsetBy: max) + return String(s[..<idx]) + } + + /// Wraps a failure description in a function-role result for image_search. + private static func errorMessage(_ message: String) -> MessageStruct { + return MessageStruct( + role: "function", + content: "Image search failed: \(message). Tell the user image search isn't working right now — do NOT generate AI images or scrape web pages as a substitute.", + name: "image_search" + ) + } + + /// Returned as the function result when no SerpAPI key is configured.
+ /// Sent as a function-role message so the model phrases the ask to the + /// user instead of us hard-coding a string into the chat. + private static func noApiKeyMessage(for functionName: String) -> MessageStruct { + let content = KeyStore.missingKeyInstruction( + for: [.serpAPI], + purpose: "web image search (SerpAPI). A free key is available at https://serpapi.com" + ) + return MessageStruct(role: "function", content: content, name: functionName) + } +} diff --git a/LoopIOS/Skills/Integrations/IntegrationSkill.swift b/LoopIOS/Skills/Integrations/IntegrationSkill.swift index df53d74..4fa658f 100644 --- a/LoopIOS/Skills/Integrations/IntegrationSkill.swift +++ b/LoopIOS/Skills/Integrations/IntegrationSkill.swift @@ -48,7 +48,7 @@ final class IntegrationSkill { static let systemPromptFragment: String = """ You can manage the user's integrations and API keys directly: -- list_integrations: enumerate the integrations Loop knows about (Google Calendar via EventKit, Notion, Gmail, Slack, Apple Health) and their current connection state. Notion and Slack are token-backed (ntn_… integration token and xoxp- user token respectively, both stored in the Keychain) — status flips to "connected" once the user pastes the relevant key. Apple Health is OS-permission-backed (like Calendar). +- list_integrations: enumerate the integrations Loop knows about (Google Calendar via EventKit, Notion, Gmail, Slack, Google Workspace, Apple Health) and their current connection state. Notion, Slack, and Google Workspace are token-backed (ntn_… integration token, xoxp- user token, and OAuth2 access token respectively, all stored in the Keychain) — status flips to "connected" once the user pastes the relevant key. Apple Health is OS-permission-backed (like Calendar). - connect_integration: kick off the connect flow for a named integration. 
For Google Calendar this triggers the OS permission prompt when status is undetermined; if access was previously denied, the tool returns a hint telling you to call open_integration_settings with target="calendar_privacy". For Slack, this returns a `needs_api_key` payload with instructions to walk the user through minting an xoxp- token and pasting it in Settings → Keys → Slack User Token. - open_integration_settings: surfaces the in-app Integrations panel (target="in_app", default) or the system Privacy pane (target="calendar_privacy"). Use this when the user says "open integrations" / "let me see my settings". - list_api_keys: reports which API keys are currently set (Deepgram, ElevenLabs, OpenAI, Exa, Cursor, Obsidian). Values are never returned — only whether each is present. @@ -507,6 +507,13 @@ Tips: case .xAccessToken: return "x_access_token" case .xAccessTokenSecret: return "x_access_token_secret" case .sfBayTransit: return "sf_bay_transit" + case .googleWorkspaceAccessToken: return "google_workspace_access_token" + case .googleWorkspaceRefreshToken: return "google_workspace_refresh_token" + case .googleWorkspaceClientId: return "google_workspace_client_id" + case .googleWorkspaceClientSecret: return "google_workspace_client_secret" + case .agentMail: return "agentmail" + case .agentMailInbox: return "agentmail_inbox" + case .serpAPI: return "serpapi" } } diff --git a/LoopIOS/Skills/Music/MusicController.swift b/LoopIOS/Skills/Music/MusicController.swift index 5485371..0938249 100644 --- a/LoopIOS/Skills/Music/MusicController.swift +++ b/LoopIOS/Skills/Music/MusicController.swift @@ -65,6 +65,13 @@ final class MusicController { private(set) var nowPlaying: NowPlaying? private(set) var pauseReason: PauseReason? + /// Opaque token set when music is ducked for a voice session. Cleared + /// when the user explicitly stops music or changes the queue via system + /// controls, so an orphaned auto-resume never fires after the user has + /// moved on. 
A new `play_music` call replaces the token (the new track + /// becomes the resume target). + private(set) var resumeToken: UUID? + // MARK: - Internals private let player = ApplicationMusicPlayer.shared @@ -76,6 +83,11 @@ final class MusicController { /// Min messages before we consider an auto-swap from vocal → instrumental. private let instrumentalThreshold = 10 + /// Songs tracked for queue-rebuild. On iOS 26+ the native + /// Queue.insert(_:position:) is unreliable mid-playback, so we keep + /// our own list and reconstruct the player queue on append. + private var managedSongQueue: [Song] = [] + private init() { subscribeToSignals() } @@ -198,46 +210,81 @@ final class MusicController { switch targetType.lowercased() { case "song": let id = MusicItemID(targetId) - var req = MusicCatalogResourceRequest(matching: \.id, equalTo: id) - req.limit = 1 - guard let song = try await req.response().items.first else { - return notFoundResult + let song: Song? + if targetId.hasPrefix("i.") { + var req = MusicLibraryRequest() + req.filter(matching: \.id, equalTo: id) + song = try await req.response().items.first + } else { + var req = MusicCatalogResourceRequest(matching: \.id, equalTo: id) + req.limit = 1 + song = try await req.response().items.first } + guard let song else { return notFoundResult } if queueMode == "append" { - try await player.queue.insert(song, position: .tail) + try await appendSongToQueue(song) } else { + managedSongQueue = [song] player.queue = .init(for: [song]) } - captureNowPlaying(title: song.title, - artist: song.artistName, - album: song.albumTitle, - selectedBy: selectedBy) + if queueMode != "append" { + captureNowPlaying(title: song.title, + artist: song.artistName, + album: song.albumTitle, + selectedBy: selectedBy) + } case "album": let id = MusicItemID(targetId) - var req = MusicCatalogResourceRequest(matching: \.id, equalTo: id) - req.limit = 1 - guard let album = try await req.response().items.first else { - return notFoundResult + let 
album: Album? + if targetId.hasPrefix("l.") { + var req = MusicLibraryRequest() + req.filter(matching: \.id, equalTo: id) + album = try await req.response().items.first + } else { + var req = MusicCatalogResourceRequest(matching: \.id, equalTo: id) + req.limit = 1 + album = try await req.response().items.first + } + guard let album else { return notFoundResult } + if queueMode == "append" { + try await player.queue.insert(album, position: .tail) + } else { + managedSongQueue = [] + player.queue = .init(for: [album]) + } + if queueMode != "append" { + captureNowPlaying(title: album.title, + artist: album.artistName, + album: album.title, + selectedBy: selectedBy) } - player.queue = .init(for: [album]) - captureNowPlaying(title: album.title, - artist: album.artistName, - album: album.title, - selectedBy: selectedBy) case "playlist": let id = MusicItemID(targetId) - var req = MusicCatalogResourceRequest(matching: \.id, equalTo: id) - req.limit = 1 - guard let playlist = try await req.response().items.first else { - return notFoundResult + let playlist: Playlist? 
+ if targetId.hasPrefix("p.") { + var req = MusicLibraryRequest() + req.filter(matching: \.id, equalTo: id) + playlist = try await req.response().items.first + } else { + var req = MusicCatalogResourceRequest(matching: \.id, equalTo: id) + req.limit = 1 + playlist = try await req.response().items.first + } + guard let playlist else { return notFoundResult } + if queueMode == "append" { + try await player.queue.insert(playlist, position: .tail) + } else { + managedSongQueue = [] + player.queue = .init(for: [playlist]) + } + if queueMode != "append" { + captureNowPlaying(title: playlist.name, + artist: playlist.curatorName, + album: nil, + selectedBy: selectedBy) } - player.queue = .init(for: [playlist]) - captureNowPlaying(title: playlist.name, - artist: playlist.curatorName, - album: nil, - selectedBy: selectedBy) default: return ["status": "error", @@ -245,9 +292,23 @@ final class MusicController { "message": "target_type must be song, album, or playlist."] } + if queueMode == "append" { + var out: [String: Any] = ["status": "ok", "action": "appended"] + if let np = nowPlaying { + out["now_playing"] = [ + "title": np.title, + "artist": np.artist ?? "", + "is_instrumental": np.isInstrumental + ] + } + return out + } + try await player.prepareToPlay() try await player.play() pauseReason = nil + reduckIfVoiceSessionActive() + AgentActivityLog.shared.log(.status, "playing \(nowPlaying?.title ?? "music")") var out: [String: Any] = ["status": "ok"] @@ -261,6 +322,61 @@ final class MusicController { return out } + // MARK: - Queue append (iOS 26+ workaround) + + /// Append a song without interrupting playback. Tries the native + /// `Queue.insert` first; on failure rebuilds the queue from + /// `managedSongQueue` and restores the current playback position. + private func appendSongToQueue(_ song: Song) async throws { + // Fast path: native insert (works pre-iOS 26). 
+ do { + try await player.queue.insert(song, position: .tail) + managedSongQueue.append(song) + print("MusicController: appended via native insert") + return + } catch { + print("MusicController: native insert failed (\(error)), rebuilding queue") + } + + // Snapshot the queue if we haven't been tracking (e.g. prior play was + // an album/playlist). Best-effort — entries that aren't Song-typed + // are silently dropped. + if managedSongQueue.isEmpty { + for entry in player.queue.entries { + if case .song(let s) = entry.item { managedSongQueue.append(s) } + } + print("MusicController: snapshotted \(managedSongQueue.count) songs from queue") + } + + let wasPlaying = player.state.playbackStatus == .playing + let savedTime = player.playbackTime + + managedSongQueue.append(song) + + // Find the currently-playing song so we rebuild from that point. + var currentIdx = 0 + if let currentEntry = player.queue.currentEntry, + case .song(let currentSong) = currentEntry.item { + currentIdx = managedSongQueue.firstIndex(where: { $0.id == currentSong.id }) ?? 0 + } + + let remaining = Array(managedSongQueue.suffix(from: currentIdx)) + managedSongQueue = remaining + + if wasPlaying { player.pause() } + + player.queue = .init(for: remaining) + try await player.prepareToPlay() + player.playbackTime = savedTime + + if wasPlaying { + try await player.play() + pauseReason = nil + reduckIfVoiceSessionActive() + } + print("MusicController: appended via queue rebuild (\(remaining.count) tracks in queue)") + } + func setMusicMood(rawMood: String, instrumentalOverride: Bool?) async throws -> [String: Any] { let mood = MusicMoodMap.parse(rawMood) let instrumental = instrumentalOverride ?? 
mood.prefersInstrumental @@ -299,6 +415,7 @@ final class MusicController { if let song = song { print("MusicController: setMusicMood queueing song \"\(song.title)\"") + managedSongQueue = [song] player.queue = .init(for: [song]) captureNowPlaying(title: song.title, artist: song.artistName, @@ -317,6 +434,7 @@ final class MusicController { throw error } pauseReason = nil + reduckIfVoiceSessionActive() AgentActivityLog.shared.log(.status, "playing \(song.title)") return [ "status": "ok", @@ -331,6 +449,7 @@ final class MusicController { if let playlist = response.playlists.first { print("MusicController: setMusicMood queueing playlist \"\(playlist.name)\"") + managedSongQueue = [] player.queue = .init(for: [playlist]) captureNowPlaying(title: playlist.name, artist: playlist.curatorName, @@ -349,6 +468,7 @@ final class MusicController { throw error } pauseReason = nil + reduckIfVoiceSessionActive() AgentActivityLog.shared.log(.status, "playing \(playlist.name)") return [ "status": "ok", @@ -372,10 +492,71 @@ final class MusicController { player.pause() pauseReason = reason if reason == .userExplicit { + resumeToken = nil AgentActivityLog.shared.log(.status, "paused music") } } + // MARK: - Voice-session ducking + + /// Synchronously pause music and arm the resume token. Call this + /// **before** the earcon plays so the music drops before the cue sounds. + /// The notification-based path in `handleVoiceLoopState` still acts as a + /// safety net (if the caller forgets, the duck will still happen on the + /// next state change), but calling this explicitly avoids the async + /// latency of the NotificationCenter round-trip. 
+ func duckForVoiceSession() { + guard player.state.playbackStatus == .playing else { return } + player.pause() + pauseReason = .duckRecording + resumeToken = UUID() + print("MusicController: ducked for voice session (token=\(resumeToken!.uuidString.prefix(8)))") + } + + /// Resume music after a voice session if the resume token is still valid + /// and we paused for a duck reason. Fails silently on any error so the + /// user is never spammed with alerts. + func resumeAfterVoiceSession() { + guard resumeToken != nil else { + print("MusicController: resumeAfterVoiceSession — no resume token, skipping") + return + } + guard pauseReason == .duckRecording || pauseReason == .duckSpeaking else { + print("MusicController: resumeAfterVoiceSession — pauseReason is \(String(describing: pauseReason)), skipping") + return + } + Task { [weak self] in + do { + try await self?.player.play() + await MainActor.run { + self?.pauseReason = nil + print("MusicController: resumed after voice session") + } + } catch { + print("MusicController: resumeAfterVoiceSession failed — \(error)") + // Fail silently. The track may no longer be in the queue, + // Apple Music might have errored, etc. + } + } + } + + /// If the voice loop is in a non-idle state (recording, thinking, + /// speaking, …), immediately re-pause the player so a `play()` or + /// `setMusicMood()` call made mid-turn doesn't bleed audio. The new + /// track stays queued and becomes the resume target when the turn ends. + private func reduckIfVoiceSessionActive() { + #if os(macOS) + let voiceState = VoiceLoopCoordinator.current?.state ?? .idle + #else + let voiceState = VoiceLoopCoordinator.shared.state + #endif + guard voiceState != .idle else { return } + player.pause() + pauseReason = .duckSpeaking + resumeToken = UUID() + print("MusicController: re-paused for active voice session (state=\(voiceState))") + } + func resume(reason: PauseReason? 
= nil) async throws { // Only resume if the caller's reason matches why we paused — so // an "idle" tick after recording doesn't accidentally restart a @@ -399,7 +580,9 @@ final class MusicController { func stop() { player.stop() pauseReason = .userExplicit + resumeToken = nil nowPlaying = nil + managedSongQueue = [] AgentActivityLog.shared.log(.status, "stopped music") } @@ -413,6 +596,7 @@ final class MusicController { if let r = pauseReason { out["pause_reason"] = String(describing: r) } + out["will_auto_resume"] = resumeToken != nil if let np = nowPlaying { out["now_playing"] = [ "title": np.title, @@ -451,11 +635,26 @@ final class MusicController { } } - let playlist = try await MusicLibrary.shared.createPlaylist( - name: name, - description: description, - items: resolved - ) + // Try combined creation first; fall back to two-step for iOS 27+ + // where the combined response decoder may be out of sync. + let playlist: Playlist + do { + playlist = try await MusicLibrary.shared.createPlaylist( + name: name, + description: description, + items: resolved + ) + } catch { + print("MusicController: createPlaylist combined call failed — \(error)") + playlist = try await MusicLibrary.shared.createPlaylist( + name: name, + description: description, + items: [] as [Song] + ) + for song in resolved { + try await MusicLibrary.shared.add(song, to: playlist) + } + } AgentActivityLog.shared.log(.status, "created playlist \"\(name)\" with \(resolved.count) tracks") @@ -514,21 +713,32 @@ final class MusicController { switch state { case .recording: // Pause if currently playing; remember why so we can auto-resume. + // The caller may have already called duckForVoiceSession() + // synchronously — in that case the player is already paused and + // this is a no-op. 
if player.state.playbackStatus == .playing { - pause(reason: .duckRecording) + player.pause() + pauseReason = .duckRecording + if resumeToken == nil { resumeToken = UUID() } } case .speaking: if player.state.playbackStatus == .playing { - pause(reason: .duckSpeaking) + player.pause() + pauseReason = .duckSpeaking + if resumeToken == nil { resumeToken = UUID() } } - case .idle, .thinking, .transcribing: - // Resume only if we paused for a duck reason. User-explicit - // pauses (or interruptions) stay paused until the user/system - // says otherwise. - if pauseReason == .duckRecording || pauseReason == .duckSpeaking { - Task { [weak self] in - try? await self?.resume(reason: self?.pauseReason) - } + case .thinking, .transcribing: + // Mid-voice-flow — the assistant hasn't spoken yet. Keep music + // paused so there's no audible gap between recording-end and + // TTS-start. Resume is deferred to `.idle`. + break + case .idle: + // The voice turn is fully complete (TTS finished, mic closed). + // Resume only if we ducked and the resume token is still valid + // (not cleared by a user-explicit stop or track change). + if resumeToken != nil, + pauseReason == .duckRecording || pauseReason == .duckSpeaking { + resumeAfterVoiceSession() } } } diff --git a/LoopIOS/Skills/Music/MusicSkill.swift b/LoopIOS/Skills/Music/MusicSkill.swift index ad746d2..a67828c 100644 --- a/LoopIOS/Skills/Music/MusicSkill.swift +++ b/LoopIOS/Skills/Music/MusicSkill.swift @@ -17,7 +17,7 @@ struct MusicSkill { static let systemPromptFragment: String = """ You can put on music to match the moment via these tools: - find_music: catalog search across songs, albums, and playlists. Pass `query`, optional `instrumental_only` (default false), optional `limit` (default 8). -- play_music: start playback of a `target_id` from a previous find_music result. `target_type` ∈ "song" | "album" | "playlist". `queue_mode` ∈ "replace" | "append" (default "replace"). 
+- play_music: start playback of a `target_id` from a previous find_music result or a user-library id (p.… playlists, l.… albums, i.… songs). `target_type` ∈ "song" | "album" | "playlist". `queue_mode` ∈ "replace" | "append" (default "replace"). For playlists/albums the entire track list is queued, not just the first track. - set_music_mood: high-level shortcut — pass `mood` and we pick a fitting track for you. Mood vocabulary: \(MusicMoodMap.vocabularyList). Pass `instrumental_only=true` to force a vocal-free pick. - create_playlist: save a curated list of `track_ids` as a playlist in the user's Apple Music library. iOS only; on macOS this returns a friendly error. - control_music: queue control. `action` ∈ "pause" | "resume" | "skip" | "stop". Use "stop" when the user wants music off entirely. @@ -62,13 +62,13 @@ How to behave: "type": "function", "function": [ "name": "play_music", - "description": "Start playback of a song / album / playlist by its catalog id (from a prior find_music call).", + "description": "Start playback of a song / album / playlist. Accepts catalog ids (from find_music) and user-library ids (p.… playlists, l.… albums, i.… songs).", "parameters": [ "type": "object", "properties": [ "target_id": [ "type": "string", - "description": "Apple Music catalog id." + "description": "Apple Music catalog or library id." ], "target_type": [ "type": "string", diff --git a/LoopIOS/Skills/Scheduler/BackgroundScheduler.swift b/LoopIOS/Skills/Scheduler/BackgroundScheduler.swift index de098fe..96be093 100644 --- a/LoopIOS/Skills/Scheduler/BackgroundScheduler.swift +++ b/LoopIOS/Skills/Scheduler/BackgroundScheduler.swift @@ -466,6 +466,10 @@ final class BackgroundScheduler { completion(false) case .notDetermined: self.center.requestAuthorization(options: [.alert, .sound, .badge]) { granted, _ in + // Obtain & register the APNs token now that we're authorized. 
+ if granted { + PushRegistration.shared.registerIfAuthorized() + } completion(granted) } @unknown default: @@ -555,6 +559,10 @@ final class BackgroundScheduler { let startedAt = Date() isRunningHeadless = true + // Fresh scheduled job — reset the anti-loop guard so prior state + // from interactive use doesn't falsely flag this run's tool calls. + ToolCallGuard.shared.resetForNewTurn() + // Fresh conversation so the user can scroll back to past briefings via // the normal conversation list. Title doubles as the notification's // visible title. diff --git a/LoopIOS/Skills/Scheduler/VMCronManager.swift b/LoopIOS/Skills/Scheduler/VMCronManager.swift new file mode 100644 index 0000000..945773f --- /dev/null +++ b/LoopIOS/Skills/Scheduler/VMCronManager.swift @@ -0,0 +1,316 @@ +// +// VMCronManager.swift +// Loop +// +// Recurring "VM agents": a prompt that runs on the user's SSH VM on a cron +// schedule and pings the phone with the result. Each job is the +// `BackgroundTurnRunner` one-shot made repeatable: +// +// - We pin one local conversation (the evolving thread) to the job. +// - We write `run.py` (the shared `VMAgentRuntime` agent loop) + a long-lived +// `req.json` (cron mode) under `~/.loop/cron//` over SSH. +// - We install a marker-tagged crontab line so we can list/remove only ours. +// - On each firing, `run.py` mints a fresh `turn_id`, runs the prompt, appends +// `results.ndjson`, and POSTs a `runner_turn` push. The device's +// `RunnerTurnApplier` appends the reply into the pinned thread exactly-once +// (push primary; `VMCronPoller` SSH-reads `results.ndjson` as a backstop). +// +// Removal strips the crontab line and `rm -rf`s the job dir on the VM. The +// conversation thread is kept as history. +// + +import Foundation +import os + +// MARK: - Model + +/// A scheduled VM agent. Persisted locally (UserDefaults); the crontab on the VM +/// is the source of truth for *firing*, this mirror powers the UI + the poller. 
+struct VMCronJob: Codable, Identifiable, Equatable { + var id: String + var title: String + var prompt: String + /// Standard 5-field cron expression (VM-local time), e.g. "0 */2 * * *". + var cronExpr: String + /// Human-readable schedule for display, e.g. "every 2 hours". + var humanSchedule: String + /// The pinned local conversation this job's runs append to. + var conversationId: String + /// Which SSH connection the job was created on (so removal targets the right VM). + var sshConfigID: String + var createdAt: Date + var lastRunAt: Date? + /// Number of `results.ndjson` lines already applied to the thread (poller cursor). + var resultsCursor: Int +} + +// MARK: - Store + +/// Tiny UserDefaults-backed registry of VM cron jobs (JSON list). +final class VMCronStore { + static let shared = VMCronStore() + private init() {} + + private let key = "loop.vmcron.jobs.v1" + private let defaults = UserDefaults.standard + private let lock = NSLock() + + func all() -> [VMCronJob] { + lock.lock(); defer { lock.unlock() } + return load() + } + + func upsert(_ job: VMCronJob) { + lock.lock(); defer { lock.unlock() } + var jobs = load() + if let i = jobs.firstIndex(where: { $0.id == job.id }) { jobs[i] = job } + else { jobs.append(job) } + save(jobs) + } + + @discardableResult + func remove(id: String) -> VMCronJob? { + lock.lock(); defer { lock.unlock() } + var jobs = load() + guard let i = jobs.firstIndex(where: { $0.id == id }) else { return nil } + let removed = jobs.remove(at: i) + save(jobs) + return removed + } + + private func load() -> [VMCronJob] { + guard let data = defaults.data(forKey: key), + let jobs = try? JSONDecoder().decode([VMCronJob].self, from: data) else { return [] } + return jobs + } + + private func save(_ jobs: [VMCronJob]) { + if let data = try? 
JSONEncoder().encode(jobs) { defaults.set(data, forKey: key) } + } +} + +// MARK: - Manager + +final class VMCronManager { + + static let shared = VMCronManager() + private init() {} + + private static let log = Logger(subsystem: "com.bhat.intel", category: "vmcron") + + enum CreateResult { + case success(VMCronJob) + case failure(String) + } + + // MARK: Create + + /// Write the script + request to the VM, install the crontab line, pin a + /// conversation, and persist the job. `cronExpr` is a standard 5-field cron + /// expression (VM-local time). `humanSchedule` is for display only. + func create(title: String, + prompt: String, + cronExpr: String, + humanSchedule: String) async -> CreateResult { + let config = SSHConfigStore.shared.config + guard config.isConfigured else { + return .failure("No SSH connection selected. Add one in Settings → SSH first.") + } + guard let pc = VMAgentRuntime.providerConfig() else { + return .failure("No cloud model API key set (add one in Settings → Keys).") + } + let userId = LoopRunnerClient.deviceUserId + guard !userId.isEmpty else { + return .failure("No device push id yet — open the app once with notifications enabled.") + } + guard let cleanCron = Self.sanitizeCron(cronExpr) else { + return .failure("Invalid cron expression: '\(cronExpr)'. Expected 5 fields, e.g. '0 */2 * * *'.") + } + + let jobId = UUID().uuidString + + // Pin the evolving thread up front so the first run has somewhere to land. + let conversation = await MainActor.run { + SimpleConversationManager.shared.createConversation(title: title) + } + + // Request consumed by run.py each firing. Paths are RELATIVE — the crontab + // line `cd`s into the job dir, so Python opens them against that cwd + // (Python's open() doesn't expand `$HOME`). 
+ let cfg: [String: Any] = [ + "cron": true, + "job_id": jobId, + "provider": pc.provider, + "model": pc.modelID, + "api_key": pc.key, + "messages": [["role": "user", "content": prompt]], + "user_id": userId, + "conversation_id": conversation.id, + "title": title, + "push_url": VMAgentRuntime.pushURL, + "results_path": "results.ndjson", + "env": VMAgentRuntime.exportableKeys(), + ] + guard let cfgData = try? JSONSerialization.data(withJSONObject: cfg) else { + return .failure("Could not encode the request.") + } + + let dir = "$HOME/.loop/cron/\(jobId)" + let cfgB64 = cfgData.base64EncodedString() + let scriptB64 = Data(VMAgentRuntime.pythonScript.utf8).base64EncodedString() + // `$HOME` stays single-quoted so the OUTER shell leaves it literal; cron's + // /bin/sh expands it at fire time. The `# loop-cron ` marker lets us + // find and remove exactly this line later. + let cronLine = "\(cleanCron) cd $HOME/.loop/cron/\(jobId) && /usr/bin/env python3 run.py req.json >> run.log 2>&1 # loop-cron \(jobId)" + + let cmd = """ + command -v python3 >/dev/null 2>&1 || { echo LOOP_NOPYTHON; exit 0; } + command -v crontab >/dev/null 2>&1 || { echo LOOP_NOCRON; exit 0; } + mkdir -p \(dir) && printf %s '\(scriptB64)' | base64 -d > \(dir)/run.py && printf %s '\(cfgB64)' | base64 -d > \(dir)/req.json && chmod 600 \(dir)/req.json || { echo LOOP_WRITEFAIL; exit 0; } + ( crontab -l 2>/dev/null | grep -v 'loop-cron \(jobId)'; echo '\(cronLine)' ) | crontab - || { echo LOOP_CRONFAIL; exit 0; } + echo LOOP_OK + """ + + do { + let r = try await SSHSkill.shared.runCommand(cmd, on: config, timeout: 30) + if r.stdout.contains("LOOP_NOPYTHON") { return .failure("python3 isn't installed on the VM.") } + if r.stdout.contains("LOOP_NOCRON") { return .failure("cron (crontab) isn't available on the VM.") } + if r.stdout.contains("LOOP_WRITEFAIL") { return .failure("Couldn't write the job on the VM: \(r.stderr)") } + if r.stdout.contains("LOOP_CRONFAIL") { return .failure("Couldn't install the 
cron schedule: \(r.stderr)") } + guard r.stdout.contains("LOOP_OK") else { + return .failure(r.stderr.isEmpty ? "Setup not confirmed (\(r.stdout.prefix(120)))" : r.stderr) + } + + let job = VMCronJob( + id: jobId, + title: title, + prompt: prompt, + cronExpr: cleanCron, + humanSchedule: humanSchedule.isEmpty ? cleanCron : humanSchedule, + conversationId: conversation.id, + sshConfigID: config.id.uuidString, + createdAt: Date(), + lastRunAt: nil, + resultsCursor: 0 + ) + VMCronStore.shared.upsert(job) + Self.log.info("created VM cron \(jobId, privacy: .public) (\(cleanCron, privacy: .public))") + return .success(job) + } catch { + return .failure(error.localizedDescription) + } + } + + // MARK: Delete + + /// Remove the crontab line + job dir on the VM and drop the local record. + /// The conversation thread is kept. Returns nil if the job is unknown. + @discardableResult + func delete(id: String) async -> String? { + guard let job = VMCronStore.shared.all().first(where: { $0.id == id }) else { return nil } + // Prefer the connection the job was created on; fall back to the active one. + let config = SSHConfigStore.shared.connection(id: UUID(uuidString: job.sshConfigID) ?? UUID()) + ?? SSHConfigStore.shared.config + let cmd = """ + if command -v crontab >/dev/null 2>&1; then crontab -l 2>/dev/null | grep -v 'loop-cron \(job.id)' | crontab - 2>/dev/null; fi + rm -rf $HOME/.loop/cron/\(job.id) + echo LOOP_OK + """ + if config.isConfigured { + _ = try? await SSHSkill.shared.runCommand(cmd, on: config, timeout: 20) + } + VMCronStore.shared.remove(id: id) + Self.log.info("deleted VM cron \(job.id, privacy: .public)") + return job.title + } + + // MARK: List + + func list() -> [VMCronJob] { VMCronStore.shared.all() } + + // MARK: Poller backstop + + /// One result line read back from a job's `results.ndjson`. 
+ struct ResultLine { + let turnId: String + let conversationId: String + let text: String + let error: String + } + + /// SSH-read `results.ndjson` lines past the job's stored cursor. Returns the + /// new lines plus the updated total line count (the new cursor). Used by + /// `VMCronPoller` so runs that fired while a push was missed still land. + func fetchNewResults(for job: VMCronJob) async -> (lines: [ResultLine], newCursor: Int)? { + let config = SSHConfigStore.shared.connection(id: UUID(uuidString: job.sshConfigID) ?? UUID()) + ?? SSHConfigStore.shared.config + guard config.isConfigured else { return nil } + let cmd = "cat $HOME/.loop/cron/\(job.id)/results.ndjson 2>/dev/null | base64" + guard let r = try? await SSHSkill.shared.runCommand(cmd, on: config, timeout: 20), + r.exitCode == 0 else { return nil } + let b64 = r.stdout.replacingOccurrences(of: "\n", with: "") + guard !b64.isEmpty, + let data = Data(base64Encoded: b64), + let body = String(data: data, encoding: .utf8) else { + return ([], job.resultsCursor) + } + let rows = body.split(separator: "\n", omittingEmptySubsequences: true).map(String.init) + let total = rows.count + guard total > job.resultsCursor else { return ([], total) } + let fresh = rows[job.resultsCursor.. ResultLine? { + guard let data = line.data(using: .utf8), + let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else { return nil } + return ResultLine( + turnId: obj["turn_id"] as? String ?? "", + conversationId: obj["conversation_id"] as? String ?? "", + text: obj["text"] as? String ?? "", + error: obj["error"] as? String ?? "" + ) + } + + // MARK: Helpers + + /// Translate a manual-form schedule into (cronExpr, humanText). Accepts a + /// shorthand interval ("30m", "2h", "1d") or a raw 5-field cron expression. + static func parseSchedule(_ raw: String) -> (cron: String, human: String)? 
{ + let s = raw.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() + guard !s.isEmpty else { return nil } + // Raw cron expression (whitespace-separated fields). + if s.contains(" ") { + if let c = sanitizeCron(s) { return (c, c) } + return nil + } + // Shorthand: where unit is m(inutes) / h(ours) / d(ays). + guard let unit = s.last, "mhd".contains(unit), let n = Int(s.dropLast()), n > 0 else { + return nil + } + switch unit { + case "m": return n <= 59 ? ("*/\(n) * * * *", "every \(n) min") : nil + case "h": return n <= 23 ? ("0 */\(n) * * *", "every \(n)h") : nil + case "d": return n <= 31 ? ("0 9 */\(n) * *", n == 1 ? "daily 9am" : "every \(n) days 9am") : nil + default: return nil + } + } + + /// Validate + normalize a 5-field cron expression and reject anything with + /// shell-dangerous characters (it's interpolated into an SSH command). + static func sanitizeCron(_ raw: String) -> String? { + let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.contains("'"), + !trimmed.contains("\n"), + !trimmed.contains("#"), + !trimmed.contains("&") else { return nil } + let fields = trimmed.split(separator: " ", omittingEmptySubsequences: true) + guard fields.count == 5 else { return nil } + // Each field may contain digits, * , - / only. + let allowed = CharacterSet(charactersIn: "0123456789*,-/") + for f in fields where f.unicodeScalars.contains(where: { !allowed.contains($0) }) { + return nil + } + return fields.joined(separator: " ") + } +} diff --git a/LoopIOS/Skills/Scheduler/VMCronSkill.swift b/LoopIOS/Skills/Scheduler/VMCronSkill.swift new file mode 100644 index 0000000..30a64a2 --- /dev/null +++ b/LoopIOS/Skills/Scheduler/VMCronSkill.swift @@ -0,0 +1,252 @@ +// +// VMCronSkill.swift +// Loop +// +// Agent-facing surface for VMCronManager — recurring "agents" that run a prompt +// on the user's SSH VM via cron and push the result back as a chat. 
Exposes: +// - schedule_vm_agent — create a recurring VM job (NL → cron expr by the model) +// - list_vm_agents +// - delete_vm_agent +// +// Distinct from SchedulerSkill's `schedule_task` (on-device daily reminders): +// this runs on the VM with shell + web access, on a real cron schedule, and is +// for recurring agent *work* (digests, monitors) rather than local reminders. +// + +import Foundation + +final class VMCronSkill { + + static let shared = VMCronSkill() + private init() {} + + // MARK: - System prompt fragment + + static let systemPromptFragment: String = """ +You can set up recurring "VM agents" that run on the user's connected SSH VM on a schedule and push the result back as a new message in a dedicated chat thread: + +- schedule_vm_agent: create a recurring job that runs a prompt on the VM (with shell + web access) on a cron schedule. Use this for ongoing work like "every 2 hours read Hacker News and send me the top stories" or "each weekday at 9am summarize my GitHub notifications". The result of each run is delivered as a push and appended to one evolving thread for that job. + - title: a short label (e.g. "HN top stories"). + - prompt: the full instruction the VM agent runs every firing, written so it stands alone (it has no prior chat context) — e.g. "Fetch the Hacker News front page and list the top 5 stories with a one-line summary and link each." + - cron: a standard 5-field cron expression in the VM's local time. Translate the user's words: + - "every 2 hours" → "0 */2 * * *" + - "every 30 minutes" → "*/30 * * * *" + - "every day at 9am" → "0 9 * * *" + - "every weekday at 8:30am" → "30 8 * * 1-5" + - "every Monday at 7am" → "0 7 * * 1" + - schedule_text: a short human-readable version of the schedule (e.g. "every 2 hours") for display. +- list_vm_agents: list the user's VM agents. +- delete_vm_agent: remove a VM agent by id (also removes it on the VM). Use list_vm_agents to find ids. 
+ +Notes: +- Requires an SSH connection (Settings → SSH) and a cloud model API key (Settings → Keys); if missing, the tool returns a clear error to relay. +- Cron times are the VM's local timezone — mention this if the user picks a wall-clock time. +- Prefer schedule_task (not this) for simple on-device reminders that don't need a VM. Use schedule_vm_agent when the work needs the internet/shell or should run server-side even when the phone is asleep. +After creating one, briefly confirm what was set and that results will arrive in a new chat (don't echo the id). +""" + + // MARK: - Tool schemas + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "schedule_vm_agent", + "description": "Create a recurring agent that runs a prompt on the user's SSH VM on a cron schedule. Each run's result is pushed to the device and appended to one evolving chat thread for the job. Use for ongoing internet/shell work (digests, monitors), e.g. 'every 2 hours read Hacker News and send the top stories'.", + "parameters": [ + "type": "object", + "properties": [ + "title": [ + "type": "string", + "description": "Short, user-facing label for the job (e.g. 'HN top stories')." + ], + "prompt": [ + "type": "string", + "description": "The full instruction the VM agent runs on every firing. Write it to stand alone (no prior chat context). E.g. 'Fetch the Hacker News front page and list the top 5 stories with a one-line summary and a link.'" + ], + "cron": [ + "type": "string", + "description": "Standard 5-field cron expression in the VM's local time. Examples: '0 */2 * * *' (every 2 hours), '*/30 * * * *' (every 30 min), '0 9 * * *' (daily 9am), '30 8 * * 1-5' (weekdays 8:30am)." + ], + "schedule_text": [ + "type": "string", + "description": "Short human-readable schedule for display (e.g. 'every 2 hours'). Optional." 
+ ] + ], + "required": ["title", "prompt", "cron"] + ] + ] + ], + [ + "type": "function", + "function": [ + "name": "list_vm_agents", + "description": "List the user's recurring VM agents with their schedule and last run time.", + "parameters": ["type": "object", "properties": [:], "required": []] + ] + ], + [ + "type": "function", + "function": [ + "name": "delete_vm_agent", + "description": "Delete a recurring VM agent by id. Also removes its cron entry and files on the VM. Use list_vm_agents to find ids.", + "parameters": [ + "type": "object", + "properties": [ + "id": [ + "type": "string", + "description": "The id of the VM agent to delete." + ] + ], + "required": ["id"] + ] + ] + ] + ] + + static let toolNames: Set = [ + "schedule_vm_agent", "list_vm_agents", "delete_vm_agent" + ] + + func handles(functionName: String) -> Bool { + return VMCronSkill.toolNames.contains(functionName) + } + + // MARK: - Status text + + func statusText(for call: FunctionCallStruct) -> String? { + switch call.name { + case "schedule_vm_agent": + if let title = call.arguments["title"] as? 
String, !title.isEmpty { + return "scheduling \(title) on your VM" + } + return "scheduling a VM agent" + case "list_vm_agents": + return "looking up your VM agents" + case "delete_vm_agent": + return "removing VM agent" + default: + return nil + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + switch functionCall.name { + case "schedule_vm_agent": + scheduleVMAgent(args: functionCall.arguments, completion: completion) + case "list_vm_agents": + listVMAgents(completion: completion) + case "delete_vm_agent": + deleteVMAgent(args: functionCall.arguments, completion: completion) + default: + completion(Self.functionMessage( + name: functionCall.name, + payload: ["status": "error", "error": "Unknown VM-agent tool '\(functionCall.name)'."] + )) + } + } + + // MARK: - schedule_vm_agent + + private func scheduleVMAgent(args: [String: Any], + completion: @escaping (MessageStruct) -> Void) { + guard let title = (args["title"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines), !title.isEmpty, + let prompt = (args["prompt"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines), !prompt.isEmpty, + let cron = (args["cron"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines), !cron.isEmpty else { + completion(Self.functionMessage( + name: "schedule_vm_agent", + payload: ["status": "error", "error": "Missing arguments: title, prompt, cron are required."] + )) + return + } + let scheduleText = (args["schedule_text"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines) ?? 
"" + + Task { + let result = await VMCronManager.shared.create( + title: title, prompt: prompt, cronExpr: cron, humanSchedule: scheduleText) + switch result { + case .success(let job): + completion(Self.functionMessage( + name: "schedule_vm_agent", + payload: [ + "status": "success", + "id": job.id, + "title": job.title, + "schedule": job.humanSchedule, + "cron": job.cronExpr, + "message": "Scheduled '\(job.title)' on your VM — \(job.humanSchedule). Results will arrive in a new chat." + ] + )) + case .failure(let reason): + completion(Self.functionMessage( + name: "schedule_vm_agent", + payload: ["status": "error", "error": reason] + )) + } + } + } + + // MARK: - list_vm_agents + + private func listVMAgents(completion: @escaping (MessageStruct) -> Void) { + let jobs = VMCronManager.shared.list() + let iso = ISO8601DateFormatter() + let entries: [[String: Any]] = jobs.map { job in + var e: [String: Any] = [ + "id": job.id, + "title": job.title, + "schedule": job.humanSchedule, + "cron": job.cronExpr, + "prompt_preview": String(job.prompt.prefix(80)), + ] + if let last = job.lastRunAt { e["last_run_at"] = iso.string(from: last) } + return e + } + completion(Self.functionMessage( + name: "list_vm_agents", + payload: ["count": entries.count, "agents": entries] + )) + } + + // MARK: - delete_vm_agent + + private func deleteVMAgent(args: [String: Any], + completion: @escaping (MessageStruct) -> Void) { + guard let id = (args["id"] as? 
String)?.trimmingCharacters(in: .whitespacesAndNewlines), !id.isEmpty else { + completion(Self.functionMessage( + name: "delete_vm_agent", + payload: ["status": "error", "error": "Missing argument: id."] + )) + return + } + Task { + if let title = await VMCronManager.shared.delete(id: id) { + completion(Self.functionMessage( + name: "delete_vm_agent", + payload: ["status": "success", "id": id, "title": title, "message": "Deleted '\(title)'."] + )) + } else { + completion(Self.functionMessage( + name: "delete_vm_agent", + payload: ["status": "not_found", "id": id] + )) + } + } + } + + // MARK: - Helpers + + private static func functionMessage(name: String, payload: Any) -> MessageStruct { + let json: String + if let data = try? JSONSerialization.data(withJSONObject: payload, options: []), + let str = String(data: data, encoding: .utf8) { + json = str + } else { + json = "{}" + } + return MessageStruct(role: "function", content: json, name: name) + } +} diff --git a/LoopIOS/SpeechPipeline/DeepgramTTS.swift b/LoopIOS/SpeechPipeline/DeepgramTTS.swift index 1dd0055..d74d251 100644 --- a/LoopIOS/SpeechPipeline/DeepgramTTS.swift +++ b/LoopIOS/SpeechPipeline/DeepgramTTS.swift @@ -197,6 +197,7 @@ final class DeepgramTTS: NSObject, URLSessionWebSocketDelegate { // followed by the fallback's full speech back-to-back. 
if self.engine.isRunning { self.player.stop() + self.engine.mainMixerNode.removeTap(onBus: 0) self.engine.stop() } self.onError?(err) @@ -260,8 +261,10 @@ final class DeepgramTTS: NSObject, URLSessionWebSocketDelegate { guard let self = self else { return } if self.engine.isRunning { self.player.stop() + self.engine.mainMixerNode.removeTap(onBus: 0) self.engine.stop() } + self.onOutputAmplitude?(0) self.task?.cancel(with: .normalClosure, reason: nil) self.task = nil self.session?.invalidateAndCancel() diff --git a/LoopIOS/SpeechPipeline/VoiceLoopCoordinator.swift b/LoopIOS/SpeechPipeline/VoiceLoopCoordinator.swift index 39c42da..9b3b036 100644 --- a/LoopIOS/SpeechPipeline/VoiceLoopCoordinator.swift +++ b/LoopIOS/SpeechPipeline/VoiceLoopCoordinator.swift @@ -27,6 +27,8 @@ extension Notification.Name { /// assistant just finished". MainVC subscribes and calls /// `avatar.pulse()` on both the nav-bar and hero instances. static let voiceLoopAcknowledgePulse = Notification.Name("voiceLoopAcknowledgePulse") + /// Posted when the active STT engine changes so the badge label can update. + static let voiceLoopSTTEngineDidChange = Notification.Name("voiceLoopSTTEngineDidChange") } #if !os(macOS) @@ -37,6 +39,30 @@ final class VoiceLoopCoordinator { /// shared implementation (Phase B) doesn't need a rename. enum State { case idle, recording, transcribing, thinking, speaking } + /// Which speech-to-text backend is driving the current (or most recent) + /// transcription session. `nil` before the first recording. + enum STTEngine { + case deepgram, apple + + /// Label stamped onto a dictated message ("Deepgram STT" / "Apple STT") + /// and shown as a byline under the user's bubble. + var displayLabel: String { + switch self { + case .deepgram: return "Deepgram STT" + case .apple: return "Apple STT" + } + } + } + + private(set) var activeSTTEngine: STTEngine? 
{ + didSet { + guard activeSTTEngine != oldValue else { return } + DispatchQueue.main.async { + NotificationCenter.default.post(name: .voiceLoopSTTEngineDidChange, object: nil) + } + } + } + private(set) var state: State = .idle { didSet { guard state != oldValue else { return } @@ -80,6 +106,10 @@ final class VoiceLoopCoordinator { state = newState } + func setSTTEngine(_ engine: STTEngine) { + activeSTTEngine = engine + } + func publishAmplitude(_ amplitude: Float) { DispatchQueue.main.async { [weak self] in self?.latestAmplitude = amplitude diff --git a/LoopIOS/Stories/StoryAttachment.swift b/LoopIOS/Stories/StoryAttachment.swift new file mode 100644 index 0000000..6eb4893 --- /dev/null +++ b/LoopIOS/Stories/StoryAttachment.swift @@ -0,0 +1,57 @@ +// +// StoryAttachment.swift +// Loop +// +// Defines the story attachment type for chat messages. Mirrors the existing +// ImageAttachment / PDFAttachment pattern: starts at .generating, flips to +// .ready with a rendered HTML file URL, or .failed with a reason. +// + +import Foundation + +/// A generated HTML story (1080×1920 portrait infographic) attached to a chat +/// message. The cell renders it as a scaled thumbnail card; tapping opens the +/// full-screen story player with tap-to-advance navigation. +struct StoryAttachment: Codable { + enum Status: String, Codable, Equatable { + case generating + case ready + case failed + } + + /// Predefined template identifiers. Each corresponds to an HTML template + /// bundled in Stories/Templates/. + enum Template: String, Codable, Equatable { + case dailyRecap = "DailyRecap" + case activitySummary = "ActivitySummary" + } + + let id: String + let title: String + let template: Template + /// The JSON data payload that was fed into the template. + let jsonPayload: String + /// URL to the rendered self-contained HTML file in the workspace. + var fileURL: URL? + var status: Status + var failureReason: String? + let conversationId: String? 
+ + init(id: String = UUID().uuidString, + title: String, + template: Template, + jsonPayload: String, + fileURL: URL? = nil, + status: Status = .generating, + failureReason: String? = nil, + conversationId: String? = nil) { + self.id = id + self.title = title + self.template = template + self.jsonPayload = jsonPayload + self.fileURL = fileURL + self.status = status + self.failureReason = failureReason + self.conversationId = conversationId + } +} diff --git a/LoopIOS/Stories/StoryBundledTemplates.swift b/LoopIOS/Stories/StoryBundledTemplates.swift new file mode 100644 index 0000000..1f433fd --- /dev/null +++ b/LoopIOS/Stories/StoryBundledTemplates.swift @@ -0,0 +1,786 @@ +// +// StoryBundledTemplates.swift +// Loop +// +// The story HTML templates, embedded as Swift string constants. +// +// Xcode's file-system synchronized groups don't reliably copy .html files +// into the app bundle, so loading the templates from Bundle.main can fail at +// runtime. Embedding them here as raw string literals makes the renderer +// self-contained and works across every target. The source-of-truth files +// still live in Stories/Templates/ for editing; regenerate this file from +// them when they change (see the repo note in StoryGenerator). +// + +import Foundation + +enum StoryBundledTemplates { + /// Returns the embedded HTML for a template, or nil if unknown. + static func html(for template: StoryAttachment.Template) -> String? { + switch template { + case .dailyRecap: return dailyRecap + case .activitySummary: return activitySummary + } + } + + static let dailyRecap = #""" + + + + + + + + + + +
+
📊
+

Daily Recap

+
Here's your day at a glance
+
+ + +
+
+
+ + +
+
+

Goals Progress

+
+
+
+ + +
+
+

✨ Highlights

+
+
+
+ + +
+ + + + +"""# + + static let activitySummary = #""" + + + + + + + + + + +
+
🏃‍♂️
+
Activity
+
Your workout summary
+
+ + +
+
+
+ + +
+
+ + + + +
+
0%
+
of goal
+
+
+
+
+ + +
+
+

🕐 Timeline

+
+
+
+ + +
+ + + + +"""# +} diff --git a/LoopIOS/Stories/StoryDemoVC.swift b/LoopIOS/Stories/StoryDemoVC.swift new file mode 100644 index 0000000..2d91e26 --- /dev/null +++ b/LoopIOS/Stories/StoryDemoVC.swift @@ -0,0 +1,324 @@ +// +// StoryDemoVC.swift +// Loop +// +// Demo view controller that showcases the Stories feature end-to-end: +// data in → HTML story out → rendered in the app. Shows a scrollable list +// of sample story cards (inline thumbnails) that tap to open the full-screen +// story player. +// + +#if os(iOS) + +import UIKit + +final class StoryDemoVC: UIViewController { + + // MARK: - Sample data + + private struct SampleStory { + let title: String + let template: StoryAttachment.Template + let data: [String: Any] + } + + private let samples: [SampleStory] = [ + SampleStory( + title: "Morning Hike", + template: .activitySummary, + data: [ + "icon": "🥾", + "title": "Morning Hike", + "subtitle": "Mt. Tamalpais Trail", + "metrics": [ + ["emoji": "📏", "label": "Distance", "value": "12.4", "unit": "mi"], + ["emoji": "⛰️", "label": "Elevation", "value": "2,340", "unit": "ft"], + ["emoji": "👟", "label": "Steps", "value": "15,280", "unit": ""], + ["emoji": "🔥", "label": "Calories", "value": "890", "unit": "kcal"], + ], + "goal": [ + "current": 12.4, + "target": 10.0, + "label": "distance goal", + "caption": "You exceeded your 10-mile goal by 24%! 🎉", + ] as [String: Any], + "timeline": [ + ["time": "6:30 AM", "text": "Started at Pantoll trailhead"], + ["time": "7:15 AM", "text": "Reached West Point Inn viewpoint"], + ["time": "8:00 AM", "text": "Summit! 
Clear views of the Bay"], + ["time": "8:45 AM", "text": "Descended via Dipsea Trail"], + ["time": "9:30 AM", "text": "Back at trailhead — 3h total"], + ], + ] + ), + SampleStory( + title: "Daily Recap", + template: .dailyRecap, + data: [ + "emoji": "📊", + "title": "Your Tuesday", + "subtitle": "June 3, 2025", + "stats": [ + ["emoji": "✉️", "value": "47", "label": "Emails"], + ["emoji": "📅", "value": "6", "label": "Meetings"], + ["emoji": "✅", "value": "12", "label": "Tasks Done"], + ["emoji": "💬", "value": "23", "label": "Messages"], + ], + "goals": [ + ["label": "Deep Work", "current": 3.5, "target": 4.0] as [String: Any], + ["label": "Exercise", "current": 45, "target": 30] as [String: Any], + ["label": "Reading", "current": 15, "target": 30] as [String: Any], + ], + "highlights": [ + ["emoji": "🚀", "text": "Shipped the stories feature prototype"], + ["emoji": "🏃", "text": "Personal best on morning run (5K in 22:30)"], + ["emoji": "📖", "text": "Started reading 'Designing Data-Intensive Applications'"], + ["emoji": "🎵", "text": "Discovered a new playlist for focus work"], + ], + ] + ), + ] + + // MARK: - State + + private var renderedStories: [(SampleStory, StoryAttachment)] = [] + private let scrollView = UIScrollView() + private let stackView = UIStackView() + + // MARK: - Lifecycle + + override func viewDidLoad() { + super.viewDidLoad() + title = "Stories Demo" + view.backgroundColor = UIColor.systemBackground + navigationItem.leftBarButtonItem = UIBarButtonItem( + barButtonSystemItem: .close, target: self, action: #selector(dismissSelf) + ) + + setupScrollView() + generateStories() + } + + // MARK: - Setup + + private func setupScrollView() { + scrollView.translatesAutoresizingMaskIntoConstraints = false + view.addSubview(scrollView) + NSLayoutConstraint.activate([ + scrollView.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor), + scrollView.leadingAnchor.constraint(equalTo: view.leadingAnchor), + scrollView.trailingAnchor.constraint(equalTo: 
view.trailingAnchor), + scrollView.bottomAnchor.constraint(equalTo: view.bottomAnchor), + ]) + + stackView.axis = .vertical + stackView.spacing = 24 + stackView.alignment = .center + stackView.translatesAutoresizingMaskIntoConstraints = false + scrollView.addSubview(stackView) + NSLayoutConstraint.activate([ + stackView.topAnchor.constraint(equalTo: scrollView.topAnchor, constant: 24), + stackView.leadingAnchor.constraint(equalTo: scrollView.leadingAnchor, constant: 20), + stackView.trailingAnchor.constraint(equalTo: scrollView.trailingAnchor, constant: -20), + stackView.bottomAnchor.constraint(equalTo: scrollView.bottomAnchor, constant: -24), + stackView.widthAnchor.constraint(equalTo: scrollView.widthAnchor, constant: -40), + ]) + } + + // MARK: - Generation + + private func generateStories() { + StoryGenerationService.shared.host = self + + for sample in samples { + let attachment = StoryGenerationService.shared.submit( + title: sample.title, + template: sample.template, + data: sample.data + ) + renderedStories.append((sample, attachment)) + addStoryCard(for: sample, attachment: attachment) + } + } + + private func addStoryCard(for sample: SampleStory, attachment: StoryAttachment) { + let card = StoryCardView() + card.configure(title: sample.title, template: sample.template, status: attachment.status) + card.translatesAutoresizingMaskIntoConstraints = false + card.onTap = { [weak self] in + self?.openStory(attachment: attachment) + } + stackView.addArrangedSubview(card) + NSLayoutConstraint.activate([ + card.widthAnchor.constraint(equalTo: stackView.widthAnchor), + card.heightAnchor.constraint(equalToConstant: 400), + ]) + } + + private func openStory(attachment: StoryAttachment) { + guard attachment.status == .ready else { return } + let playerVC = StoryPlayerVC() + playerVC.storyAttachment = attachment + playerVC.modalPresentationStyle = .fullScreen + present(playerVC, animated: true) + } + + @objc private func dismissSelf() { + dismiss(animated: true) + } +} 
+ +// MARK: - StorySkillHost + +extension StoryDemoVC: StorySkillHost { + func storySkillDidStartGenerating(_ attachment: StoryAttachment) { + // Could show a loading indicator on the card + } + + func storySkillDidFinishGenerating(_ attachment: StoryAttachment) { + // Update the card + store the ready attachment + for i in 0.. Void)? + + private let titleLabel = UILabel() + private let templateBadge = UILabel() + private let statusLabel = UILabel() + private let playerView = StoryPlayerView() + private let overlayGradient = CAGradientLayer() + + override init(frame: CGRect) { + super.init(frame: frame) + setup() + } + + required init?(coder: NSCoder) { + super.init(coder: coder) + setup() + } + + private func setup() { + backgroundColor = UIColor.secondarySystemBackground + layer.cornerRadius = 20 + clipsToBounds = true + + // Player view (inline thumbnail) + playerView.isInlineMode = true + playerView.translatesAutoresizingMaskIntoConstraints = false + addSubview(playerView) + NSLayoutConstraint.activate([ + playerView.topAnchor.constraint(equalTo: topAnchor), + playerView.leadingAnchor.constraint(equalTo: leadingAnchor), + playerView.trailingAnchor.constraint(equalTo: trailingAnchor), + playerView.bottomAnchor.constraint(equalTo: bottomAnchor), + ]) + + // Bottom overlay + let overlay = UIView() + overlay.translatesAutoresizingMaskIntoConstraints = false + addSubview(overlay) + NSLayoutConstraint.activate([ + overlay.leadingAnchor.constraint(equalTo: leadingAnchor), + overlay.trailingAnchor.constraint(equalTo: trailingAnchor), + overlay.bottomAnchor.constraint(equalTo: bottomAnchor), + overlay.heightAnchor.constraint(equalToConstant: 100), + ]) + overlayGradient.colors = [ + UIColor.black.withAlphaComponent(0).cgColor, + UIColor.black.withAlphaComponent(0.7).cgColor, + ] + overlay.layer.addSublayer(overlayGradient) + + // Title + titleLabel.font = .systemFont(ofSize: 18, weight: .semibold) + titleLabel.textColor = .white + 
titleLabel.translatesAutoresizingMaskIntoConstraints = false + addSubview(titleLabel) + + // Template badge + templateBadge.font = .systemFont(ofSize: 12, weight: .medium) + templateBadge.textColor = UIColor.white.withAlphaComponent(0.7) + templateBadge.translatesAutoresizingMaskIntoConstraints = false + addSubview(templateBadge) + + NSLayoutConstraint.activate([ + titleLabel.leadingAnchor.constraint(equalTo: leadingAnchor, constant: 16), + titleLabel.bottomAnchor.constraint(equalTo: bottomAnchor, constant: -16), + templateBadge.leadingAnchor.constraint(equalTo: leadingAnchor, constant: 16), + templateBadge.bottomAnchor.constraint(equalTo: titleLabel.topAnchor, constant: -4), + ]) + + // Status (for loading/error states) + statusLabel.font = .systemFont(ofSize: 14, weight: .medium) + statusLabel.textColor = .secondaryLabel + statusLabel.textAlignment = .center + statusLabel.translatesAutoresizingMaskIntoConstraints = false + addSubview(statusLabel) + NSLayoutConstraint.activate([ + statusLabel.centerXAnchor.constraint(equalTo: centerXAnchor), + statusLabel.centerYAnchor.constraint(equalTo: centerYAnchor), + ]) + + // Tap gesture + let tap = UITapGestureRecognizer(target: self, action: #selector(tapped)) + addGestureRecognizer(tap) + } + + override func layoutSubviews() { + super.layoutSubviews() + overlayGradient.frame = CGRect(x: 0, y: 0, width: bounds.width, height: 100) + } + + func configure(title: String, template: StoryAttachment.Template, status: StoryAttachment.Status) { + titleLabel.text = title + templateBadge.text = template.rawValue.uppercased() + + switch status { + case .generating: + statusLabel.text = "Generating…" + statusLabel.isHidden = false + playerView.isHidden = true + case .ready: + statusLabel.isHidden = true + playerView.isHidden = false + case .failed: + statusLabel.text = "Failed to generate" + statusLabel.isHidden = false + playerView.isHidden = true + } + } + + func loadPreview(fileURL: URL) { + playerView.isHidden = false + 
statusLabel.isHidden = true + playerView.loadStory(fileURL: fileURL) + } + + @objc private func tapped() { + onTap?() + } +} + +#endif diff --git a/LoopIOS/Stories/StoryGenerationService.swift b/LoopIOS/Stories/StoryGenerationService.swift new file mode 100644 index 0000000..8bf3f3f --- /dev/null +++ b/LoopIOS/Stories/StoryGenerationService.swift @@ -0,0 +1,103 @@ +// +// StoryGenerationService.swift +// Loop +// +// Service layer that coordinates story generation. Mirrors the +// PDFGenerationService pattern: accepts a request, creates a placeholder +// StoryAttachment in .generating state, renders the HTML via StoryGenerator, +// and notifies the host on completion. +// + +#if os(iOS) || os(macOS) + +import Foundation + +/// Implemented by MessagingVC (iOS) or ConversationWindowController (Mac) to +/// receive story lifecycle events. +protocol StorySkillHost: AnyObject { + func storySkillDidStartGenerating(_ attachment: StoryAttachment) + func storySkillDidFinishGenerating(_ attachment: StoryAttachment) +} + +final class StoryGenerationService { + static let shared = StoryGenerationService() + + weak var host: StorySkillHost? + + private init() {} + + // MARK: - Public API + + /// Submit a story generation request. Returns the placeholder attachment + /// synchronously so the caller can render a loading cell immediately. + @discardableResult + func submit(title: String, + template: StoryAttachment.Template, + jsonPayload: String, + attachmentId: String? = nil, + conversationId: String? = nil) -> StoryAttachment { + let id = attachmentId ?? 
UUID().uuidString + var attachment = StoryAttachment( + id: id, + title: title, + template: template, + jsonPayload: jsonPayload, + status: .generating, + conversationId: conversationId + ) + + DispatchQueue.main.async { [weak self] in + self?.host?.storySkillDidStartGenerating(attachment) + } + + // Generate on a background queue + DispatchQueue.global(qos: .userInitiated).async { [weak self] in + do { + let outputDir = self?.storiesDirectory() + let fileURL = try StoryGenerator.shared.generate( + template: template, + jsonPayload: jsonPayload, + outputDirectory: outputDir + ) + attachment.fileURL = fileURL + attachment.status = .ready + } catch { + attachment.status = .failed + attachment.failureReason = error.localizedDescription + } + + DispatchQueue.main.async { + self?.host?.storySkillDidFinishGenerating(attachment) + } + } + + return attachment + } + + /// Convenience: submit with a dictionary payload. + @discardableResult + func submit(title: String, + template: StoryAttachment.Template, + data: [String: Any], + conversationId: String? = nil) -> StoryAttachment { + let jsonString: String + if let jsonData = try? JSONSerialization.data(withJSONObject: data), + let str = String(data: jsonData, encoding: .utf8) { + jsonString = str + } else { + jsonString = "{}" + } + return submit(title: title, template: template, jsonPayload: jsonString, conversationId: conversationId) + } + + // MARK: - Private + + private func storiesDirectory() -> URL { + let docs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first! + let dir = docs.appendingPathComponent("Stories", isDirectory: true) + try? 
FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + return dir + } +} + +#endif diff --git a/LoopIOS/Stories/StoryGenerator.swift b/LoopIOS/Stories/StoryGenerator.swift new file mode 100644 index 0000000..13f6b1f --- /dev/null +++ b/LoopIOS/Stories/StoryGenerator.swift @@ -0,0 +1,105 @@ +// +// StoryGenerator.swift +// Loop +// +// JSON → HTML renderer. Takes structured data and a template identifier, +// loads the corresponding HTML template from the bundle, injects the data +// payload, and writes a single self-contained .html file to the workspace. +// + +import Foundation + +final class StoryGenerator { + static let shared = StoryGenerator() + private init() {} + + enum GenerationError: Error, LocalizedError { + case templateNotFound(String) + case invalidJSON + case fileWriteFailed(String) + + var errorDescription: String? { + switch self { + case .templateNotFound(let name): return "Template '\(name)' not found in bundle" + case .invalidJSON: return "Invalid JSON payload" + case .fileWriteFailed(let reason): return "Failed to write HTML: \(reason)" + } + } + } + + /// Generate a self-contained HTML story file from structured data. + /// + /// - Parameters: + /// - template: Which template to use. + /// - jsonPayload: JSON string of the data to inject. + /// - outputDirectory: Where to write the rendered HTML. Defaults to tmp. + /// - Returns: URL of the rendered .html file. + func generate(template: StoryAttachment.Template, + jsonPayload: String, + outputDirectory: URL? = nil) throws -> URL { + // Validate JSON + guard let jsonData = jsonPayload.data(using: .utf8), + (try? JSONSerialization.jsonObject(with: jsonData)) != nil else { + throw GenerationError.invalidJSON + } + + // Templates are embedded as Swift constants (StoryBundledTemplates) + // because Xcode's synchronized groups don't copy .html into the app + // bundle. 
Prefer the embedded copy; fall back to a bundled resource + // if one is ever added so editing-via-bundle still works. + if let embedded = StoryBundledTemplates.html(for: template) { + return try renderAndWrite(html: embedded, json: jsonPayload, outputDir: outputDirectory) + } + + if let templateURL = Bundle.main.url(forResource: template.rawValue, + withExtension: "html", + subdirectory: "StoryTemplates") + ?? Bundle.main.url(forResource: template.rawValue, withExtension: "html"), + let templateHTML = try? String(contentsOf: templateURL, encoding: .utf8) { + return try renderAndWrite(html: templateHTML, json: jsonPayload, outputDir: outputDirectory) + } + + throw GenerationError.templateNotFound(template.rawValue) + } + + /// Inject the JSON data into the template and write to disk. + private func renderAndWrite(html: String, json: String, outputDir: URL?) throws -> URL { + // Inject the data payload as a global variable before the closing + let injection = "\nwindow.__STORY_DATA__ = \(json);\n" + let rendered: String + if let range = html.range(of: "" + } + + // Write to disk + let dir = outputDir ?? FileManager.default.temporaryDirectory + let fileName = "story_\(UUID().uuidString.prefix(8)).html" + let fileURL = dir.appendingPathComponent(fileName) + + do { + try rendered.write(to: fileURL, atomically: true, encoding: .utf8) + } catch { + throw GenerationError.fileWriteFailed(error.localizedDescription) + } + + return fileURL + } + + /// Convenience: generate from a dictionary payload. + func generate(template: StoryAttachment.Template, + data: [String: Any], + outputDirectory: URL? 
= nil) throws -> URL { + let jsonData = try JSONSerialization.data(withJSONObject: data, options: [.fragmentsAllowed]) + guard let jsonString = String(data: jsonData, encoding: .utf8) else { + throw GenerationError.invalidJSON + } + return try generate(template: template, jsonPayload: jsonString, outputDirectory: outputDirectory) + } +} diff --git a/LoopIOS/Stories/StoryPlayerVC.swift b/LoopIOS/Stories/StoryPlayerVC.swift new file mode 100644 index 0000000..759d4ea --- /dev/null +++ b/LoopIOS/Stories/StoryPlayerVC.swift @@ -0,0 +1,121 @@ +// +// StoryPlayerVC.swift +// Loop +// +// Full-screen story viewer. Presented modally when the user taps a story +// card in chat. Displays the story at native 1080×1920 resolution scaled +// to fill the screen, with tap-to-advance navigation and a close button. +// + +#if os(iOS) + +import UIKit + +final class StoryPlayerVC: UIViewController { + + // MARK: - Public + + /// The story attachment to display. Set before presentation. + var storyAttachment: StoryAttachment? + + /// Alternatively, load from raw HTML string (for demo/preview). + var rawHTML: String? 
+ + // MARK: - Subviews + + private let playerView = StoryPlayerView() + private let closeButton = UIButton(type: .system) + private let progressBar = UIProgressView(progressViewStyle: .bar) + + // MARK: - Lifecycle + + override func viewDidLoad() { + super.viewDidLoad() + view.backgroundColor = .black + modalPresentationStyle = .fullScreen + + setupPlayerView() + setupCloseButton() + setupProgressBar() + loadContent() + } + + override var prefersStatusBarHidden: Bool { true } + override var prefersHomeIndicatorAutoHidden: Bool { true } + + // MARK: - Setup + + private func setupPlayerView() { + playerView.isInlineMode = false + playerView.delegate = self + playerView.translatesAutoresizingMaskIntoConstraints = false + view.addSubview(playerView) + NSLayoutConstraint.activate([ + playerView.topAnchor.constraint(equalTo: view.topAnchor), + playerView.leadingAnchor.constraint(equalTo: view.leadingAnchor), + playerView.trailingAnchor.constraint(equalTo: view.trailingAnchor), + playerView.bottomAnchor.constraint(equalTo: view.bottomAnchor), + ]) + } + + private func setupCloseButton() { + closeButton.translatesAutoresizingMaskIntoConstraints = false + let config = UIImage.SymbolConfiguration(pointSize: 18, weight: .semibold) + closeButton.setImage(UIImage(systemName: "xmark", withConfiguration: config), for: .normal) + closeButton.tintColor = .white + closeButton.backgroundColor = UIColor.white.withAlphaComponent(0.2) + closeButton.layer.cornerRadius = 18 + closeButton.addTarget(self, action: #selector(closeTapped), for: .touchUpInside) + view.addSubview(closeButton) + NSLayoutConstraint.activate([ + closeButton.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 12), + closeButton.trailingAnchor.constraint(equalTo: view.trailingAnchor, constant: -16), + closeButton.widthAnchor.constraint(equalToConstant: 36), + closeButton.heightAnchor.constraint(equalToConstant: 36), + ]) + } + + private func setupProgressBar() { + 
progressBar.translatesAutoresizingMaskIntoConstraints = false + progressBar.trackTintColor = UIColor.white.withAlphaComponent(0.2) + progressBar.progressTintColor = .white + progressBar.progress = 0 + view.addSubview(progressBar) + NSLayoutConstraint.activate([ + progressBar.topAnchor.constraint(equalTo: view.safeAreaLayoutGuide.topAnchor, constant: 4), + progressBar.leadingAnchor.constraint(equalTo: view.leadingAnchor, constant: 16), + progressBar.trailingAnchor.constraint(equalTo: closeButton.leadingAnchor, constant: -12), + progressBar.heightAnchor.constraint(equalToConstant: 3), + ]) + } + + private func loadContent() { + if let attachment = storyAttachment, let fileURL = attachment.fileURL { + playerView.loadStory(fileURL: fileURL) + } else if let html = rawHTML { + playerView.loadStoryHTML(html) + } + playerView.startAutoPlay() + } + + // MARK: - Actions + + @objc private func closeTapped() { + dismiss(animated: true) + } +} + +// MARK: - StoryPlayerViewDelegate + +extension StoryPlayerVC: StoryPlayerViewDelegate { + func storyPlayerDidFinish(_ player: StoryPlayerView) { + // Could auto-dismiss or loop + progressBar.setProgress(1.0, animated: true) + } + + func storyPlayerDidRequestFullScreen(_ player: StoryPlayerView) { + // Already full-screen; no-op. + } +} + +#endif diff --git a/LoopIOS/Stories/StoryPlayerView.swift b/LoopIOS/Stories/StoryPlayerView.swift new file mode 100644 index 0000000..89a3c7c --- /dev/null +++ b/LoopIOS/Stories/StoryPlayerView.swift @@ -0,0 +1,255 @@ +// +// StoryPlayerView.swift +// Loop +// +// Chromeless WKWebView that renders a 1080×1920 HTML story. Supports two +// modes: +// 1. Inline (thumbnail in chat): scaled down to fit the cell, auto-plays +// the timeline, non-interactive (tap opens full-screen). +// 2. Full-screen: native tap-to-advance (left 30% = back, right 70% = next), +// auto-plays on load with controls for pause/resume. 
+//
+// The web view is configured with no scroll, no bounce, no selection, and
+// no navigation — a pure render surface for the self-contained HTML story.
+//
+
+#if os(iOS)
+
+import UIKit
+import WebKit
+
+/// Callbacks emitted by `StoryPlayerView`.
+protocol StoryPlayerViewDelegate: AnyObject {
+    /// Story finished playing (reached the last scene).
+    func storyPlayerDidFinish(_ player: StoryPlayerView)
+    /// User tapped the thumbnail card in inline mode.
+    func storyPlayerDidRequestFullScreen(_ player: StoryPlayerView)
+}
+
+/// Hosts a `WKWebView` that renders a self-contained HTML story and drives it
+/// through the page's `window.StoryBridge` JavaScript object.
+///
+/// Two interaction modes are supported (see `isInlineMode`):
+/// - inline: the web view ignores touches and a transparent overlay turns any
+///   tap into `storyPlayerDidRequestFullScreen`;
+/// - full-screen: taps on the web view retreat (left 30% of the width) or
+///   advance (everywhere else).
+final class StoryPlayerView: UIView {
+
+    weak var delegate: StoryPlayerViewDelegate?
+
+    /// Whether this instance is in inline (scaled thumbnail) mode.
+    /// When true, user interaction on the web view is disabled and a tap
+    /// gesture on the container opens full-screen instead.
+    var isInlineMode: Bool = true {
+        didSet { configureInteraction() }
+    }
+
+    // MARK: - Subviews
+
+    private let webView: WKWebView = {
+        let config = WKWebViewConfiguration()
+        config.allowsInlineMediaPlayback = true
+        config.mediaTypesRequiringUserActionForPlayback = []
+        config.suppressesIncrementalRendering = true
+
+        let prefs = WKWebpagePreferences()
+        prefs.allowsContentJavaScript = true
+        config.defaultWebpagePreferences = prefs
+
+        let wv = WKWebView(frame: .zero, configuration: config)
+        wv.isOpaque = false
+        wv.backgroundColor = .clear
+        wv.scrollView.isScrollEnabled = false
+        wv.scrollView.bounces = false
+        wv.scrollView.showsVerticalScrollIndicator = false
+        wv.scrollView.showsHorizontalScrollIndicator = false
+        // Remove any selection / callout menu
+        wv.allowsLinkPreview = false
+        return wv
+    }()
+
+    /// Overlay for inline tap gesture.
+    private let tapOverlay = UIView()
+
+    // MARK: - State
+
+    /// Scene index last reported by `StoryBridge.getCurrentScene()`.
+    private var currentScene = 0
+    /// Scene count last reported by `StoryBridge.getTotalScenes()`; 0 until
+    /// the bridge has answered at least once.
+    private var totalScenes = 0
+    /// Tracks the last auto-play request made through this view.
+    private var isPlaying = false
+
+    // MARK: - Init
+
+    override init(frame: CGRect) {
+        super.init(frame: frame)
+        setup()
+    }
+
+    required init?(coder: NSCoder) {
+        super.init(coder: coder)
+        setup()
+    }
+
+    /// Builds the view hierarchy: the web view pinned to all edges, the tap
+    /// overlay stacked above it, and one tap recognizer per interaction mode.
+    private func setup() {
+        clipsToBounds = true
+        layer.cornerRadius = 16
+
+        // Scene state is synced from `webView(_:didFinish:)`, so the view has
+        // to observe its own web view's navigation.
+        webView.navigationDelegate = self
+
+        webView.translatesAutoresizingMaskIntoConstraints = false
+        addSubview(webView)
+        NSLayoutConstraint.activate([
+            webView.topAnchor.constraint(equalTo: topAnchor),
+            webView.leadingAnchor.constraint(equalTo: leadingAnchor),
+            webView.trailingAnchor.constraint(equalTo: trailingAnchor),
+            webView.bottomAnchor.constraint(equalTo: bottomAnchor),
+        ])
+
+        // Tap overlay for inline mode
+        tapOverlay.translatesAutoresizingMaskIntoConstraints = false
+        tapOverlay.backgroundColor = .clear
+        addSubview(tapOverlay)
+        NSLayoutConstraint.activate([
+            tapOverlay.topAnchor.constraint(equalTo: topAnchor),
+            tapOverlay.leadingAnchor.constraint(equalTo: leadingAnchor),
+            tapOverlay.trailingAnchor.constraint(equalTo: trailingAnchor),
+            tapOverlay.bottomAnchor.constraint(equalTo: bottomAnchor),
+        ])
+
+        let inlineTap = UITapGestureRecognizer(target: self, action: #selector(handleInlineTap))
+        tapOverlay.addGestureRecognizer(inlineTap)
+
+        // Full-screen tap zones
+        // NOTE(review): taps attached directly to a WKWebView can lose to the
+        // web view's own recognizers — confirm this fires on device.
+        let fullTap = UITapGestureRecognizer(target: self, action: #selector(handleFullScreenTap(_:)))
+        webView.addGestureRecognizer(fullTap)
+
+        configureInteraction()
+    }
+
+    /// Routes touches according to `isInlineMode`: the overlay is visible and
+    /// the web view inert when inline; the reverse when full-screen.
+    private func configureInteraction() {
+        tapOverlay.isHidden = !isInlineMode
+        webView.isUserInteractionEnabled = !isInlineMode
+    }
+
+    // MARK: - Loading
+
+    /// Load a story from a local HTML file URL.
+    ///
+    /// Read access is granted to the file's parent directory. Scene state is
+    /// queried once the navigation finishes (see `webView(_:didFinish:)`)
+    /// rather than after a fixed delay, so a slow load cannot be sampled
+    /// before the page exists.
+    func loadStory(fileURL: URL) {
+        webView.loadFileURL(fileURL, allowingReadAccessTo: fileURL.deletingLastPathComponent())
+    }
+
+    /// Load a story from raw HTML string.
+    ///
+    /// Scene state is queried once the navigation finishes, as in
+    /// `loadStory(fileURL:)`.
+    func loadStoryHTML(_ html: String) {
+        webView.loadHTMLString(html, baseURL: nil)
+    }
+
+    // MARK: - Navigation
+
+    /// Asks the page to move to the next scene, then re-syncs scene state.
+    func advance() {
+        webView.evaluateJavaScript("window.StoryBridge && window.StoryBridge.advance()") { [weak self] _, _ in
+            self?.querySceneState()
+        }
+    }
+
+    /// Asks the page to move to the previous scene, then re-syncs scene state.
+    func retreat() {
+        webView.evaluateJavaScript("window.StoryBridge && window.StoryBridge.retreat()") { [weak self] _, _ in
+            self?.querySceneState()
+        }
+    }
+
+    /// Asks the page to jump to scene `index`, then re-syncs scene state.
+    func goToScene(_ index: Int) {
+        webView.evaluateJavaScript("window.StoryBridge && window.StoryBridge.goToScene(\(index))") { [weak self] _, _ in
+            self?.querySceneState()
+        }
+    }
+
+    /// Marks the player as playing and asks the page to start auto-play.
+    func startAutoPlay() {
+        isPlaying = true
+        webView.evaluateJavaScript("window.StoryBridge && window.StoryBridge.startAutoPlay()", completionHandler: nil)
+    }
+
+    /// Marks the player as stopped and asks the page to stop auto-play.
+    func stopAutoPlay() {
+        isPlaying = false
+        webView.evaluateJavaScript("window.StoryBridge && window.StoryBridge.stopAutoPlay()", completionHandler: nil)
+    }
+
+    // MARK: - Private
+
+    /// Reads the current scene index and scene count from `StoryBridge` and
+    /// notifies the delegate when the last scene is showing.
+    ///
+    /// The script answers `{current: 0, total: 0}` when `StoryBridge` is not
+    /// defined. That placeholder is stored but never reported as "finished".
+    private func querySceneState() {
+        let script = """
+        (function() {
+          if (!window.StoryBridge) return JSON.stringify({current: 0, total: 0});
+          return JSON.stringify({
+            current: window.StoryBridge.getCurrentScene(),
+            total: window.StoryBridge.getTotalScenes()
+          });
+        })()
+        """
+        webView.evaluateJavaScript(script) { [weak self] result, _ in
+            guard let self = self,
+                  let jsonStr = result as? String,
+                  let data = jsonStr.data(using: .utf8),
+                  let dict = try? JSONSerialization.jsonObject(with: data) as? [String: Int] else { return }
+            self.currentScene = dict["current"] ?? 0
+            self.totalScenes = dict["total"] ?? 0
+            // With no scenes, `0 >= 0 - 1` would hold and a story that has
+            // not even loaded would be reported as finished.
+            guard self.totalScenes > 0 else { return }
+            if self.currentScene >= self.totalScenes - 1 {
+                self.delegate?.storyPlayerDidFinish(self)
+            }
+        }
+    }
+
+    // MARK: - Gestures
+
+    @objc private func handleInlineTap() {
+        delegate?.storyPlayerDidRequestFullScreen(self)
+    }
+
+    /// Left 30% of the view's width retreats; the rest advances.
+    @objc private func handleFullScreenTap(_ gesture: UITapGestureRecognizer) {
+        let location = gesture.location(in: self)
+        let threshold = bounds.width * 0.3
+        if location.x < threshold {
+            retreat()
+        } else {
+            advance()
+        }
+    }
+
+    // MARK: - Scaling for inline mode
+
+    /// Apply a transform to fit the 1080×1920 content into the given container
+    /// size. Call this after layout if using inline mode.
+    ///
+    /// The web view is sized to the full story dimensions and then scaled
+    /// about its top-left corner so the scaled content is centred in the
+    /// container. Does nothing for an empty container.
+    ///
+    /// NOTE(review): `setup()` also pins the web view with edge constraints, so
+    /// a later Auto Layout pass may overwrite this geometry — confirm callers
+    /// invoke this after layout, as stated above.
+    func applyInlineScale(containerSize: CGSize) {
+        // A zero or negative size would produce a singular transform.
+        guard containerSize.width > 0, containerSize.height > 0 else { return }
+
+        let storyWidth: CGFloat = 1080
+        let storyHeight: CGFloat = 1920
+        let scale = min(containerSize.width / storyWidth,
+                        containerSize.height / storyHeight)
+
+        // Geometry is only well defined under the identity transform, so
+        // reset first, position the view, and scale last.
+        webView.transform = .identity
+        // Scale about the top-left corner. With this anchor, `center` is the
+        // position of that corner in the superview.
+        webView.layer.anchorPoint = CGPoint(x: 0, y: 0)
+        webView.bounds = CGRect(x: 0, y: 0, width: storyWidth, height: storyHeight)
+        webView.center = CGPoint(
+            x: (containerSize.width - storyWidth * scale) / 2,
+            y: (containerSize.height - storyHeight * scale) / 2
+        )
+        webView.transform = CGAffineTransform(scaleX: scale, y: scale)
+    }
+
+    /// Simpler approach: use CSS viewport scaling. The HTML is 1080×1920 and
+    /// WKWebView will scale to fit via pageZoom.
+    ///
+    /// Ignores a non-positive width (as seen before the first real layout),
+    /// which would otherwise set a zero or negative zoom.
+    func applyContentScale(for containerSize: CGSize) {
+        guard containerSize.width > 0 else { return }
+        let zoom = containerSize.width / 1080.0
+        webView.pageZoom = zoom
+    }
+
+    // NOTE(review): zoom is only refreshed in inline mode; confirm the
+    // full-screen player is meant to keep whatever zoom was last applied.
+    override func layoutSubviews() {
+        super.layoutSubviews()
+        if isInlineMode {
+            applyContentScale(for: bounds.size)
+        }
+    }
+}
+
+// MARK: - WKNavigationDelegate
+
+extension StoryPlayerView: WKNavigationDelegate {
+    /// Syncs scene state once a story navigation has finished.
+    func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) {
+        querySceneState()
+    }
+}
+
+#endif
diff --git a/LoopIOS/Stories/StorySkill.swift b/LoopIOS/Stories/StorySkill.swift
new file mode 100644
index 0000000..6e26767
--- /dev/null
+++ b/LoopIOS/Stories/StorySkill.swift
@@ -0,0 +1,198 @@
+//
+// StorySkill.swift
+// Loop
+//
+// Skill definition for the Stories feature.
Exposes `generate_story` as a +// tool the model can call to produce an HTML infographic from structured +// data. Mirrors the PDFSkill interface (static `tools` + `handles` + +// `handle`) so it slots into the AgentHarness catalog and SkillDispatcher +// exactly like every other bundled skill. +// +// Submit-and-return: the render is kicked off on StoryGenerationService and +// a function-result is returned immediately so the model can write a short +// ack while the WKWebView renders. The story card swaps into the chat via +// StorySkillHost when the render completes. +// + +#if os(iOS) || os(macOS) + +import Foundation + +/// Skill that generates HTML story infographics. Registers as a tool the LLM +/// can call via the SkillDispatcher. +final class StorySkill { + static let shared = StorySkill() + private init() {} + + static let systemPromptFragment: String = """ +You can generate a visual, full-screen "story" — a 1080×1920 portrait HTML infographic with animated scenes and tap-to-advance navigation — using the generate_story tool. It renders as a tappable card in chat that opens a full-screen, Instagram-style story player. + +When to call: +- The user asks for a "story", a "recap", a visual summary, a shareable card, an "infographic", or "show me X as a story". +- You've just gathered data worth presenting visually (a day's activity, a workout, a set of stats, highlights) and a poster-style visual beats a wall of text. + +Sequencing (important — avoid duplicate cards): +- Gather ALL the data you need FIRST (health queries, web lookups, etc.), THEN call generate_story exactly once with the finished payload. Do NOT call it speculatively before you have the numbers and then call it again — every call drops a separate card in the chat. +- One generate_story call per user request. If you realize you need more data after starting, fetch the data and only THEN make the single call. 
+ +Templates and their EXACT `data` schema — match these key names precisely, or fields render blank: + +`DailyRecap` — general day/period summary (stats + goals + highlights). `data` shape: +{ + "emoji": "📊", + "title": "Your Tuesday", + "subtitle": "June 3, 2025", + "stats": [ { "emoji": "✉️", "value": "47", "label": "Emails" }, … ], // value can be string or number + "goals": [ { "label": "Deep Work", "current": 3.5, "target": 4 }, … ], // current & target are NUMBERS; % is computed + "highlights": [ { "emoji": "🚀", "text": "Shipped the stories feature" }, … ] +} + +`ActivitySummary` — fitness/activity data (metrics grid + progress ring + timeline). `data` shape: +{ + "icon": "🥾", + "title": "Morning Hike", + "subtitle": "Mt. Tamalpais", + "metrics": [ { "emoji": "📏", "label": "Distance", "value": "12.4", "unit": "mi" }, … ], + "goal": { "current": 12.4, "target": 10, "label": "distance goal", "caption": "Exceeded by 24%!" }, // ONE object; numbers + "timeline": [ { "time": "6:30 AM", "text": "Started at the trailhead" }, … ] +} + +How to write `data`: +- Use EXACTLY the keys above. For goals/ring progress, send numeric `current` and `target` (not a pre-computed "%" string) — the template computes the percentage. Highlights/timeline entries are objects with a `text` field, not bare strings. +- Keep it tight — a story is glanceable: ~4 stats/metrics, 2–3 goals, 3–5 highlights. Not a report. + +Rules: +- One story per call. After it renders, write a short conversational reply — don't repeat the story's contents back at the user; just let them know it's ready to tap. +""" + + static let tools: [[String: Any]] = [ + [ + "type": "function", + "function": [ + "name": "generate_story", + "description": "Generate a visual HTML story infographic (1080×1920 portrait, animated, tap-to-advance scenes). Renders as a tappable card in chat that opens a full-screen story player. 
Use when the user wants a 'story', a visual recap, a shareable summary card, or an infographic.", + "parameters": [ + "type": "object", + "properties": [ + "title": [ + "type": "string", + "description": "Title for the story, shown on the card and first scene. Concise — 'Your Tuesday', 'Morning Run', not a full sentence.", + ], + "template": [ + "type": "string", + "enum": ["DailyRecap", "ActivitySummary"], + "description": "Which template to use. DailyRecap for general summaries with stats/goals/highlights. ActivitySummary for fitness/activity data with metrics, a ring chart, and a timeline.", + ], + "data": [ + "type": "object", + "description": "JSON data object to fill into the template. Shape depends on the chosen template — stats/goals/highlights for DailyRecap, metrics/ring/timeline for ActivitySummary.", + ], + ] as [String: Any], + "required": ["title", "template", "data"], + ] as [String: Any], + ] as [String: Any], + ] + ] + + static let toolNames: Set = ["generate_story"] + + func handles(functionName: String) -> Bool { + return StorySkill.toolNames.contains(functionName) + } + + func statusText(for call: FunctionCallStruct) -> String? { + switch call.name { + case "generate_story": + if let t = (call.arguments["title"] as? String)? + .trimmingCharacters(in: .whitespacesAndNewlines), !t.isEmpty { + return "creating \(StorySkill.truncate(t, to: 50)) story" + } + return "creating story" + default: + return nil + } + } + + // MARK: - Dispatch + + func handle(functionCall: FunctionCallStruct, + completion: @escaping (MessageStruct) -> Void) { + switch functionCall.name { + case "generate_story": + guard let title = (functionCall.arguments["title"] as? String)? + .trimmingCharacters(in: .whitespacesAndNewlines), + !title.isEmpty else { + completion(MessageStruct( + role: "function", + content: "I need a `title` to call generate_story.", + name: "generate_story" + )) + return + } + guard let templateStr = functionCall.arguments["template"] as? 
String, + let template = StoryAttachment.Template(rawValue: templateStr) else { + completion(MessageStruct( + role: "function", + content: "I need a valid `template`: 'DailyRecap' or 'ActivitySummary'.", + name: "generate_story" + )) + return + } + guard let data = functionCall.arguments["data"] as? [String: Any] else { + completion(MessageStruct( + role: "function", + content: "I need a `data` object to fill into the template.", + name: "generate_story" + )) + return + } + generateStory(title: title, + template: template, + data: data, + completion: completion) + default: + completion(MessageStruct( + role: "assistant", + content: "I don't know how to handle the story tool '\(functionCall.name)'." + )) + } + } + + // MARK: - Tool handler + + /// Submit-and-return: kick the render off and synthesize a function + /// result immediately so the model can write its short ack while the + /// WKWebView is rendering. The story card swaps in via the host callbacks + /// when the render completes (or fails). + private func generateStory(title: String, + template: StoryAttachment.Template, + data: [String: Any], + completion: @escaping (MessageStruct) -> Void) { + // Pin the render to whichever conversation is active *now* so a + // tab-switch between submit and finish doesn't drop the card in the + // wrong place. + let convId = SimpleConversationManager.shared.currentConversation?.id + let attachment = StoryGenerationService.shared.submit( + title: title, + template: template, + data: data, + conversationId: convId + ) + let summary = "Story generation queued (id: \(attachment.id), template: \(template.rawValue)). The story card will appear inline shortly. Acknowledge briefly to the user; do not wait for the render." 
+ completion(MessageStruct( + role: "function", + content: summary, + name: "generate_story" + )) + } + + // MARK: - Helpers + + private static func truncate(_ s: String, to max: Int) -> String { + if s.count <= max { return s } + let idx = s.index(s.startIndex, offsetBy: max) + return String(s[.. + + + + + + + + + +
+
🏃‍♂️
+
Activity
+
Your workout summary
+
+ + +
+
+
+ + +
+
+ + + + +
+
0%
+
of goal
+
+
+
+
+ + +
+
+

🕐 Timeline

+
+
+
+ + +
+ + + + diff --git a/LoopIOS/Stories/Templates/DailyRecap.html b/LoopIOS/Stories/Templates/DailyRecap.html new file mode 100644 index 0000000..1bd2cdd --- /dev/null +++ b/LoopIOS/Stories/Templates/DailyRecap.html @@ -0,0 +1,370 @@ + + + + + + + + + + +
+
📊
+

Daily Recap

+
Here's your day at a glance
+
+ + +
+
+
+ + +
+
+

Goals Progress

+
+
+
+ + +
+
+

✨ Highlights

+
+
+
+ + +
+ + + + diff --git a/LoopIOS/Structs/Messaging.swift b/LoopIOS/Structs/Messaging.swift index 884bce2..40a6b86 100644 --- a/LoopIOS/Structs/Messaging.swift +++ b/LoopIOS/Structs/Messaging.swift @@ -138,6 +138,20 @@ struct MessageStruct { /// `show_places_on_map`. Cell renders an inline MKMapView with annotations /// that callout to Apple Maps. Synchronous — no `.generating` lifecycle. var mapAttachment: MapAttachment? = nil + /// Gallery of web image-search results, set by `SerpImageSearchSkill` when + /// the model calls `image_search`. Cell renders a horizontal thumbnail + /// strip; tap opens the full image. Synchronous — no `.generating` + /// lifecycle (mirrors `mapAttachment`). + var imageGalleryAttachment: ImageGalleryAttachment? = nil + /// HTML story infographic (1080×1920 portrait). Set by StoryGenerationService + /// when the model calls `generate_story`. Renders as a scaled card in chat; + /// tap opens the full-screen story player with tap-to-advance navigation. + var storyAttachment: StoryAttachment? = nil + /// Live/replayable browse session (WebKit driver). Set by + /// BrowseGenerationService when the model calls `browse`. Renders as a live + /// preview card in chat; tap opens the full-screen read-only live view, and + /// once the session ends the card flips into a scrubbable replay. + var browseAttachment: BrowseAttachment? = nil /// Set when this message belongs to the conversational onboarding flow. /// Drives `MessagingCell` to render an interactive card (text field, /// choice buttons, key paste, etc.) under the message body. Onboarding @@ -164,6 +178,16 @@ struct MessageStruct { /// name (e.g. "GPT-5.5 2.6s"). `nil` for on-device Apple responses, /// non-streaming paths, and older persisted messages. var ttft: TimeInterval? = nil + /// When this message was posted. Defaults to the moment the struct is + /// created (i.e. 
when a live message is sent/received); reload overrides it + /// with the persisted `SimpleMessage.createdAt` in `messageStruct(from:)`. + /// Surfaced by the iMessage-style swipe-left timestamp reveal in the chat. + var timestamp: Date = Date() + + /// Speech-to-text engine that produced this message when it was dictated + /// ("Deepgram STT" / "Apple STT"). `nil` for typed messages and any + /// non-voice path. Surfaced as a small byline under the user's bubble. + var sttEngine: String? = nil /// Explicit init that still accepts `function:` as a singular optional — /// keeps existing call sites compiling now that `function` is a computed @@ -182,10 +206,15 @@ struct MessageStruct { pdfAttachment: PDFAttachment? = nil, fileAttachment: FileAttachment? = nil, mapAttachment: MapAttachment? = nil, + imageGalleryAttachment: ImageGalleryAttachment? = nil, + storyAttachment: StoryAttachment? = nil, + browseAttachment: BrowseAttachment? = nil, onboardingCard: OnboardingCardKind? = nil, reasoningContent: String? = nil, tokenUsage: TokenUsage? = nil, - ttft: TimeInterval? = nil) { + ttft: TimeInterval? = nil, + timestamp: Date = Date(), + sttEngine: String? = nil) { self.id = id self.role = role self.content = content @@ -205,10 +234,15 @@ struct MessageStruct { self.pdfAttachment = pdfAttachment self.fileAttachment = fileAttachment self.mapAttachment = mapAttachment + self.imageGalleryAttachment = imageGalleryAttachment + self.storyAttachment = storyAttachment + self.browseAttachment = browseAttachment self.onboardingCard = onboardingCard self.reasoningContent = reasoningContent self.tokenUsage = tokenUsage self.ttft = ttft + self.timestamp = timestamp + self.sttEngine = sttEngine } /// Generic JSON representation of the message. Provider-specific chat @@ -280,6 +314,16 @@ struct FileAttachment: Codable { /// cap (`extractedTextCharCap`) to keep the chat payload reasonable. Nil /// for images (Vision OCR runs but may yield nothing) and `.generic`. 
var extractedText: String? + /// For `.image` kind only: a one-time, model-generated prose description of + /// the image, produced lazily in the background by `VisionSummaryService` + /// after the image's first send. Once present, the local chat clients stop + /// re-sending the raw base64 image on subsequent turns and inline this + /// description instead (see `AnthropicChat.wirePayload` / `OpenAIChat`), + /// which saves the image's input tokens on every turn after the first. + /// Nil until generated (or if generation failed and no OCR fallback). Being + /// `Optional`, it decodes to nil for messages persisted before this field + /// existed — no migration required. + var visionSummary: String? init(id: String = UUID().uuidString, fileURL: URL, @@ -289,7 +333,8 @@ struct FileAttachment: Codable { languageTag: String? = nil, status: Status = .ready, failureReason: String? = nil, - extractedText: String? = nil) { + extractedText: String? = nil, + visionSummary: String? = nil) { self.id = id self.fileURL = fileURL self.fileName = fileName @@ -299,6 +344,26 @@ struct FileAttachment: Codable { self.status = status self.failureReason = failureReason self.extractedText = extractedText + self.visionSummary = visionSummary + } + + /// The attachment's file re-anchored to the *current* workspace root. + /// + /// `fileURL` is persisted as an absolute path, but on iOS the app's data + /// container UUID changes between installs / rebuilds, so a `fileURL` saved + /// in a previous container points at a directory that no longer exists. + /// Every attachment lives at `/attachments/`, so when the + /// stored absolute path is missing we re-derive it under the live + /// `Workspace.shared.rootURL`. Use this — not `fileURL` — anywhere the + /// bytes are actually read (open / preview / thumbnail / share); `fileURL` + /// stays the source of truth for persistence and the assistant hint. 
+ var resolvedFileURL: URL { + let fm = FileManager.default + if fm.fileExists(atPath: fileURL.path) { return fileURL } + let reAnchored = Workspace.shared.rootURL + .appendingPathComponent("attachments", isDirectory: true) + .appendingPathComponent(fileURL.lastPathComponent) + return fm.fileExists(atPath: reAnchored.path) ? reAnchored : fileURL } /// Maximum number of characters from `extractedText` to inline in a @@ -327,6 +392,24 @@ struct FileAttachment: Codable { hint(header: "[Attached file: \(fileName) (\(kindLabel)) — saved in your workspace at \(workspaceRelPath). Open/read it to view its contents.]") } + /// Text block that replaces the raw base64 image on every turn *after* the + /// one that introduced it. The model saw the actual image on the turn it + /// was sent; on later turns we hand it this `visionSummary` instead so we + /// don't re-transmit the image's tokens. Returns nil when no summary has + /// been generated yet — callers then fall back to re-sending the raw image + /// so the model never silently loses sight of it. See + /// `AnthropicChat.wirePayload` / `OpenAIChat`. + var imageSummaryHint: String? { + guard kind == .image, let summary = visionSummary, !summary.isEmpty else { return nil } + let workspaceRelative = fileURL.lastPathComponent + return """ + [Image: \(fileName) — shown to you in full on an earlier turn; a text description follows so it needn't be re-sent. Original at workspace://attachments/\(workspaceRelative).] + [Image description begin] + \(summary) + [Image description end] + """ + } + /// Short label for this attachment's kind, used in the hint header. private var kindLabel: String { switch kind { @@ -497,6 +580,40 @@ struct MapAttachment: Codable, Equatable { } } +/// Set of remote images returned by `SerpImageSearchSkill` (Google Images via +/// SerpAPI). Rendered as a horizontal thumbnail gallery in the chat cell; +/// tapping a thumbnail opens the full-resolution image. 
Synchronous — the +/// search returns all URLs in one call, so (like `MapAttachment`) there is no +/// `.generating` lifecycle. Thumbnails load lazily inside the cell. +struct ImageGalleryAttachment: Codable, Equatable { + struct Item: Codable, Equatable { + /// Small image URL used for the thumbnail grid. + let thumbnailURL: String + /// Full-resolution image URL opened on tap. + let originalURL: String + /// Source page the image was found on (for attribution / "view source"). + let sourceLink: String? + /// Short image title from the search result. + let title: String? + } + + let id: String + /// The search query that produced these results (shown as a caption). + let query: String + let items: [Item] + var conversationId: String? + + init(id: String = UUID().uuidString, + query: String, + items: [Item], + conversationId: String? = nil) { + self.id = id + self.query = query + self.items = items + self.conversationId = conversationId + } +} + /// Interactive card rendered under an onboarding message in `MessagingCell`. /// The script (in `OnboardingCoordinator`) drives which card to attach to each /// turn; the cell switches on the case to build the right UI. 
Equatable so the @@ -555,7 +672,9 @@ var tools: [[String: Any]] = { all += NotionSkill.tools all += SlackSkill.tools all += SchedulerSkill.tools + all += VMCronSkill.tools all += ExaSkill.tools + all += SerpImageSearchSkill.tools all += URLFetchSkill.tools all += GitSkill.tools all += GitHubSkill.tools @@ -564,6 +683,7 @@ var tools: [[String: Any]] = { all += SpecBuilderSkill.tools all += LocationSkill.tools all += MapsSkill.tools + all += GeocodingSkill.tools all += ImageSkill.tools all += PDFSkill.tools all += ObsidianSkill.tools @@ -578,9 +698,20 @@ var tools: [[String: Any]] = { all += TwitterSkill.tools all += SSHSkill.tools all += MuniRealtimeSkill.tools + all += CardSkill.tools + all += GoogleDriveSkill.tools + all += GoogleGmailSkill.tools + all += GoogleCalendarSkill.tools + all += AgentMailSkill.tools #if canImport(HealthKit) && os(iOS) all += HealthSkill.tools #endif + #if os(iOS) || os(macOS) + all += StorySkill.tools + #endif + #if os(iOS) + all += BrowseSkill.tools + #endif // Dynamic, user-authored skills get appended in AgentHarness at every // chat turn so newly hot-loaded skills become visible without restart. 
return all diff --git a/LoopIOS/SubAgents/SubAgentRuntime.swift b/LoopIOS/SubAgents/SubAgentRuntime.swift index f7dc329..62514d8 100644 --- a/LoopIOS/SubAgents/SubAgentRuntime.swift +++ b/LoopIOS/SubAgents/SubAgentRuntime.swift @@ -392,6 +392,9 @@ enum SubAgentRuntime { if let s = ObsidianSkill.shared.statusText(for: call) { return s } if let s = CalendarSkill.shared.statusText(for: call) { return s } if let s = SSHSkill.shared.statusText(for: call) { return s } + if let s = GoogleDriveSkill.shared.statusText(for: call) { return s } + if let s = GoogleGmailSkill.shared.statusText(for: call) { return s } + if let s = GoogleCalendarSkill.shared.statusText(for: call) { return s } return nil } diff --git a/LoopIOS/TriggerIntent.swift b/LoopIOS/TriggerIntent.swift index f966974..f64d7fb 100644 --- a/LoopIOS/TriggerIntent.swift +++ b/LoopIOS/TriggerIntent.swift @@ -49,5 +49,54 @@ struct LoopAppShortcuts: AppShortcutsProvider { shortTitle: "Start Dictation", systemImageName: "mic.fill" ) + + // iOS 27+ App Intents — guarded at the shortcut level so the + // provider compiles on older SDKs but the entries only appear + // when running on iOS 27. 
+ if #available(iOS 27.0, *) { + AppShortcut( + intent: AskLoopIntent(), + phrases: [ + "Ask \(.applicationName)", + "Hey \(.applicationName)", + "Tell \(.applicationName)" + ], + shortTitle: "Ask Loop", + systemImageName: "bubble.left.fill" + ) + + AppShortcut( + intent: CaptureToLoopIntent(), + phrases: [ + "Send this to \(.applicationName)", + "Capture this with \(.applicationName)", + "Share screen with \(.applicationName)" + ], + shortTitle: "Send to Loop", + systemImageName: "camera.viewfinder" + ) + + AppShortcut( + intent: LoopRememberIntent(), + phrases: [ + "Remember with \(.applicationName)", + "\(.applicationName) remember this", + "Note in \(.applicationName)" + ], + shortTitle: "Remember with Loop", + systemImageName: "note.text" + ) + + AppShortcut( + intent: SearchLoopIntent(), + phrases: [ + "Search \(.applicationName)", + "Find in \(.applicationName)", + "Search with \(.applicationName)" + ], + shortTitle: "Search Loop", + systemImageName: "magnifyingglass" + ) + } } } diff --git a/LoopIOSTests/AnthropicStreamReaderTests.swift b/LoopIOSTests/AnthropicStreamReaderTests.swift new file mode 100644 index 0000000..2f4bacc --- /dev/null +++ b/LoopIOSTests/AnthropicStreamReaderTests.swift @@ -0,0 +1,182 @@ +// +// AnthropicStreamReaderTests.swift +// LoopIOSTests +// +// Tests for AnthropicStreamReader: tool-call assembly, onDelta suppression +// once tool_use blocks are detected, and content accumulation. +// + +import XCTest +@testable import Loop + +final class AnthropicStreamReaderTests: XCTestCase { + + // MARK: - Helpers + + /// Feed SSE event lines to an AnthropicStreamReader and return the result. + private func feedLines(_ lines: [String], + collectDeltas: Bool = false, + file: StaticString = #file, + line: UInt = #line) -> (result: AnthropicStreamReader.Result?, deltas: [String]) { + let expectation = expectation(description: "Anthropic completion") + var captured: AnthropicStreamReader.Result? 
+ var deltas: [String] = [] + + let metrics = InferenceMetrics(provider: "Test", model: "test-model", toolCount: 0) + let reader = AnthropicStreamReader( + metrics: metrics, + onDelta: collectDeltas ? { delta in deltas.append(delta) } : nil + ) { result in + if case .success(let r) = result { + captured = r + } + expectation.fulfill() + } + + let session = URLSession.shared + let fakeTask = session.dataTask(with: URLRequest(url: URL(string: "https://example.com")!)) + + for ssePayload in lines { + let data = (ssePayload + "\n").data(using: .utf8)! + reader.urlSession(session, dataTask: fakeTask, didReceive: data) + } + reader.urlSession(session, task: fakeTask, didCompleteWithError: nil) + + wait(for: [expectation], timeout: 2) + return (captured, deltas) + } + + private func event(_ type: String, json: [String: Any]) -> [String] { + let data = try! JSONSerialization.data(withJSONObject: json) + let payload = String(data: data, encoding: .utf8)! + return ["event: \(type)", "data: \(payload)"] + } + + // MARK: - Tests + + func testTextOnlyContentAssembly() { + let lines: [String] = + event("message_start", json: ["message": ["usage": ["input_tokens": 10]]]) + + event("content_block_start", json: ["index": 0, "content_block": ["type": "text", "text": ""]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "text_delta", "text": "Hello"]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "text_delta", "text": " world"]]) + + event("content_block_stop", json: ["index": 0]) + + event("message_stop", json: [:]) + + let (result, _) = feedLines(lines) + XCTAssertEqual(result?.content, "Hello world") + XCTAssertTrue(result?.toolCalls.isEmpty ?? 
true) + } + + func testToolCallAssembly() { + let lines: [String] = + event("message_start", json: ["message": ["usage": ["input_tokens": 5]]]) + + event("content_block_start", json: ["index": 0, "content_block": ["type": "tool_use", "id": "call_123", "name": "exa_search"]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "input_json_delta", "partial_json": "{\"query\":"]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "input_json_delta", "partial_json": "\"test\"}"]]) + + event("content_block_stop", json: ["index": 0]) + + event("message_stop", json: [:]) + + let (result, _) = feedLines(lines) + XCTAssertEqual(result?.toolCalls.count, 1) + XCTAssertEqual(result?.toolCalls.first?.name, "exa_search") + XCTAssertEqual(result?.toolCalls.first?.callId, "call_123") + XCTAssertEqual(result?.toolCalls.first?.arguments["query"] as? String, "test") + } + + func testTextBeforeToolCallSuppressedFromDelta() { + // Claude emits a text block ("I'll search for that") then a tool_use. + // The text should accumulate in content but NOT fire onDelta once + // the tool_use block starts (preventing leakage into the streaming bubble). 
+ let lines: [String] = + event("message_start", json: ["message": ["usage": ["input_tokens": 5]]]) + + event("content_block_start", json: ["index": 0, "content_block": ["type": "text", "text": ""]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "text_delta", "text": "Searching"]]) + + event("content_block_stop", json: ["index": 0]) + + event("content_block_start", json: ["index": 1, "content_block": ["type": "tool_use", "id": "call_abc", "name": "search"]]) + + // Any text delta after tool_use starts should not fire onDelta + event("content_block_delta", json: ["index": 1, "delta": ["type": "input_json_delta", "partial_json": "{\"q\":\"x\"}"]]) + + event("content_block_stop", json: ["index": 1]) + + event("message_stop", json: [:]) + + let (result, deltas) = feedLines(lines, collectDeltas: true) + + // Content buffer has the pre-tool text + XCTAssertEqual(result?.content, "Searching") + // Tool call is assembled + XCTAssertEqual(result?.toolCalls.count, 1) + XCTAssertEqual(result?.toolCalls.first?.name, "search") + // The text delta "Searching" DID fire because it arrived before the tool_use block + XCTAssertEqual(deltas, ["Searching"]) + } + + func testTextDeltaAfterToolUseStartSuppressed() { + // Edge case: a text_delta arrives on a text block AFTER a tool_use + // block has already started (shouldn't normally happen, but tests + // the guard). The delta should NOT fire onDelta. 
+ let lines: [String] = + event("message_start", json: ["message": ["usage": ["input_tokens": 5]]]) + + event("content_block_start", json: ["index": 0, "content_block": ["type": "tool_use", "id": "call_1", "name": "tool"]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "input_json_delta", "partial_json": "{}"]]) + + event("content_block_stop", json: ["index": 0]) + + // Hypothetical late text block + event("content_block_start", json: ["index": 1, "content_block": ["type": "text", "text": ""]]) + + event("content_block_delta", json: ["index": 1, "delta": ["type": "text_delta", "text": "leaked"]]) + + event("content_block_stop", json: ["index": 1]) + + event("message_stop", json: [:]) + + let (result, deltas) = feedLines(lines, collectDeltas: true) + + // Content buffer captured it (for the disclosure prose) + XCTAssertEqual(result?.content, "leaked") + // But it was NOT streamed live + XCTAssertTrue(deltas.isEmpty) + } + + func testUsageParsing() { + let lines: [String] = + event("message_start", json: ["message": ["usage": ["input_tokens": 100]]]) + + event("content_block_start", json: ["index": 0, "content_block": ["type": "text", "text": ""]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "text_delta", "text": "Hi"]]) + + event("content_block_stop", json: ["index": 0]) + + event("message_delta", json: ["usage": ["output_tokens": 50]]) + + event("message_stop", json: [:]) + + let (result, _) = feedLines(lines) + XCTAssertEqual(result?.usage?.promptTokens, 100) + XCTAssertEqual(result?.usage?.completionTokens, 50) + XCTAssertEqual(result?.usage?.totalTokens, 150) + } + + func testErrorMidStreamFiresFailure() { + let expectation = expectation(description: "Error completion") + var capturedError: Error? 
+ + let metrics = InferenceMetrics(provider: "Test", model: "test-model", toolCount: 0) + let reader = AnthropicStreamReader(metrics: metrics) { result in + if case .failure(let error) = result { + capturedError = error + } + expectation.fulfill() + } + + let session = URLSession.shared + let fakeTask = session.dataTask(with: URLRequest(url: URL(string: "https://example.com")!)) + + // Feed some text + let textLines = event("content_block_start", json: ["index": 0, "content_block": ["type": "text", "text": ""]]) + + event("content_block_delta", json: ["index": 0, "delta": ["type": "text_delta", "text": "Partial"]]) + for line in textLines { + let data = (line + "\n").data(using: .utf8)! + reader.urlSession(session, dataTask: fakeTask, didReceive: data) + } + + // Simulate connectivity failure + let networkError = NSError(domain: NSURLErrorDomain, code: NSURLErrorNetworkConnectionLost) + reader.urlSession(session, task: fakeTask, didCompleteWithError: networkError) + + wait(for: [expectation], timeout: 2) + XCTAssertNotNil(capturedError) + XCTAssertEqual((capturedError as NSError?)?.code, NSURLErrorNetworkConnectionLost) + } +} diff --git a/LoopIOSTests/ImageSummaryDowngradeTests.swift b/LoopIOSTests/ImageSummaryDowngradeTests.swift new file mode 100644 index 0000000..a02ec95 --- /dev/null +++ b/LoopIOSTests/ImageSummaryDowngradeTests.swift @@ -0,0 +1,153 @@ +// +// ImageSummaryDowngradeTests.swift +// LoopIOSTests +// +// Verifies the image-token optimization in the local chat clients: an image +// attachment is sent at full resolution only on the human turn that +// introduced it. On every later turn, once a `visionSummary` exists, the raw +// base64 image is replaced by a text description — but if no summary exists +// yet (the fast-follow-up race), the image is still re-sent so the model never +// loses sight of it. +// +// Exercises both `AnthropicChat.wirePayload` and `OpenAIChat.wireMessages` +// without any network calls. 
import XCTest
@testable import Loop

/// Checks how image attachments are serialized by the local chat clients:
/// raw on the turn that introduced them, replaced by the cached
/// `visionSummary` text on later turns, and re-sent raw when no summary exists.
/// Exercises `AnthropicChat.testableWirePayload(from:)` and
/// `OpenAIChat.wireMessages(from:)`.
final class ImageSummaryDowngradeTests: XCTestCase {

    // A 1×1 transparent PNG — enough for `imageBlock`/`imageDataURL` to read
    // real bytes off disk and emit a vision block.
    private static let onePixelPNGBase64 =
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="

    /// Location of the per-test PNG fixture; set in `setUpWithError`.
    private var tempURL: URL!

    /// Writes the PNG fixture to a uniquely named file in the temp directory.
    override func setUpWithError() throws {
        // XCTUnwrap turns a broken fixture into an ordinary test failure
        // instead of a force-unwrap trap that aborts the whole test process.
        let pngData = try XCTUnwrap(Data(base64Encoded: Self.onePixelPNGBase64),
                                    "Embedded PNG fixture is not valid base64")
        let url = FileManager.default.temporaryDirectory
            .appendingPathComponent("img-summary-test-\(UUID().uuidString).png")
        try pngData.write(to: url)
        // Publish the URL only once the file actually exists on disk.
        tempURL = url
    }

    /// Removes the fixture (best effort) and clears the reference so a later
    /// test can never observe a URL left over from a previous one.
    override func tearDownWithError() throws {
        if let url = tempURL { try? FileManager.default.removeItem(at: url) }
        tempURL = nil
    }

    /// Builds a ready image attachment backed by the temp PNG.
    /// - Parameter summary: value for `visionSummary`; `nil` means "no summary yet".
    private func imageAttachment(summary: String? = nil) -> FileAttachment {
        FileAttachment(
            fileURL: tempURL,
            fileName: "photo.png",
            kind: .image,
            mimeType: "image/png",
            status: .ready,
            visionSummary: summary
        )
    }

    /// Builds a user message carrying the image attachment.
    private func userImageMessage(id: String, summary: String? = nil) -> MessageStruct {
        var m = MessageStruct(id: id, role: "user", content: "What's in this image?")
        m.fileAttachment = imageAttachment(summary: summary)
        return m
    }

    // MARK: - Anthropic

    /// Counts `{"type":"image"}` content blocks across all wire turns.
    private func anthropicImageBlockCount(_ wire: [[String: Any]]) -> Int {
        wire.reduce(0) { acc, turn in
            let blocks = (turn["content"] as? [[String: Any]]) ?? []
            return acc + blocks.filter { ($0["type"] as? String) == "image" }.count
        }
    }

    /// Joins every `text` value found in the turns' content blocks, one per line.
    private func anthropicAllText(_ wire: [[String: Any]]) -> String {
        wire.flatMap { ($0["content"] as? [[String: Any]]) ?? [] }
            .compactMap { $0["text"] as? String }
            .joined(separator: "\n")
    }

    func testAnthropicRawImageOnIntroducingTurn() {
        // The image is the last (and only) user turn → sent raw.
        let messages = [userImageMessage(id: "img-turn")]
        let (_, wire) = AnthropicChat.testableWirePayload(from: messages)
        XCTAssertEqual(anthropicImageBlockCount(wire), 1, "Image on its own turn must be sent raw")
    }

    func testAnthropicDowngradesPriorImageWithSummary() {
        let messages = [
            userImageMessage(id: "img-turn", summary: "A red bicycle leaning on a brick wall."),
            MessageStruct(role: "assistant", content: "It's a red bicycle."),
            MessageStruct(role: "user", content: "What color is the wall?"),
        ]
        let (_, wire) = AnthropicChat.testableWirePayload(from: messages)
        XCTAssertEqual(anthropicImageBlockCount(wire), 0,
                       "A summarized image on a prior turn must NOT re-send the raw image")
        XCTAssertTrue(anthropicAllText(wire).contains("A red bicycle leaning on a brick wall."),
                      "The cached description should be inlined as text")
    }

    func testAnthropicFallsBackToRawImageWhenNoSummaryYet() {
        // Prior-turn image but no summary generated yet → still sent raw.
        let messages = [
            userImageMessage(id: "img-turn", summary: nil),
            MessageStruct(role: "assistant", content: "It's a red bicycle."),
            MessageStruct(role: "user", content: "What color is the wall?"),
        ]
        let (_, wire) = AnthropicChat.testableWirePayload(from: messages)
        XCTAssertEqual(anthropicImageBlockCount(wire), 1,
                       "Without a summary the image must still be re-sent (correctness over savings)")
    }

    // MARK: - OpenAI

    /// Counts `image_url` content parts across all wire turns.
    private func openAIImageURLCount(_ wire: [[String: Any]]) -> Int {
        wire.reduce(0) { acc, turn in
            let parts = (turn["content"] as? [[String: Any]]) ?? []
            return acc + parts.filter { ($0["type"] as? String) == "image_url" }.count
        }
    }

    /// Collects turn text whether `content` is a plain string or a list of
    /// parts with `text` values; turns with neither shape are skipped.
    private func openAIAllText(_ wire: [[String: Any]]) -> String {
        wire.compactMap { turn -> String? in
            if let s = turn["content"] as? String { return s }
            if let parts = turn["content"] as? [[String: Any]] {
                return parts.compactMap { $0["text"] as? String }.joined(separator: "\n")
            }
            return nil
        }.joined(separator: "\n")
    }

    func testOpenAIRawImageOnIntroducingTurn() {
        let messages = [userImageMessage(id: "img-turn")]
        let wire = OpenAIChat.wireMessages(from: messages)
        XCTAssertEqual(openAIImageURLCount(wire), 1, "Image on its own turn must ride inline")
    }

    func testOpenAIDowngradesPriorImageWithSummary() {
        let messages = [
            userImageMessage(id: "img-turn", summary: "A red bicycle leaning on a brick wall."),
            MessageStruct(role: "assistant", content: "It's a red bicycle."),
            MessageStruct(role: "user", content: "What color is the wall?"),
        ]
        let wire = OpenAIChat.wireMessages(from: messages)
        XCTAssertEqual(openAIImageURLCount(wire), 0,
                       "A summarized image on a prior turn must NOT re-send the bytes")
        XCTAssertTrue(openAIAllText(wire).contains("A red bicycle leaning on a brick wall."),
                      "The cached description should be inlined as text")
    }

    func testOpenAIFallsBackToRawImageWhenNoSummaryYet() {
        let messages = [
            userImageMessage(id: "img-turn", summary: nil),
            MessageStruct(role: "assistant", content: "It's a red bicycle."),
            MessageStruct(role: "user", content: "What color is the wall?"),
        ]
        let wire = OpenAIChat.wireMessages(from: messages)
        XCTAssertEqual(openAIImageURLCount(wire), 1,
                       "Without a summary the image must still be re-sent")
    }
}
import XCTest
@testable import Loop

/// Schema and dispatch checks for `MusicSkill` that need no live playback.
final class MusicSkillTests: XCTestCase {

    private let skill = MusicSkill.shared

    /// Looks up the `parameters` dictionary of the tool whose
    /// `function.name` equals `toolName`; `nil` when the tool or any level of
    /// the nested dictionary is missing.
    private func parameters(ofTool toolName: String) -> [String: Any]? {
        let entry = MusicSkill.tools.first { candidate in
            (candidate["function"] as? [String: Any])?["name"] as? String == toolName
        }
        return ((entry?["function"] as? [String: Any])?["parameters"] as? [String: Any])
    }

    // MARK: - Tool schema validation

    /// The advertised name set and the names inside `tools` must agree.
    func testToolNamesMatchToolDefinitions() {
        var declared: [String] = []
        for entry in MusicSkill.tools {
            if let name = (entry["function"] as? [String: Any])?["name"] as? String {
                declared.append(name)
            }
        }
        XCTAssertEqual(Set(declared), MusicSkill.toolNames,
                       "toolNames must exactly match the names in the tools array")
    }

    /// `play_music` exposes `queue_mode` restricted to replace/append.
    func testPlayMusicSchemaIncludesQueueMode() {
        let properties = parameters(ofTool: "play_music")?["properties"] as? [String: Any]
        let queueMode = properties?["queue_mode"]

        XCTAssertNotNil(queueMode, "play_music must expose queue_mode")
        let choices = (queueMode as? [String: Any])?["enum"] as? [String]
        XCTAssertEqual(choices, ["replace", "append"])
    }

    /// `create_playlist` lists both `name` and `track_ids` as required.
    func testCreatePlaylistSchemaRequiresNameAndTrackIds() {
        let mandatory = parameters(ofTool: "create_playlist")?["required"] as? [String]
        XCTAssertTrue(mandatory?.contains("name") == true)
        XCTAssertTrue(mandatory?.contains("track_ids") == true)
    }

    // MARK: - Dispatch routing

    /// Every advertised tool is claimed by the skill; an unknown one is not.
    func testHandlesRecognisesAllMusicTools() {
        MusicSkill.toolNames.forEach { name in
            XCTAssertTrue(skill.handles(functionName: name),
                          "MusicSkill should handle \(name)")
        }
        XCTAssertFalse(skill.handles(functionName: "unknown_tool"))
    }

    func testStatusTextForPlayMusicReturnsExpected() {
        let status = skill.statusText(for: FunctionCallStruct(
            name: "play_music",
            arguments: ["target_id": "12345", "target_type": "song", "queue_mode": "append"]
        ))
        XCTAssertEqual(status, "starting playback")
    }

    func testStatusTextForCreatePlaylistIncludesName() {
        let status = skill.statusText(for: FunctionCallStruct(
            name: "create_playlist",
            arguments: ["name": "Focus Mix", "track_ids": ["a", "b"]]
        ))
        XCTAssertEqual(status, "creating playlist \"Focus Mix\"")
    }
}
+ /// The markdown editor currently slid up over the chat pane, if any, plus /// the top constraint we animate to drive the vertical slide. private var markdownEditorVC: MarkdownEditorViewController? @@ -198,6 +207,20 @@ final class ConversationWindowController: NSWindowController, ConversationPresen name: .voiceLoopStateDidChange, object: nil ) + // Drop our retained story player when its window closes so it (and the + // WKWebView) deallocate. + NotificationCenter.default.addObserver( + self, + selector: #selector(storyPlayerDidClose(_:)), + name: .storyPlayerDidClose, + object: nil + ) + } + + @objc private func storyPlayerDidClose(_ note: Notification) { + if let closed = note.object as? StoryPlayerWindowController, closed === storyPlayer { + storyPlayer = nil + } } deinit { @@ -871,6 +894,7 @@ final class ConversationWindowController: NSWindowController, ConversationPresen // map survives so we can re-render their state below. imageBubbles.removeAll() pdfBubbles.removeAll() + storyBubbles.removeAll() lastRebuiltMessages = messages let manager = SimpleConversationManager.shared for (messageIndex, message) in messages.enumerated() { @@ -882,6 +906,8 @@ final class ConversationWindowController: NSWindowController, ConversationPresen let m = manager.messageStruct(from: message) if let mapAttachment = m.mapAttachment { stack.addArrangedSubview(makeMapRow(bubbleView: MapBubbleView(attachment: mapAttachment))) + } else if let gallery = m.imageGalleryAttachment { + stack.addArrangedSubview(makeImageGalleryRow(bubbleView: ImageGalleryBubbleView(attachment: gallery))) } else if let fileAttachment = m.fileAttachment { // role="assistant" routes the existing bubble factory // to the assistant-side alignment (leading edge, @@ -939,6 +965,25 @@ final class ConversationWindowController: NSWindowController, ConversationPresen onRetry: { [weak self] att in self?.retryPDF(attachment: att) }) pdfBubbles[attachmentId] = bubble stack.addArrangedSubview(makePDFRow(bubbleView: 
bubble)) + } else if message.role == "assistant", + message.id.hasPrefix(Self.storyMessageIdPrefix) { + // story-prefixed assistant rows mirror the image/PDF + // branches: the live attachment (with the rendered HTML + // file URL) lives in `storyAttachments`. On a cold relaunch + // before the render finished we surface the same recovery. + let attachmentId = String(message.id.dropFirst(Self.storyMessageIdPrefix.count)) + let attachment = storyAttachments[attachmentId] + ?? StoryAttachment(id: attachmentId, + title: message.content.isEmpty ? "Story" : message.content, + template: .dailyRecap, + jsonPayload: "{}", + status: .failed, + failureReason: "Story is no longer available — ask Loop to regenerate it.") + let bubble = StoryBubbleView(attachment: attachment, + onOpen: { [weak self] att in self?.openStory(attachment: att) }, + onRetry: { [weak self] att in self?.retryStory(attachment: att) }) + storyBubbles[attachmentId] = bubble + stack.addArrangedSubview(makeStoryRow(bubbleView: bubble)) } else { // Going through messageStruct gives us the decoded // FileAttachment (if any) without re-parsing JSON here. @@ -959,6 +1004,8 @@ final class ConversationWindowController: NSWindowController, ConversationPresen role: message.role)) } else if let mapAttachment = m.mapAttachment { stack.addArrangedSubview(makeMapRow(bubbleView: MapBubbleView(attachment: mapAttachment))) + } else if let gallery = m.imageGalleryAttachment { + stack.addArrangedSubview(makeImageGalleryRow(bubbleView: ImageGalleryBubbleView(attachment: gallery))) } else { stack.addArrangedSubview(makeBubble(role: message.role, text: message.content, model: m.role == "assistant" ? m.model : nil)) } @@ -1030,6 +1077,31 @@ final class ConversationWindowController: NSWindowController, ConversationPresen ) } + /// Open the rendered story in the full-screen player window. We retain a + /// single player at a time; opening another replaces it. 
private func openStory(attachment: StoryAttachment) {
    // Only a finished render whose file is still on disk can be opened.
    guard attachment.status == .ready,
          let url = attachment.fileURL,
          FileManager.default.fileExists(atPath: url.path) else { return }
    // Close the previous player before retaining the replacement.
    storyPlayer?.close()
    let player = StoryPlayerWindowController(attachment: attachment)
    storyPlayer = player
    player.present()
}

/// Re-submits the attachment's title, template and payload to
/// `StoryGenerationService` under the same attachment id and conversation id.
private func retryStory(attachment: StoryAttachment) {
    StoryGenerationService.shared.submit(
        title: attachment.title,
        template: attachment.template,
        jsonPayload: attachment.jsonPayload,
        attachmentId: attachment.id,
        conversationId: attachment.conversationId
    )
}

// Share handler for the file preview card (a statement inside the method that
// builds the card). The closure is stored on `card`, so it must capture the
// card weakly: a strong capture makes card → closure → card a retain cycle
// and the card is never deallocated.
card.onShare = { [weak card] url in
    guard let card = card,
          let data = try? Data(contentsOf: url),
          let text = String(data: data, encoding: .utf8) else { return }
    // Only files that decode as UTF-8 are shared, as plain text.
    let picker = NSSharingServicePicker(items: [text])
    picker.show(relativeTo: card.bounds, of: card, preferredEdge: .minY)
}

/// Places a story bubble at the leading edge of a horizontal row, followed
/// by a low-hugging spacer, and caps the bubble's width at 200 points.
private func makeStoryRow(bubbleView: StoryBubbleView) -> NSView {
    bubbleView.translatesAutoresizingMaskIntoConstraints = false
    let row = NSStackView()
    row.translatesAutoresizingMaskIntoConstraints = false
    row.orientation = .horizontal
    row.alignment = .top
    row.addArrangedSubview(bubbleView)
    // The spacer absorbs the remaining width so the bubble stays leading.
    let spacer = NSView()
    spacer.setContentHuggingPriority(.defaultLow, for: .horizontal)
    row.addArrangedSubview(spacer)
    bubbleView.widthAnchor.constraint(lessThanOrEqualToConstant: 200).isActive = true
    return row
}
private func makeImageGalleryRow(bubbleView: ImageGalleryBubbleView) -> NSView {
    // Bubble first, then a low-hugging spacer, so the gallery sits at the
    // leading edge; its width is capped at 460 points.
    bubbleView.translatesAutoresizingMaskIntoConstraints = false
    let row = NSStackView()
    row.translatesAutoresizingMaskIntoConstraints = false
    row.orientation = .horizontal
    row.alignment = .top
    row.addArrangedSubview(bubbleView)
    let spacer = NSView()
    spacer.setContentHuggingPriority(.defaultLow, for: .horizontal)
    row.addArrangedSubview(spacer)
    bubbleView.widthAnchor.constraint(lessThanOrEqualToConstant: 460).isActive = true
    return row
}

/// Assistant-side bubble that renders web image-search results
/// (`ImageGalleryAttachment`) as a horizontal strip of thumbnails. Thumbnails
/// load asynchronously from their URLs; clicking one opens the item's
/// `originalURL` via `NSWorkspace` when it is an http(s) link.
final class ImageGalleryBubbleView: NSView {
    private let attachment: ImageGalleryAttachment
    private let titleLabel = NSTextField(labelWithString: "")
    private let scrollView = NSScrollView()
    private let stack = NSStackView()
    /// Width and height of every thumbnail tile, in points.
    private static let thumbSide: CGFloat = 120

    init(attachment: ImageGalleryAttachment) {
        self.attachment = attachment
        super.init(frame: .zero)
        configure()
    }

    required init?(coder: NSCoder) { fatalError() }

    /// Parses `string` and returns it only when its scheme is http or https.
    /// Gallery links come from web search results, so anything else
    /// (`file:`, custom app schemes, …) is refused rather than handed to
    /// `NSWorkspace`, which would launch whatever handler is registered.
    private static func webURL(from string: String) -> URL? {
        guard let url = URL(string: string),
              let scheme = url.scheme?.lowercased(),
              scheme == "http" || scheme == "https" else { return nil }
        return url
    }

    /// Builds the title label, the horizontal scroll strip and one tile per
    /// attachment item, then starts loading each thumbnail.
    private func configure() {
        translatesAutoresizingMaskIntoConstraints = false

        let query = attachment.query.trimmingCharacters(in: .whitespacesAndNewlines)
        titleLabel.translatesAutoresizingMaskIntoConstraints = false
        titleLabel.font = NSFont.systemFont(ofSize: 13, weight: .semibold)
        titleLabel.textColor = .labelColor
        titleLabel.maximumNumberOfLines = 2
        titleLabel.lineBreakMode = .byTruncatingTail
        titleLabel.stringValue = query
        // A blank query hides the title; the strip then pins to the top edge.
        titleLabel.isHidden = query.isEmpty
        addSubview(titleLabel)

        scrollView.translatesAutoresizingMaskIntoConstraints = false
        scrollView.hasHorizontalScroller = true
        scrollView.hasVerticalScroller = false
        scrollView.drawsBackground = false
        scrollView.borderType = .noBorder
        addSubview(scrollView)

        stack.orientation = .horizontal
        stack.alignment = .centerY
        stack.spacing = 8
        stack.translatesAutoresizingMaskIntoConstraints = false
        let documentView = FlippedView()
        documentView.translatesAutoresizingMaskIntoConstraints = false
        documentView.addSubview(stack)
        scrollView.documentView = documentView

        let side = ImageGalleryBubbleView.thumbSide
        NSLayoutConstraint.activate([
            titleLabel.topAnchor.constraint(equalTo: topAnchor),
            titleLabel.leadingAnchor.constraint(equalTo: leadingAnchor),
            titleLabel.trailingAnchor.constraint(equalTo: trailingAnchor),

            scrollView.topAnchor.constraint(equalTo: query.isEmpty ? topAnchor : titleLabel.bottomAnchor,
                                            constant: query.isEmpty ? 0 : 6),
            scrollView.leadingAnchor.constraint(equalTo: leadingAnchor),
            scrollView.trailingAnchor.constraint(equalTo: trailingAnchor),
            scrollView.bottomAnchor.constraint(equalTo: bottomAnchor),
            scrollView.heightAnchor.constraint(equalToConstant: side),

            stack.leadingAnchor.constraint(equalTo: documentView.leadingAnchor),
            stack.trailingAnchor.constraint(equalTo: documentView.trailingAnchor),
            stack.topAnchor.constraint(equalTo: documentView.topAnchor),
            stack.bottomAnchor.constraint(equalTo: documentView.bottomAnchor),
            documentView.heightAnchor.constraint(equalToConstant: side),
        ])

        for (index, item) in attachment.items.enumerated() {
            let tile = makeTile(side: side, index: index)
            stack.addArrangedSubview(tile)
            // Items whose thumbnail string does not parse keep the placeholder.
            if let thumbURL = URL(string: item.thumbnailURL) {
                loadThumbnail(thumbURL, into: tile)
            }
        }
    }

    /// Creates the square, rounded, bordered tile for the item at `index`.
    /// - Parameters:
    ///   - side: width and height of the tile in points.
    ///   - index: position of the backing item in `attachment.items`.
    private func makeTile(side: CGFloat, index: Int) -> NSImageView {
        let iv = ClickableImageView()
        iv.tag = index
        iv.translatesAutoresizingMaskIntoConstraints = false
        iv.imageScaling = .scaleProportionallyUpOrDown
        iv.wantsLayer = true
        iv.layer?.cornerRadius = 10
        iv.layer?.masksToBounds = true
        iv.layer?.borderWidth = 1
        iv.layer?.borderColor = NSColor.separatorColor.cgColor
        // Placeholder fill, cleared once the thumbnail arrives.
        iv.layer?.backgroundColor = NSColor.quaternaryLabelColor.cgColor
        iv.onClick = { [weak self] in
            // Untrusted link: open it only when it is a plain web URL.
            guard let self = self,
                  index < self.attachment.items.count,
                  let url = Self.webURL(from: self.attachment.items[index].originalURL) else { return }
            NSWorkspace.shared.open(url)
        }
        NSLayoutConstraint.activate([
            iv.widthAnchor.constraint(equalToConstant: side),
            iv.heightAnchor.constraint(equalToConstant: side),
        ])
        return iv
    }

    /// Fetches `url` with the shared session and, if the data decodes as an
    /// image, sets it on `tile` on the main queue. The tile is held weakly, so
    /// a tile released before the response arrives is simply skipped.
    private func loadThumbnail(_ url: URL, into tile: NSImageView) {
        URLSession.shared.dataTask(with: url) { [weak tile] data, _, _ in
            guard let data = data, let image = NSImage(data: data) else { return }
            DispatchQueue.main.async {
                tile?.image = image
                tile?.layer?.backgroundColor = NSColor.clear.cgColor
            }
        }.resume()
    }
}
private let iconView = NSImageView() private let titleLabel = NSTextField(labelWithString: "") @@ -2297,6 +2529,22 @@ final class MacFilePreviewCardView: NSView { private let badgeContainer = NSView() private let subtitleLabel = NSTextField(labelWithString: "") private let snippetView = NSTextView() + private let shareRow = NSView() + private let shareRowSeparator = NSBox() + private let shareButton: NSButton = { + let b = NSButton() + b.translatesAutoresizingMaskIntoConstraints = false + b.bezelStyle = .recessed + b.isBordered = false + b.image = NSImage(systemSymbolName: "square.and.arrow.up", + accessibilityDescription: "Share") + b.imagePosition = .imageOnly + b.symbolConfiguration = NSImage.SymbolConfiguration(pointSize: 12, weight: .medium) + b.contentTintColor = .secondaryLabelColor + b.setContentHuggingPriority(.required, for: .horizontal) + return b + }() + private lazy var shareRowCollapsed: NSLayoutConstraint = shareRow.heightAnchor.constraint(equalToConstant: 0) private var fileURL: URL? init() { @@ -2363,11 +2611,36 @@ final class MacFilePreviewCardView: NSView { snippetView.textContainer?.maximumNumberOfLines = 8 snippetView.textContainer?.lineBreakMode = .byTruncatingTail + // Share row — separator + button, hidden by default. 
+ shareRow.translatesAutoresizingMaskIntoConstraints = false + shareRow.isHidden = true + + shareRowSeparator.translatesAutoresizingMaskIntoConstraints = false + shareRowSeparator.boxType = .separator + shareRow.addSubview(shareRowSeparator) + + shareButton.target = self + shareButton.action = #selector(handleShareTap) + shareRow.addSubview(shareButton) + + NSLayoutConstraint.activate([ + shareRowSeparator.topAnchor.constraint(equalTo: shareRow.topAnchor), + shareRowSeparator.leadingAnchor.constraint(equalTo: shareRow.leadingAnchor), + shareRowSeparator.trailingAnchor.constraint(equalTo: shareRow.trailingAnchor), + + shareButton.topAnchor.constraint(equalTo: shareRowSeparator.bottomAnchor, constant: 2), + shareButton.trailingAnchor.constraint(equalTo: shareRow.trailingAnchor, constant: -4), + shareButton.bottomAnchor.constraint(equalTo: shareRow.bottomAnchor, constant: -2), + shareButton.widthAnchor.constraint(equalToConstant: 24), + shareButton.heightAnchor.constraint(equalToConstant: 24), + ]) + addSubview(iconView) addSubview(titleLabel) addSubview(badgeContainer) addSubview(subtitleLabel) addSubview(snippetView) + addSubview(shareRow) NSLayoutConstraint.activate([ iconView.widthAnchor.constraint(equalToConstant: 16), @@ -2394,12 +2667,20 @@ final class MacFilePreviewCardView: NSView { snippetView.topAnchor.constraint(equalTo: subtitleLabel.bottomAnchor, constant: 8), snippetView.leadingAnchor.constraint(equalTo: leadingAnchor, constant: 12), snippetView.trailingAnchor.constraint(equalTo: trailingAnchor, constant: -12), - snippetView.bottomAnchor.constraint(lessThanOrEqualTo: bottomAnchor, constant: -12), + + shareRow.topAnchor.constraint(equalTo: snippetView.bottomAnchor, constant: 8), + shareRow.leadingAnchor.constraint(equalTo: leadingAnchor), + shareRow.trailingAnchor.constraint(equalTo: trailingAnchor), + shareRow.bottomAnchor.constraint(lessThanOrEqualTo: bottomAnchor), ]) - // Two competing bottom constraints — the snippet drives the height - // when it's 
visible; otherwise the subtitle (chip-like generic - // card) pulls the bottom up. Priorities pick the right one. + shareRow.clipsToBounds = true + shareRowCollapsed.isActive = true + + // Bottom priorities: share row > snippet > subtitle. + let shareRowBottom = bottomAnchor.constraint(equalTo: shareRow.bottomAnchor) + shareRowBottom.priority = NSLayoutConstraint.Priority(751) + shareRowBottom.isActive = true let bottomEqualSnippet = bottomAnchor.constraint(equalTo: snippetView.bottomAnchor, constant: 12) bottomEqualSnippet.priority = .defaultHigh bottomEqualSnippet.isActive = true @@ -2408,6 +2689,11 @@ final class MacFilePreviewCardView: NSView { bottomEqualSubtitle.isActive = true } + @objc private func handleShareTap() { + guard let url = fileURL else { return } + onShare?(url) + } + func configure(for attachment: FileAttachment) { fileURL = attachment.fileURL titleLabel.stringValue = attachment.fileName @@ -2426,6 +2712,8 @@ final class MacFilePreviewCardView: NSView { badgeContainer.isHidden = false subtitleLabel.stringValue = Self.subtitle("Markdown", size: sizeText) applyMarkdownSnippet(attachment.extractedText ?? "") + shareRow.isHidden = false + shareRowCollapsed.isActive = false case .text: iconView.image = NSImage(systemSymbolName: "chevron.left.forwardslash.chevron.right", accessibilityDescription: nil) if let lang = attachment.languageTag { @@ -2437,6 +2725,8 @@ final class MacFilePreviewCardView: NSView { subtitleLabel.stringValue = Self.subtitle("Text", size: sizeText) } applyCodeSnippet(attachment.extractedText ?? 
"") + shareRow.isHidden = false + shareRowCollapsed.isActive = false case .generic: iconView.image = NSImage(systemSymbolName: "doc", accessibilityDescription: nil) badgeContainer.isHidden = true @@ -2450,11 +2740,15 @@ final class MacFilePreviewCardView: NSView { subtitleLabel.stringValue = Self.subtitle(mimeLabel, size: sizeText) snippetView.string = "" snippetView.isHidden = true + shareRow.isHidden = true + shareRowCollapsed.isActive = true case .image, .pdf: iconView.image = NSImage(systemSymbolName: "doc", accessibilityDescription: nil) badgeContainer.isHidden = true subtitleLabel.stringValue = Self.subtitle(attachment.mimeType, size: sizeText) snippetView.isHidden = true + shareRow.isHidden = true + shareRowCollapsed.isActive = true } } @@ -2945,6 +3239,65 @@ extension ConversationWindowController: PDFSkillHost { } } +// MARK: - StorySkillHost + +extension ConversationWindowController: StorySkillHost { + + /// Insert (or, on retry, refresh) a story card for a render that just + /// kicked off. Mirrors the image / PDF hosts: a placeholder message goes + /// into the persistent store under id `story-` so a tab switch or + /// window reload re-renders the card. 
+ func storySkillDidStartGenerating(_ attachment: StoryAttachment) { + storyAttachments[attachment.id] = attachment + + if let existing = storyBubbles[attachment.id] { + existing.update(attachment: attachment) + if isStoryAttachmentForActiveTab(attachment) { surfaceForResponse() } + return + } + + if let conversation = resolveStoryAttachmentConversation(attachment) { + let marker = MessageStruct( + id: "\(Self.storyMessageIdPrefix)\(attachment.id)", + role: "assistant", + content: attachment.title, + model: "loop-story" + ) + SimpleConversationManager.shared.addMessage(marker, to: conversation) + } + + guard isStoryAttachmentForActiveTab(attachment) else { return } + + let bubble = StoryBubbleView(attachment: attachment, + onOpen: { [weak self] att in self?.openStory(attachment: att) }, + onRetry: { [weak self] att in self?.retryStory(attachment: att) }) + storyBubbles[attachment.id] = bubble + stack.addArrangedSubview(makeStoryRow(bubbleView: bubble)) + scrollToBottom() + surfaceForResponse() + } + + func storySkillDidFinishGenerating(_ attachment: StoryAttachment) { + storyAttachments[attachment.id] = attachment + if let bubble = storyBubbles[attachment.id] { + bubble.update(attachment: attachment) + } + } + + private func isStoryAttachmentForActiveTab(_ attachment: StoryAttachment) -> Bool { + guard let stamped = attachment.conversationId else { return true } + return stamped == activeTab?.conversation.id + } + + private func resolveStoryAttachmentConversation(_ attachment: StoryAttachment) -> SimpleConversation? { + if let id = attachment.conversationId, + let conv = SimpleConversationManager.shared.getConversation(by: id) { + return conv + } + return SimpleConversationManager.shared.currentConversation + } +} + /// Inline assistant bubble for a generated PDF on Mac. 
Three states match /// the iOS PDF card: /// - `.generating` — spinner over a placeholder thumbnail + "Generating…" diff --git a/LoopMac/EarconPlayer.swift b/LoopMac/EarconPlayer.swift index c4a0733..b76274a 100644 --- a/LoopMac/EarconPlayer.swift +++ b/LoopMac/EarconPlayer.swift @@ -65,17 +65,22 @@ final class EarconPlayer { if !player.isPlaying { player.play() } } - /// Schedule the earcon and block the current runloop until the audio - /// has had a chance to finish. Used for `.goodbye` at app quit, where - /// we need the sound to actually play before the process exits. - func playBlocking(_ name: Name, timeout: TimeInterval = 0.5) { - guard enabled, let buf = buffers[name] else { return } + /// Schedule the earcon and invoke `completion` once the buffer finishes + /// playing (or immediately if it can't). Used for `.goodbye` at app quit: + /// the caller defers termination until the sound completes instead of + /// blocking the main thread on a cross-QoS semaphore — the latter trips + /// the Thread Performance Checker's "hang risk" priority-inversion warning + /// because the main (user-interactive) thread would wait on AVAudioEngine's + /// lower-QoS render thread. + /// + /// `completion` fires on an AVAudioEngine internal thread — hop to your + /// queue of choice inside it. 
+ func play(_ name: Name, completion: @escaping () -> Void) { + guard enabled, let buf = buffers[name] else { completion(); return } if !engine.isRunning { startEngine() } - guard engine.isRunning else { return } - let semaphore = DispatchSemaphore(value: 0) - player.scheduleBuffer(buf) { semaphore.signal() } + guard engine.isRunning else { completion(); return } + player.scheduleBuffer(buf) { completion() } if !player.isPlaying { player.play() } - _ = semaphore.wait(timeout: .now() + timeout) } private func startEngine() { diff --git a/LoopMac/Info.plist b/LoopMac/Info.plist index e4f47eb..09369ae 100644 --- a/LoopMac/Info.plist +++ b/LoopMac/Info.plist @@ -31,6 +31,18 @@ Loop uses Apple Music to find tracks and play music that matches the mood of the conversation. NSAppleEventsUsageDescription Loop drives Terminal, Ghostty, and other terminal apps via AppleScript so it can open windows and run shell commands on your behalf — e.g. starting a Claude Code session in one of your repos. + + SUFeedURL + https://github.com/getathelas/LoopHarness/releases/download/appcast/appcast.xml + SUPublicEDKey + DbNybdk5OuCooBgHeC6+JykhxQ4yYjHBAv+igHWXBBw= + SUEnableAutomaticChecks + + SUScheduledCheckInterval + 86400 DEEPGRAM_API_KEY $(DEEPGRAM_API_KEY) ELEVEN_LABS_KEY @@ -47,6 +59,24 @@ $(OBSIDIAN_API_KEY) OBSIDIAN_BASE_URL $(OBSIDIAN_BASE_URL) + AGENT_MAIL_API_KEY + $(AGENT_MAIL_API_KEY) + AGENT_MAIL_INBOX + $(AGENT_MAIL_INBOX) + SSH_HOST + $(SSH_HOST) + SSH_PORT + $(SSH_PORT) + SSH_USERNAME + $(SSH_USERNAME) + SSH_NAME + $(SSH_NAME) + SSH_PRIVATE_KEY_B64 + $(SSH_PRIVATE_KEY_B64) + SSH_PASSPHRASE + $(SSH_PASSPHRASE) + LOOP_FLAG + $(LOOP_FLAG) NSUbiquitousContainers iCloud.com.bhat.intel diff --git a/LoopMac/LoopMacApp.swift b/LoopMac/LoopMacApp.swift index e0d9433..9eae53d 100644 --- a/LoopMac/LoopMacApp.swift +++ b/LoopMac/LoopMacApp.swift @@ -13,8 +13,16 @@ import AppKit import UserNotifications +import Sparkle final class AppDelegate: NSObject, NSApplicationDelegate { + /// 
Sparkle updater. `startingUpdater: true` plus `SUEnableAutomaticChecks` + /// in Info.plist means it checks the appcast on launch + on the scheduled + /// interval, downloads the EdDSA-signed update in the background, and + /// prompts to install. Also backs the "Check for Updates…" menu item. + /// Held strongly for the app lifetime so the background scheduler keeps + /// running. + private var updaterController: SPUStandardUpdaterController? private var recorderController: RecorderWindowController? /// Internal so other module-level controllers (e.g. the Scheduled Tasks /// window's "Open Last" path) can route through the tab manager rather @@ -130,6 +138,10 @@ final class AppDelegate: NSObject, NSApplicationDelegate { // implements PDFSkillHost and renders the PDF cell when the render // completes. PDFGenerationService.shared.host = conversation + // And StoryGenerationService — the conversation window implements + // StorySkillHost and renders the story card (then the full-screen + // player on tap) when the HTML render completes. + StoryGenerationService.shared.host = conversation let monitor = HotKeyMonitor() // All hotkey closures route through `recorder.coordinator`, which the @@ -163,6 +175,15 @@ final class AppDelegate: NSObject, NSApplicationDelegate { monitor.start() self.hotKeyMonitor = monitor + // Start Sparkle before the menu is built so the "Check for Updates…" + // item can target it. Kicks off the first background update check on + // launch (subject to SUScheduledCheckInterval) per Info.plist config. + updaterController = SPUStandardUpdaterController( + startingUpdater: true, + updaterDelegate: nil, + userDriverDelegate: nil + ) + // Build the app menu so cmd-Q etc. work — required for any regular // (.regular activation policy) AppKit app. 
installAppMenu() @@ -249,11 +270,31 @@ final class AppDelegate: NSObject, NSApplicationDelegate { return false } - func applicationWillTerminate(_ notification: Notification) { - // Mirror the welcome cue in reverse (E5 → A4) at shutdown. `playBlocking` - // pumps the runloop until the buffer finishes (or a 0.5s timeout) - // so the process doesn't exit before the sound completes. - EarconPlayer.shared.playBlocking(.goodbye) + private var isTerminating = false + + func applicationShouldTerminate(_ sender: NSApplication) -> NSApplication.TerminateReply { + // Mirror the welcome cue in reverse (E5 → A4) at shutdown. Rather than + // block the main thread on a semaphore (which would wait on + // AVAudioEngine's lower-QoS render thread and trip the Thread + // Performance Checker's hang-risk warning), defer termination until the + // buffer finishes, with a safety timeout so we always quit. + if isTerminating { return .terminateNow } + isTerminating = true + + var replied = false + let reply = { + guard !replied else { return } + replied = true + NSApp.reply(toApplicationShouldTerminate: true) + } + + EarconPlayer.shared.play(.goodbye) { + DispatchQueue.main.async { reply() } + } + // Guard against the completion never firing (engine stopped, etc.). + DispatchQueue.main.asyncAfter(deadline: .now() + 0.6) { reply() } + + return .terminateLater } @objc private func remoteConversationChanged() { @@ -384,6 +425,16 @@ final class AppDelegate: NSObject, NSApplicationDelegate { let appMenu = NSMenu() appMenu.addItem(withTitle: "About Loop", action: #selector(NSApplication.orderFrontStandardAboutPanel(_:)), keyEquivalent: "") appMenu.addItem(NSMenuItem.separator()) + // Manual update trigger. Targets the Sparkle controller, which also + // validates the item (greys it out while a check is already running). 
+ let checkForUpdatesItem = NSMenuItem( + title: "Check for Updates…", + action: #selector(SPUStandardUpdaterController.checkForUpdates(_:)), + keyEquivalent: "" + ) + checkForUpdatesItem.target = updaterController + appMenu.addItem(checkForUpdatesItem) + appMenu.addItem(NSMenuItem.separator()) appMenu.addItem(withTitle: "Hide Loop", action: #selector(NSApplication.hide(_:)), keyEquivalent: "h") let hideOthers = NSMenuItem(title: "Hide Others", action: #selector(NSApplication.hideOtherApplications(_:)), keyEquivalent: "h") hideOthers.keyEquivalentModifierMask = [.command, .option] @@ -557,6 +608,25 @@ final class AppDelegate: NSObject, NSApplicationDelegate { } } + // OpenAI voice picker only when gpt-4o-mini-tts is the active provider. + if active == .openAIMiniTTS { + menu.addItem(NSMenuItem.separator()) + let header = NSMenuItem(title: "OpenAI Voice", action: nil, keyEquivalent: "") + header.isEnabled = false + menu.addItem(header) + let activeVoice = TTSSettings.shared.openAIVoice + for voice in TTSSettings.openAIVoices { + let item = NSMenuItem( + title: voice.label, + action: #selector(AppDelegate.selectOpenAIVoice(_:)), + keyEquivalent: "" + ) + item.representedObject = voice.id + item.state = (voice.id == activeVoice) ? .on : .off + menu.addItem(item) + } + } + return menu } @@ -727,6 +797,11 @@ final class AppDelegate: NSObject, NSApplicationDelegate { TTSSettings.shared.setElevenLabsVoice(voiceId, for: provider) } + @objc fileprivate func selectOpenAIVoice(_ sender: NSMenuItem) { + guard let voiceId = sender.representedObject as? String else { return } + TTSSettings.shared.openAIVoice = voiceId + } + @objc fileprivate func openKeysSettings(_ sender: Any?) 
{ SettingsWindowController.shared.showKeys() } diff --git a/LoopMac/MacMarkdownShareToolbar.swift b/LoopMac/MacMarkdownShareToolbar.swift new file mode 100644 index 0000000..536d0f8 --- /dev/null +++ b/LoopMac/MacMarkdownShareToolbar.swift @@ -0,0 +1,75 @@ +// +// MacMarkdownShareToolbar.swift +// LoopMac +// +// Bottom toolbar for markdown preview surfaces on macOS. Contains a Share +// button that triggers NSSharingServicePicker with the raw markdown source. +// Styled to match the existing Loop visual language. +// + +import AppKit + +final class MacMarkdownShareToolbar: NSView { + + // MARK: - Public + + var onShare: (() -> Void)? + + /// The bounds/view needed for anchoring NSSharingServicePicker. + var shareButtonBounds: NSRect { shareButton.bounds } + var shareButtonView: NSView { shareButton } + + // MARK: - Subviews + + private let separator = NSBox() + private let shareButton: NSButton = { + let b = NSButton() + b.translatesAutoresizingMaskIntoConstraints = false + b.bezelStyle = .recessed + b.isBordered = false + b.image = NSImage(systemSymbolName: "square.and.arrow.up", + accessibilityDescription: "Share") + b.imagePosition = .imageOnly + b.symbolConfiguration = NSImage.SymbolConfiguration(pointSize: 14, weight: .medium) + b.contentTintColor = .labelColor + b.setContentHuggingPriority(.required, for: .horizontal) + return b + }() + + // MARK: - Init + + override init(frame frameRect: NSRect) { + super.init(frame: frameRect) + setup() + } + + required init?(coder: NSCoder) { fatalError("init(coder:) has not been implemented") } + + // MARK: - Setup + + private func setup() { + wantsLayer = true + + separator.translatesAutoresizingMaskIntoConstraints = false + separator.boxType = .separator + addSubview(separator) + + shareButton.target = self + shareButton.action = #selector(shareTapped) + addSubview(shareButton) + + NSLayoutConstraint.activate([ + separator.topAnchor.constraint(equalTo: topAnchor), + separator.leadingAnchor.constraint(equalTo: 
leadingAnchor), + separator.trailingAnchor.constraint(equalTo: trailingAnchor), + + shareButton.topAnchor.constraint(equalTo: separator.bottomAnchor, constant: 4), + shareButton.trailingAnchor.constraint(equalTo: trailingAnchor, constant: -12), + shareButton.bottomAnchor.constraint(equalTo: bottomAnchor, constant: -4), + shareButton.widthAnchor.constraint(equalToConstant: 28), + shareButton.heightAnchor.constraint(equalToConstant: 28), + ]) + } + + @objc private func shareTapped() { onShare?() } +} diff --git a/LoopMac/MacTTS.swift b/LoopMac/MacTTS.swift index 3007fe5..7aab4ff 100644 --- a/LoopMac/MacTTS.swift +++ b/LoopMac/MacTTS.swift @@ -19,6 +19,7 @@ enum MacTTSProvider: String, CaseIterable { case aura2 = "aura2" // Deepgram Aura-2 (streaming, lowest latency) case elevenLabsV3 = "elevenLabsV3" // ElevenLabs Eleven v3 (most expressive) case elevenLabsFlashV25 = "elevenLabsFlashV25" // ElevenLabs Flash v2.5 (low-latency) + case openAIMiniTTS = "openAIMiniTTS" // OpenAI gpt-4o-mini-tts (steerable via instructions) case system = "system" // AVSpeechSynthesizer (offline) var displayName: String { @@ -26,6 +27,7 @@ enum MacTTSProvider: String, CaseIterable { case .aura2: return "Deepgram Aura-2" case .elevenLabsV3: return "ElevenLabs v3" case .elevenLabsFlashV25: return "ElevenLabs Flash v2.5" + case .openAIMiniTTS: return "OpenAI gpt-4o-mini-tts" case .system: return "On-device (offline)" } } @@ -51,6 +53,7 @@ final class TTSSettings { private let auraVoiceKey = "loopmac.ttsAuraVoice" private let elevenLabsV3VoiceKey = "loopmac.ttsElevenLabsV3Voice" private let elevenLabsFlashVoiceKey = "loopmac.ttsElevenLabsFlashVoice" + private let openAIVoiceKey = "loopmac.ttsOpenAIVoice" /// Defaults to .aura2 to match the iOS preference the user mentioned. var provider: MacTTSProvider { @@ -83,6 +86,7 @@ final class TTSSettings { /// ElevenLabs voice library. Both v3 and Flash v2.5 share the same set; /// callers pick per-provider so v3 / Flash can store different picks. 
+ /// Kept in sync with iOS's `TTSProvider.voiceOptions`. static let elevenLabsVoices: [(label: String, id: String)] = [ ("Rachel (warm female)", "21m00Tcm4TlvDq8ikWAM"), ("Bella (young female)", "EXAVITQu4vr4xnSDxMaL"), @@ -90,6 +94,11 @@ final class TTSSettings { ("Antoni (calm male)", "ErXwobaYiN019PkySvjV"), ("Elli (soft female)", "MF3mGyEYCl7XYWbV9V6O"), ("Josh (steady male)", "TxGEqnHWrfWFTfGW9XjX"), + ("Hayes (english male)", "sIivXWc5MTlPIP3kJXhg"), + ("Rory (irish male)", "hmMWXCj9K7N5mCPcRkfC"), + ("Hannah (american female)", "ZSNL4hPqCnqoMPaI4jGX"), + ("Zoe (african american female)", "M6ic45wruJGWAxLFEMNK"), + ("Agent (secret agent male)", "ICIc5IiEgLitxGwyb7ZG"), ] private static let elevenLabsDefaultVoiceId = "21m00Tcm4TlvDq8ikWAM" @@ -116,6 +125,23 @@ final class TTSSettings { } NotificationCenter.default.post(name: .ttsSettingsChanged, object: nil) } + + /// OpenAI gpt-4o-mini-tts voice library. Kept in sync with iOS's + /// `TTSProvider.voiceOptions` for the `.openAIMiniTTS` case. + static let openAIVoices: [(label: String, id: String)] = [ + "alloy", "echo", "fable", "onyx", "nova", + "shimmer", "coral", "sage", "ash", "ballad", "verse", + ].map { ($0.capitalized, $0) } + + static let openAIDefaultVoice = "shimmer" + + var openAIVoice: String { + get { defaults.string(forKey: openAIVoiceKey) ?? Self.openAIDefaultVoice } + set { + defaults.set(newValue, forKey: openAIVoiceKey) + NotificationCenter.default.post(name: .ttsSettingsChanged, object: nil) + } + } } extension Notification.Name { @@ -153,7 +179,15 @@ final class MacSpeechPlayer { audioPlayer?.stop() audioPlayer = nil meteringTimer?.invalidate(); meteringTimer = nil - if synthesizer.isSpeaking { synthesizer.stopSpeaking(at: .immediate) } + // `stopSpeaking(at:)` synchronously blocks on AVSpeechSynthesizer's + // internal Default-QoS speech-service thread. 
Calling it from the main + // (user-interactive) thread is a priority inversion that trips the + // Thread Performance Checker's "hang risk" warning, so hop to a + // matching Default-QoS queue — stop() is fire-and-forget (callers + // observe onFinished), and stopping when idle is harmless. + DispatchQueue.global(qos: .default).async { [synthesizer] in + if synthesizer.isSpeaking { synthesizer.stopSpeaking(at: .immediate) } + } // Drop the avatar's speaking amplitude to 0 so its EMA decays back // to the canned/idle behavior promptly. DispatchQueue.main.async { [weak self] in self?.onOutputAmplitude?(0) } @@ -199,6 +233,9 @@ final class MacSpeechPlayer { case .elevenLabsV3, .elevenLabsFlashV25: if speakViaElevenLabs(cleaned, provider: provider, token: token) { return } speakViaSystem(cleaned, token: token) + case .openAIMiniTTS: + if speakViaOpenAI(cleaned, token: token) { return } + speakViaSystem(cleaned, token: token) case .system: speakViaSystem(cleaned, token: token) } @@ -302,29 +339,98 @@ final class MacSpeechPlayer { self.speakViaSystem(text, token: token) return } - guard let data = data, !data.isEmpty, - let player = try? 
AVAudioPlayer(data: data) else { - print("ElevenLabs TTS empty/bad audio — falling back to on-device") + guard let data = data, !data.isEmpty else { + print("ElevenLabs TTS empty audio — falling back to on-device") self.speakViaSystem(text, token: token) return } - player.prepareToPlay() - player.play() - self.audioPlayer = player - self.startMetering(for: player, token: token) - let duration = max(player.duration, 0.5) - DispatchQueue.main.asyncAfter(deadline: .now() + duration) { [weak self] in - guard let self = self, self.currentToken == token else { return } - self.meteringTimer?.invalidate(); self.meteringTimer = nil - self.audioPlayer = nil - self.onOutputAmplitude?(0) - self.onFinished?() + self.playMP3Data(data, fallbackText: text, providerLabel: "ElevenLabs", token: token) + } + }.resume() + return true + } + + // MARK: - OpenAI gpt-4o-mini-tts HTTP + + /// Steers OpenAI's gpt-4o-mini-tts toward better list pacing and warmth. + /// Mirrors the iOS `openAITTSInstructions`. + private static let openAIInstructions = """ + Speak in a warm, natural, conversational tone. When reading lists, pause \ + briefly between items so each one is distinct. Vary pace and emphasis \ + like a person would. + """ + + /// Returns true if the request was kicked off; false if there's no key + /// (caller falls back to on-device). Network/HTTP failures inside the + /// task also fall back. 
+ private func speakViaOpenAI(_ text: String, token: Int) -> Bool { + guard let apiKey = Self.openAIKey else { return false } + + guard let url = URL(string: "https://api.openai.com/v1/audio/speech") else { return false } + var req = URLRequest(url: url) + req.httpMethod = "POST" + req.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") + req.setValue("application/json", forHTTPHeaderField: "Content-Type") + let body: [String: Any] = [ + "model": "gpt-4o-mini-tts", + "input": text, + "voice": TTSSettings.shared.openAIVoice, + "response_format": "mp3", + "instructions": Self.openAIInstructions + ] + guard let bodyData = try? JSONSerialization.data(withJSONObject: body) else { return false } + req.httpBody = bodyData + + URLSession.shared.dataTask(with: req) { [weak self] data, response, error in + DispatchQueue.main.async { + guard let self = self, self.currentToken == token else { return } + if let error = error { + print("OpenAI TTS error: \(error) — falling back to on-device") + self.speakViaSystem(text, token: token) + return + } + if let http = response as? HTTPURLResponse, http.statusCode >= 400 { + let bodyStr = data.flatMap { String(data: $0, encoding: .utf8) } ?? "" + print("OpenAI TTS HTTP \(http.statusCode): \(bodyStr) — falling back to on-device") + self.speakViaSystem(text, token: token) + return } + guard let data = data, !data.isEmpty else { + print("OpenAI TTS empty audio — falling back to on-device") + self.speakViaSystem(text, token: token) + return + } + self.playMP3Data(data, fallbackText: text, providerLabel: "OpenAI", token: token) } }.resume() return true } + // MARK: - Shared MP3-buffer playback + + /// Plays full-buffer MP3 bytes (ElevenLabs / OpenAI) via AVAudioPlayer and + /// drives avatar metering. Falls back to on-device if the bytes won't + /// decode. Must be called on the main thread with a current token. 
+ private func playMP3Data(_ data: Data, fallbackText: String, providerLabel: String, token: Int) { + guard let player = try? AVAudioPlayer(data: data) else { + print("\(providerLabel) TTS bad audio — falling back to on-device") + self.speakViaSystem(fallbackText, token: token) + return + } + player.prepareToPlay() + player.play() + self.audioPlayer = player + self.startMetering(for: player, token: token) + let duration = max(player.duration, 0.5) + DispatchQueue.main.asyncAfter(deadline: .now() + duration) { [weak self] in + guard let self = self, self.currentToken == token else { return } + self.meteringTimer?.invalidate(); self.meteringTimer = nil + self.audioPlayer = nil + self.onOutputAmplitude?(0) + self.onFinished?() + } + } + // MARK: - On-device fallback private func speakViaSystem(_ text: String, token: Int) { @@ -353,4 +459,8 @@ final class MacSpeechPlayer { fileprivate static var elevenLabsKey: String? { KeyStore.shared.value(for: .elevenLabs) } + + fileprivate static var openAIKey: String? 
{ + KeyStore.shared.value(for: .openAI) + } } diff --git a/LoopMac/MarkdownEditorViewController.swift b/LoopMac/MarkdownEditorViewController.swift index 1f5d4ca..d2c25e1 100644 --- a/LoopMac/MarkdownEditorViewController.swift +++ b/LoopMac/MarkdownEditorViewController.swift @@ -44,6 +44,7 @@ final class MarkdownEditorViewController: NSViewController { private let textView = NSTextView() private let spinner = NSProgressIndicator() private let errorLabel = NSTextField(labelWithString: "") + private let shareToolbar = MacMarkdownShareToolbar() // MARK: - Init @@ -88,6 +89,12 @@ final class MarkdownEditorViewController: NSViewController { errorLabel.isHidden = true root.addSubview(errorLabel) + shareToolbar.translatesAutoresizingMaskIntoConstraints = false + shareToolbar.onShare = { [weak self] in + self?.shareMarkdownText() + } + root.addSubview(shareToolbar) + NSLayoutConstraint.activate([ header.topAnchor.constraint(equalTo: root.topAnchor), header.leadingAnchor.constraint(equalTo: root.leadingAnchor), @@ -97,7 +104,11 @@ final class MarkdownEditorViewController: NSViewController { scrollView.topAnchor.constraint(equalTo: header.bottomAnchor), scrollView.leadingAnchor.constraint(equalTo: root.leadingAnchor), scrollView.trailingAnchor.constraint(equalTo: root.trailingAnchor), - scrollView.bottomAnchor.constraint(equalTo: root.bottomAnchor), + scrollView.bottomAnchor.constraint(equalTo: shareToolbar.topAnchor), + + shareToolbar.leadingAnchor.constraint(equalTo: root.leadingAnchor), + shareToolbar.trailingAnchor.constraint(equalTo: root.trailingAnchor), + shareToolbar.bottomAnchor.constraint(equalTo: root.bottomAnchor), spinner.centerXAnchor.constraint(equalTo: root.centerXAnchor), spinner.centerYAnchor.constraint(equalTo: root.centerYAnchor), @@ -313,6 +324,15 @@ final class MarkdownEditorViewController: NSViewController { private func updateSaveButton() { saveButton.isEnabled = isDirty } + + private func shareMarkdownText() { + guard isLoaded else { return } + let 
text = textView.string + let picker = NSSharingServicePicker(items: [text]) + picker.show(relativeTo: shareToolbar.shareButtonBounds, + of: shareToolbar.shareButtonView, + preferredEdge: .minY) + } } // MARK: - NSTextViewDelegate diff --git a/LoopMac/RecorderWindowController.swift b/LoopMac/RecorderWindowController.swift index 7af816d..e361719 100644 --- a/LoopMac/RecorderWindowController.swift +++ b/LoopMac/RecorderWindowController.swift @@ -410,8 +410,17 @@ final class RecorderWindowController: NSWindowController, NSTextFieldDelegate, N } private func positionAtBottomCenter() { - guard let window = window, - let screen = NSScreen.main else { return } + guard let window = window else { return } + // Surface on the screen the cursor is on, not `NSScreen.main`. + // `NSScreen.main` tracks the *key* window, which — when Loop is in the + // background and the user triggers ctrl+fn from another app — is + // whatever window they last focused, frequently on a different monitor + // (a different "desktop") than where their attention currently is. The + // cursor is the most reliable proxy for the active display, matching + // how Spotlight / Raycast pick their screen. + let mouse = NSEvent.mouseLocation + guard let screen = NSScreen.screens.first(where: { NSMouseInRect(mouse, $0.frame, false) }) + ?? NSScreen.main else { return } let frame = window.frame let visible = screen.visibleFrame let originX = visible.midX - frame.width / 2 @@ -559,6 +568,16 @@ final class RecorderWindowController: NSWindowController, NSTextFieldDelegate, N // NSTextField. Forcing it enabled here makes focusTextInput safe to // call from any state transition. textField.isEnabled = true + // Force a layout pass before focusing. 
makeFirstResponder makes AppKit + // install a field editor (RecorderFieldEditor) sized to the text + // field's current frame — and on the very first show no layout has run + // yet, so the field would come up zero-width and render nothing (no + // caret, no placeholder) until some later layout. Previously that + // "later layout" only happened after the first send + // (adjustWindowHeightToFitText), which is why the field stayed blank + // until you sent a message. Laying out here gives the field a real + // frame so the editor surfaces the moment the bar gains focus. + window.contentView?.layoutSubtreeIfNeeded() window.makeFirstResponder(textField) installClickAwayMonitorIfNeeded() } @@ -624,7 +643,13 @@ final class RecorderWindowController: NSWindowController, NSTextFieldDelegate, N // while the assistant works. The placeholder still conveys the // current pipeline stage. textField.isEnabled = true - textField.placeholderAttributedString = makeStateText("transcribing…") + let engineTag: String + switch VoiceLoopCoordinator.current?.activeSTTEngine { + case .deepgram: engineTag = " · DG" + case .apple: engineTag = " · APL" + case nil: engineTag = "" + } + textField.placeholderAttributedString = makeStateText("transcribing…\(engineTag)") waveformContainer.isHidden = true avatarView.mode = .thinking case .thinking: diff --git a/LoopMac/StoryMacUI.swift b/LoopMac/StoryMacUI.swift new file mode 100644 index 0000000..76ab82d --- /dev/null +++ b/LoopMac/StoryMacUI.swift @@ -0,0 +1,333 @@ +// +// StoryMacUI.swift +// LoopMac +// +// Mac UI for the Stories feature. Mirrors the iOS StoryPlayerVC / inline +// card, but in AppKit: +// - `StoryBubbleView` is the inline portrait card painted into the +// conversation transcript (generating / ready / failed states), matching +// the ImageBubbleView / PDFBubbleView pattern. 
+// - `StoryPlayerWindowController` opens the rendered 1080×1920 HTML in a +// portrait window with click/arrow tap-to-advance, driving the same +// `window.StoryBridge` JS API the iOS player uses. +// +// The cross-platform pieces (StorySkill, StoryGenerationService, +// StoryGenerator, StoryAttachment, StoryBundledTemplates) are shared with +// iOS; only the presentation layer differs. +// + +import AppKit +import WebKit + +// MARK: - Inline card + +/// Inline assistant bubble for a generated story on Mac. Three states match +/// the iOS story card: +/// - `.generating` — spinner over a portrait placeholder + "Creating story…" +/// - `.ready` — gradient poster with the title + a play glyph; click to open +/// the full-screen player. +/// - `.failed` — error text + a Try again button that re-renders the same id. +final class StoryBubbleView: NSView { + private(set) var attachment: StoryAttachment + + private let onOpen: (StoryAttachment) -> Void + private let onRetry: (StoryAttachment) -> Void + + private let card = NSView() + private let poster = NSView() + private let posterGradient = CAGradientLayer() + private let glyph = NSImageView() + private let spinner = NSProgressIndicator() + private let titleLabel = NSTextField(labelWithString: "") + private let subtitleLabel = NSTextField(labelWithString: "") + private let errorLabel = NSTextField(labelWithString: "") + private let retryButton = NSButton(title: "Try again", target: nil, action: nil) + + init(attachment: StoryAttachment, + onOpen: @escaping (StoryAttachment) -> Void, + onRetry: @escaping (StoryAttachment) -> Void) { + self.attachment = attachment + self.onOpen = onOpen + self.onRetry = onRetry + super.init(frame: .zero) + configure() + update(attachment: attachment) + } + + required init?(coder: NSCoder) { fatalError() } + + private func configure() { + translatesAutoresizingMaskIntoConstraints = false + wantsLayer = true + + card.translatesAutoresizingMaskIntoConstraints = false + card.wantsLayer = 
true + card.layer?.cornerRadius = 14 + card.layer?.borderWidth = 1 + card.layer?.borderColor = NSColor.separatorColor.cgColor + card.layer?.backgroundColor = NSColor.controlBackgroundColor.cgColor + addSubview(card) + + // Portrait poster (9:16) — gradient backdrop with a centered glyph. + poster.translatesAutoresizingMaskIntoConstraints = false + poster.wantsLayer = true + poster.layer?.cornerRadius = 10 + poster.layer?.masksToBounds = true + posterGradient.colors = [ + NSColor(calibratedRed: 0.42, green: 0.30, blue: 0.85, alpha: 1).cgColor, + NSColor(calibratedRed: 0.85, green: 0.30, blue: 0.55, alpha: 1).cgColor, + ] + posterGradient.startPoint = CGPoint(x: 0, y: 0) + posterGradient.endPoint = CGPoint(x: 1, y: 1) + poster.layer?.addSublayer(posterGradient) + card.addSubview(poster) + + glyph.translatesAutoresizingMaskIntoConstraints = false + glyph.imageScaling = .scaleProportionallyUpOrDown + glyph.contentTintColor = .white + glyph.image = NSImage(systemSymbolName: "play.circle.fill", accessibilityDescription: "Play story") + poster.addSubview(glyph) + + spinner.translatesAutoresizingMaskIntoConstraints = false + spinner.style = .spinning + spinner.controlSize = .regular + spinner.isDisplayedWhenStopped = false + spinner.appearance = NSAppearance(named: .darkAqua) + poster.addSubview(spinner) + + titleLabel.translatesAutoresizingMaskIntoConstraints = false + titleLabel.font = NSFont.systemFont(ofSize: 13, weight: .semibold) + titleLabel.maximumNumberOfLines = 2 + titleLabel.lineBreakMode = .byTruncatingTail + titleLabel.cell?.wraps = true + card.addSubview(titleLabel) + + subtitleLabel.translatesAutoresizingMaskIntoConstraints = false + subtitleLabel.font = NSFont.systemFont(ofSize: 11) + subtitleLabel.textColor = .secondaryLabelColor + subtitleLabel.maximumNumberOfLines = 1 + subtitleLabel.lineBreakMode = .byTruncatingTail + card.addSubview(subtitleLabel) + + errorLabel.translatesAutoresizingMaskIntoConstraints = false + errorLabel.font = 
NSFont.systemFont(ofSize: 11) + errorLabel.textColor = .systemRed + errorLabel.maximumNumberOfLines = 3 + errorLabel.cell?.wraps = true + errorLabel.isHidden = true + card.addSubview(errorLabel) + + retryButton.bezelStyle = .rounded + retryButton.controlSize = .small + retryButton.target = self + retryButton.action = #selector(retryTapped) + retryButton.translatesAutoresizingMaskIntoConstraints = false + retryButton.isHidden = true + card.addSubview(retryButton) + + // 132×235 poster ≈ 9:16; the whole card hugs that width. + let posterW: CGFloat = 132 + let posterH: CGFloat = 235 + NSLayoutConstraint.activate([ + card.topAnchor.constraint(equalTo: topAnchor), + card.leadingAnchor.constraint(equalTo: leadingAnchor), + card.trailingAnchor.constraint(equalTo: trailingAnchor), + card.bottomAnchor.constraint(equalTo: bottomAnchor), + + poster.topAnchor.constraint(equalTo: card.topAnchor, constant: 12), + poster.leadingAnchor.constraint(equalTo: card.leadingAnchor, constant: 12), + poster.trailingAnchor.constraint(equalTo: card.trailingAnchor, constant: -12), + poster.widthAnchor.constraint(equalToConstant: posterW), + poster.heightAnchor.constraint(equalToConstant: posterH), + + glyph.centerXAnchor.constraint(equalTo: poster.centerXAnchor), + glyph.centerYAnchor.constraint(equalTo: poster.centerYAnchor), + glyph.widthAnchor.constraint(equalToConstant: 44), + glyph.heightAnchor.constraint(equalToConstant: 44), + + spinner.centerXAnchor.constraint(equalTo: poster.centerXAnchor), + spinner.centerYAnchor.constraint(equalTo: poster.centerYAnchor), + + titleLabel.leadingAnchor.constraint(equalTo: poster.leadingAnchor), + titleLabel.trailingAnchor.constraint(equalTo: poster.trailingAnchor), + titleLabel.topAnchor.constraint(equalTo: poster.bottomAnchor, constant: 8), + + subtitleLabel.leadingAnchor.constraint(equalTo: titleLabel.leadingAnchor), + subtitleLabel.trailingAnchor.constraint(equalTo: titleLabel.trailingAnchor), + subtitleLabel.topAnchor.constraint(equalTo: 
titleLabel.bottomAnchor, constant: 2), + + errorLabel.leadingAnchor.constraint(equalTo: titleLabel.leadingAnchor), + errorLabel.trailingAnchor.constraint(equalTo: titleLabel.trailingAnchor), + errorLabel.topAnchor.constraint(equalTo: subtitleLabel.bottomAnchor, constant: 4), + + retryButton.leadingAnchor.constraint(equalTo: titleLabel.leadingAnchor), + retryButton.topAnchor.constraint(equalTo: errorLabel.bottomAnchor, constant: 8), + + card.bottomAnchor.constraint(greaterThanOrEqualTo: subtitleLabel.bottomAnchor, constant: 12), + card.bottomAnchor.constraint(greaterThanOrEqualTo: retryButton.bottomAnchor, constant: 12), + ]) + + let click = NSClickGestureRecognizer(target: self, action: #selector(cardTapped)) + poster.addGestureRecognizer(click) + } + + override func layout() { + super.layout() + posterGradient.frame = poster.bounds + } + + func update(attachment: StoryAttachment) { + self.attachment = attachment + titleLabel.stringValue = attachment.title.isEmpty ? "Story" : attachment.title + + switch attachment.status { + case .generating: + spinner.startAnimation(nil) + glyph.isHidden = true + subtitleLabel.stringValue = "Creating story…" + errorLabel.isHidden = true + retryButton.isHidden = true + case .ready: + spinner.stopAnimation(nil) + glyph.isHidden = false + subtitleLabel.stringValue = "Tap to play" + errorLabel.isHidden = true + retryButton.isHidden = true + case .failed: + spinner.stopAnimation(nil) + glyph.isHidden = true + subtitleLabel.stringValue = "Couldn't create story" + errorLabel.stringValue = attachment.failureReason ?? "Unknown error." + errorLabel.isHidden = false + retryButton.isHidden = false + } + } + + @objc private func cardTapped() { + guard attachment.status == .ready else { return } + onOpen(attachment) + } + + @objc private func retryTapped() { onRetry(attachment) } +} + +// MARK: - Full-screen player + +/// Hosts the rendered story HTML in a portrait window. 
Click the right 70% to +/// advance, the left 30% to go back; arrow keys + space also work. Drives the +/// same `window.StoryBridge` API as the iOS player. +final class StoryPlayerWindowController: NSWindowController { + private let fileURL: URL + private let webView: WKWebView + private let contentView: StoryPlayerContentView + + init(attachment: StoryAttachment) { + // Stories render at 1080×1920; show a half-ish-scale portrait window. + let size = NSSize(width: 405, height: 720) + self.fileURL = attachment.fileURL ?? URL(fileURLWithPath: "/dev/null") + + let config = WKWebViewConfiguration() + let web = WKWebView(frame: NSRect(origin: .zero, size: size), configuration: config) + web.setValue(false, forKey: "drawsBackground") + // 1080pt content scaled to the window width. + web.pageZoom = size.width / 1080.0 + self.webView = web + + let content = StoryPlayerContentView(frame: NSRect(origin: .zero, size: size)) + content.webView = web + web.translatesAutoresizingMaskIntoConstraints = false + content.addSubview(web) + NSLayoutConstraint.activate([ + web.leadingAnchor.constraint(equalTo: content.leadingAnchor), + web.trailingAnchor.constraint(equalTo: content.trailingAnchor), + web.topAnchor.constraint(equalTo: content.topAnchor), + web.bottomAnchor.constraint(equalTo: content.bottomAnchor), + ]) + self.contentView = content + + let window = NSWindow( + contentRect: NSRect(origin: .zero, size: size), + styleMask: [.titled, .closable, .fullSizeContentView], + backing: .buffered, + defer: false + ) + window.title = attachment.title.isEmpty ? "Story" : attachment.title + // The controller owns the window; without this AppKit would over- + // release it on close and crash under ARC. 
+ window.isReleasedWhenClosed = false + window.titlebarAppearsTransparent = true + window.titleVisibility = .hidden + window.isMovableByWindowBackground = true + window.backgroundColor = .black + window.contentView = content + window.center() + window.contentAspectRatio = NSSize(width: 9, height: 16) + + super.init(window: window) + window.delegate = self + } + + required init?(coder: NSCoder) { fatalError() } + + func present() { + if FileManager.default.fileExists(atPath: fileURL.path) { + webView.loadFileURL(fileURL, allowingReadAccessTo: fileURL.deletingLastPathComponent()) + } + showWindow(nil) + window?.makeKeyAndOrderFront(nil) + window?.makeFirstResponder(contentView) + NSApp.activate(ignoringOtherApps: true) + } +} + +extension StoryPlayerWindowController: NSWindowDelegate { + func windowWillClose(_ notification: Notification) { + // Retained by ConversationWindowController; drop the reference there so + // the player + WKWebView deallocate. Posting is simplest given the + // controller is stored weakly-by-intent on the conversation window. + NotificationCenter.default.post(name: .storyPlayerDidClose, object: self) + } +} + +extension Notification.Name { + static let storyPlayerDidClose = Notification.Name("LoopMacStoryPlayerDidClose") +} + +/// First-responder content view that turns clicks + arrow keys into +/// StoryBridge navigation. Left 30% = back, right 70% = forward. +final class StoryPlayerContentView: NSView { + weak var webView: WKWebView? + + override var acceptsFirstResponder: Bool { true } + override func acceptsFirstMouse(for event: NSEvent?) 
-> Bool { true } + + override func mouseDown(with event: NSEvent) { + let x = convert(event.locationInWindow, from: nil).x + if x < bounds.width * 0.3 { + retreat() + } else { + advance() + } + } + + override func keyDown(with event: NSEvent) { + switch event.keyCode { + case 123: // left arrow + retreat() + case 124, 49: // right arrow, space + advance() + case 53: // esc + window?.performClose(nil) + default: + super.keyDown(with: event) + } + } + + private func advance() { + webView?.evaluateJavaScript("window.StoryBridge && window.StoryBridge.advance()", completionHandler: nil) + } + + private func retreat() { + webView?.evaluateJavaScript("window.StoryBridge && window.StoryBridge.retreat()", completionHandler: nil) + } +} diff --git a/LoopMac/VoiceLoopCoordinator.swift b/LoopMac/VoiceLoopCoordinator.swift index 57fc7f7..5a0e0e9 100644 --- a/LoopMac/VoiceLoopCoordinator.swift +++ b/LoopMac/VoiceLoopCoordinator.swift @@ -92,6 +92,12 @@ final class VoiceLoopCoordinator { var onEmptyTranscript: (() -> Void)? enum State { case idle, recording, transcribing, thinking, speaking } + + /// Which speech-to-text backend is driving the current (or most recent) + /// transcription session. `nil` before the first recording. + enum STTEngine { case deepgram, apple } + private(set) var activeSTTEngine: STTEngine? + private(set) var state: State = .idle { didSet { DispatchQueue.main.async { [weak self] in @@ -317,12 +323,14 @@ The current date and time is \(now). AppleSTT.requestAuthorization { [weak self] granted in guard let self = self else { return } if granted { + self.activeSTTEngine = .apple self.beginEngineApple() } else { print("⚠️ Speech recognition permission denied. Approve in System Settings → Privacy & Security → Speech Recognition.") } } } else if let key = Self.deepgramAPIKey { + activeSTTEngine = .deepgram beginEngine(apiKey: key) } } @@ -810,6 +818,9 @@ The current date and time is \(now). 
if ExaSkill.shared.handles(functionName: function.name) { ExaSkill.shared.handle(functionCall: function, completion: cont); return } + if SerpImageSearchSkill.shared.handles(functionName: function.name) { + SerpImageSearchSkill.shared.handle(functionCall: function, completion: cont); return + } if URLFetchSkill.shared.handles(functionName: function.name) { URLFetchSkill.shared.handle(functionCall: function, completion: cont); return } @@ -831,6 +842,18 @@ The current date and time is \(now). if LocationSkill.shared.handles(functionName: function.name) { LocationSkill.shared.handle(functionCall: function, completion: cont); return } + if MapsSkill.shared.handles(functionName: function.name) { + MapsSkill.shared.handle(functionCall: function, completion: cont); return + } + if GeocodingSkill.shared.handles(functionName: function.name) { + GeocodingSkill.shared.handle(functionCall: function, completion: cont); return + } + if NavigationSkill.shared.handles(functionName: function.name) { + NavigationSkill.shared.handle(functionCall: function, completion: cont); return + } + if MuniRealtimeSkill.shared.handles(functionName: function.name) { + MuniRealtimeSkill.shared.handle(functionCall: function, completion: cont); return + } if MacAppSkill.shared.handles(functionName: function.name) { MacAppSkill.shared.handle(functionCall: function, completion: cont); return } @@ -870,6 +893,14 @@ The current date and time is \(now). PDFSkill.shared.handle(functionCall: function, completion: cont) return } + if StorySkill.shared.handles(functionName: function.name) { + // Submit-and-return like Image/PDF: StorySkill hands the render to + // StoryGenerationService and returns a queued stub immediately; the + // card fills in via StorySkillHost on the conversation window when + // the WKWebView finishes rendering. 
+ StorySkill.shared.handle(functionCall: function, completion: cont) + return + } if SubAgentSkill.shared.handles(functionName: function.name) { SubAgentSkill.shared.handle(functionCall: function, completion: cont) return @@ -880,6 +911,30 @@ The current date and time is \(now). if CursorSkill.shared.handles(functionName: function.name) { CursorSkill.shared.handle(functionCall: function, completion: cont); return } + if TwitterSkill.shared.handles(functionName: function.name) { + TwitterSkill.shared.handle(functionCall: function, completion: cont); return + } + if SSHSkill.shared.handles(functionName: function.name) { + SSHSkill.shared.handle(functionCall: function, completion: cont); return + } + if GoogleDriveSkill.shared.handles(functionName: function.name) { + GoogleDriveSkill.shared.handle(functionCall: function, completion: cont); return + } + if GoogleGmailSkill.shared.handles(functionName: function.name) { + GoogleGmailSkill.shared.handle(functionCall: function, completion: cont); return + } + if GoogleCalendarSkill.shared.handles(functionName: function.name) { + GoogleCalendarSkill.shared.handle(functionCall: function, completion: cont); return + } + if AgentMailSkill.shared.handles(functionName: function.name) { + AgentMailSkill.shared.handle(functionCall: function, completion: cont); return + } + // Remote MCP servers the user has installed — routed before the + // dynamic JS registry since both land in the same trailing section + // of the advertised tool schemas. + if MCPRegistry.shared.handles(functionName: function.name) { + MCPRegistry.shared.handle(functionCall: function, completion: cont); return + } // Dynamic (user-authored JS) skills last — hot-loaded so the // registry is the source of truth for what's currently available. if DynamicSkillRegistry.shared.handles(functionName: function.name) { @@ -892,6 +947,7 @@ The current date and time is \(now). 
private func statusText(for call: FunctionCallStruct) -> String { if let s = ExaSkill.shared.statusText(for: call) { return s } + if let s = SerpImageSearchSkill.shared.statusText(for: call) { return s } if let s = NotionSkill.shared.statusText(for: call) { return s } if let s = SlackSkill.shared.statusText(for: call) { return s } if let s = SchedulerSkill.shared.statusText(for: call) { return s } @@ -906,9 +962,21 @@ The current date and time is \(now). if let s = SkillBuilderSkill.shared.statusText(for: call) { return s } if let s = GitHubSkill.shared.statusText(for: call) { return s } if let s = PDFSkill.shared.statusText(for: call) { return s } + if let s = StorySkill.shared.statusText(for: call) { return s } if let s = SubAgentSkill.shared.statusText(for: call) { return s } if let s = DevinSkill.shared.statusText(for: call) { return s } if let s = CursorSkill.shared.statusText(for: call) { return s } + if let s = GoogleDriveSkill.shared.statusText(for: call) { return s } + if let s = GoogleGmailSkill.shared.statusText(for: call) { return s } + if let s = GoogleCalendarSkill.shared.statusText(for: call) { return s } + if let s = AgentMailSkill.shared.statusText(for: call) { return s } + if let s = MapsSkill.shared.statusText(for: call) { return s } + if let s = GeocodingSkill.shared.statusText(for: call) { return s } + if let s = NavigationSkill.shared.statusText(for: call) { return s } + if let s = MuniRealtimeSkill.shared.statusText(for: call) { return s } + if let s = TwitterSkill.shared.statusText(for: call) { return s } + if let s = SSHSkill.shared.statusText(for: call) { return s } + if let s = MCPRegistry.shared.statusText(for: call) { return s } if let s = DynamicSkillRegistry.shared.statusText(for: call) { return s } return "running \(call.name.replacingOccurrences(of: "_", with: " "))" } diff --git a/LoopVision/ConversationView.swift b/LoopVision/ConversationView.swift index 4049a05..e47bdfc 100644 --- a/LoopVision/ConversationView.swift +++ 
b/LoopVision/ConversationView.swift @@ -209,36 +209,53 @@ private struct FileAttachmentBubble: View { @Environment(\.openURL) private var openURL var body: some View { - HStack(alignment: .top, spacing: 12) { - Image(systemName: iconName) - .font(.title2) - .foregroundStyle(.secondary) - .frame(width: 32, height: 32) - VStack(alignment: .leading, spacing: 4) { - Text(attachment.fileName) - .font(.subheadline.weight(.semibold)) - .lineLimit(2) - Text(subtitle) - .font(.caption) + VStack(spacing: 0) { + HStack(alignment: .top, spacing: 12) { + Image(systemName: iconName) + .font(.title2) .foregroundStyle(.secondary) - if let snippet = snippetPreview, !snippet.isEmpty { - Text(snippet) - .font(.caption2) + .frame(width: 32, height: 32) + VStack(alignment: .leading, spacing: 4) { + Text(attachment.fileName) + .font(.subheadline.weight(.semibold)) + .lineLimit(2) + Text(subtitle) + .font(.caption) .foregroundStyle(.secondary) - .lineLimit(3) - .padding(.top, 2) + if let snippet = snippetPreview, !snippet.isEmpty { + Text(snippet) + .font(.caption2) + .foregroundStyle(.secondary) + .lineLimit(3) + .padding(.top, 2) + } + } + Spacer(minLength: 0) + } + .padding(14) + .contentShape(Rectangle()) + .onTapGesture { open() } + + if hasTextContent { + Divider() + HStack { + Spacer() + ShareLink(item: shareText) { + Image(systemName: "square.and.arrow.up") + .font(.caption) + .foregroundStyle(.secondary) + } + .buttonStyle(.plain) + .padding(.horizontal, 12) + .padding(.vertical, 6) } } - Spacer(minLength: 0) } - .padding(14) .background(.thinMaterial, in: RoundedRectangle(cornerRadius: 14, style: .continuous)) .overlay( RoundedRectangle(cornerRadius: 14, style: .continuous) .strokeBorder(Color.primary.opacity(0.08), lineWidth: 0.5) ) - .contentShape(Rectangle()) - .onTapGesture { open() } } private var iconName: String { @@ -276,6 +293,17 @@ private struct FileAttachmentBubble: View { } } + private var hasTextContent: Bool { + switch attachment.kind { + case .markdown, .text: 
return true + default: return false + } + } + + private var shareText: String { + (try? String(contentsOf: attachment.fileURL, encoding: .utf8)) ?? "" + } + private func open() { // SwiftUI's openURL handles file:// URLs on visionOS — the system // routes them to QuickLook (or a registered handler) without us diff --git a/LoopVision/VisionVoiceCoordinator.swift b/LoopVision/VisionVoiceCoordinator.swift index 0033a39..c7a7935 100644 --- a/LoopVision/VisionVoiceCoordinator.swift +++ b/LoopVision/VisionVoiceCoordinator.swift @@ -337,6 +337,7 @@ final class VisionVoiceCoordinator { audioEngine = nil deepgramSTT?.disconnect(); deepgramSTT = nil appleSTT?.cancel(); appleSTT = nil + deactivateAudioSession() } // MARK: - Transcript → Cloud → store → speak @@ -513,7 +514,10 @@ final class VisionVoiceCoordinator { // a late failure mid-stream). Do NOT fall back — that // would replay the whole reply on top of what was heard. self.speechToken = nil - if self.state == .speaking { self.state = .idle } + if self.state == .speaking { + self.state = .idle + self.deactivateAudioSession() + } } else { // True failure before any audio (bad key, blocked // network, Aura down) — the system voice is the safety net. @@ -530,7 +534,10 @@ final class VisionVoiceCoordinator { guard self.deepgramTTS === tts else { return } self.deepgramTTS = nil self.speechToken = nil - if self.state == .speaking { self.state = .idle } + if self.state == .speaking { + self.state = .idle + self.deactivateAudioSession() + } } } @@ -550,7 +557,21 @@ final class VisionVoiceCoordinator { } fileprivate func speechDidFinish() { - if state == .speaking { state = .idle } + if state == .speaking { + state = .idle + deactivateAudioSession() + } + } + + /// Deactivate the audio session with `.notifyOthersOnDeactivation` so + /// system media (Apple Music, Spotify, podcasts) auto-resumes after + /// Loop's voice activity ends. visionOS has AVAudioSession (unlike macOS). 
+ private func deactivateAudioSession() { + do { + try AVAudioSession.sharedInstance().setActive(false, options: [.notifyOthersOnDeactivation]) + } catch { + // best-effort; session may already be inactive + } } // MARK: - Conversation selection (drives the split-view window) diff --git a/Secrets.xcconfig.example b/Secrets.xcconfig.example index 50408bd..af41d60 100644 --- a/Secrets.xcconfig.example +++ b/Secrets.xcconfig.example @@ -35,3 +35,42 @@ FIREWORKS_API_KEY = CURSOR_API_KEY = OBSIDIAN_API_KEY = OBSIDIAN_BASE_URL = +// AgentMail (agentmail.to) — lets Loop read its email inbox and send email +// with attachments. Get a key from agentmail.to. AGENT_MAIL_INBOX pins the +// sender inbox (e.g. loop_email@agentmail.to); leave it blank and Loop reuses +// your first inbox or creates one on first send. Both can also be set in-app +// at Settings → Keys → AgentMail. +AGENT_MAIL_API_KEY = +AGENT_MAIL_INBOX = + +// ───────────────────────────────────────────────────────────────────── +// SSH connection (optional) — bakes a default SSH connection into the +// build. On first launch (when no connection has been saved in-app yet) +// it's seeded into Settings → SSH, where it can be edited or deleted. +// Editing/deleting in-app wins; the seed only runs once. +// +// SSH_HOST + SSH_USERNAME are the minimum needed to seed a connection. +// +// The private key is multi-line, but xcconfig values are single-line, so +// pass it BASE64-ENCODED. xcconfig treats `//` as a comment, so encode +// with base64url (no `/`) to be safe: +// +// cat ~/.ssh/id_ed25519 | base64 | tr '+/' '-_' | tr -d '\n' +// +// Paste the one-line result as SSH_PRIVATE_KEY_B64. Standard base64 also +// works as long as it contains no `//`. 
+// ───────────────────────────────────────────────────────────────────── +SSH_HOST = +SSH_PORT = 22 +SSH_USERNAME = +SSH_NAME = +SSH_PRIVATE_KEY_B64 = +SSH_PASSPHRASE = + +// ───────────────────────────────────────────────────────────────────── +// Managed mode (optional). Set LOOP_FLAG to ANY non-empty value to put +// the build in "managed" mode (read via AppFlags.isManaged). The execution +// backend is then pinned — the navbar indicator shows "Managed" and can't +// be tapped/switched. Leave blank for normal behavior. +// ───────────────────────────────────────────────────────────────────── +LOOP_FLAG = diff --git a/runtime/go/agent/agent.go b/runtime/go/agent/agent.go index 2923e63..444f4e7 100644 --- a/runtime/go/agent/agent.go +++ b/runtime/go/agent/agent.go @@ -106,33 +106,55 @@ func buildToolDefs() []ToolDef { } } -// RunTurn executes a full agent turn: call LLM, dispatch tools, repeat until final message. -func (a *Agent) RunTurn(ctx context.Context, messages []Message, conversationID string, streamFn func(string)) (string, error) { +// PrepareTurn creates and persists a turn without running it, returning the new +// turn id. The async (handoff) path uses this to return the id to the client +// immediately, then runs the loop in the background via RunPreparedTurn. +// userID / conversationID are persisted with the turn (empty for interactive +// turns) so the async completion path can push back to the originating device. +func (a *Agent) PrepareTurn(messages []Message, userID, conversationID string) (string, error) { turnID := newID() - messagesJSON, _ := json.Marshal(messages) - if err := a.store.CreateTurn(turnID, messagesJSON); err != nil { + if err := a.store.CreateTurn(turnID, messagesJSON, userID, conversationID); err != nil { return "", fmt.Errorf("persisting turn: %w", err) } + return turnID, nil +} + +// RunTurn executes a full agent turn: call LLM, dispatch tools, repeat until final message. 
+func (a *Agent) RunTurn(ctx context.Context, messages []Message, userID, conversationID string, streamFn func(string)) (string, error) { + turnID, err := a.PrepareTurn(messages, userID, conversationID) + if err != nil { + return "", err + } + if err := a.RunPreparedTurn(ctx, turnID, messages, streamFn); err != nil { + return "", err + } + return turnID, nil +} +// RunPreparedTurn runs the agent loop for an already-created turn (see +// PrepareTurn). It drives the turn to completion and records the result in +// storage. Callers that ran PrepareTurn in a request handler can invoke this +// in a goroutine with a detached context so the turn survives client disconnect. +func (a *Agent) RunPreparedTurn(ctx context.Context, turnID string, messages []Message, streamFn func(string)) error { // The agent loop: call LLM, handle tool calls, repeat for { reader, err := a.client.ChatCompletionStream(ctx, messages, a.tools) if err != nil { a.store.CompleteTurn(turnID, "", err.Error()) - return "", fmt.Errorf("llm call: %w", err) + return fmt.Errorf("llm call: %w", err) } msg, err := reader.Collect(streamFn) if err != nil { a.store.CompleteTurn(turnID, "", err.Error()) - return "", fmt.Errorf("reading stream: %w", err) + return fmt.Errorf("reading stream: %w", err) } // If no tool calls, this is the final response if len(msg.ToolCalls) == 0 { a.store.CompleteTurn(turnID, msg.Content, "") - return turnID, nil + return nil } // Dispatch tool calls in parallel diff --git a/runtime/go/config.go b/runtime/go/config.go index a37a2a2..3de1e91 100644 --- a/runtime/go/config.go +++ b/runtime/go/config.go @@ -16,6 +16,11 @@ type Config struct { ModelAPIKey string `json:"model_api_key"` SharedSecret string `json:"shared_secret"` ListenPort int `json:"listen_port"` + + // PushSendURL is the central push backend endpoint the runner POSTs to when + // an async (handoff) turn completes, so the originating device gets an APNs + // alert. Defaults to the loopharness push service when unset. 
+ PushSendURL string `json:"push_send_url"` } func LoadConfig(path string) (*Config, error) { @@ -30,5 +35,8 @@ func LoadConfig(path string) (*Config, error) { if cfg.ListenPort == 0 { cfg.ListenPort = 8080 } + if cfg.PushSendURL == "" { + cfg.PushSendURL = "https://dev.generalbackend.com/loopharness/push/send" + } return &cfg, nil } diff --git a/runtime/go/handlers.go b/runtime/go/handlers.go index 8cf9d3a..9a2f2c4 100644 --- a/runtime/go/handlers.go +++ b/runtime/go/handlers.go @@ -1,8 +1,11 @@ package main import ( + "bytes" + "context" "encoding/json" "fmt" + "log" "net/http" "strconv" "strings" @@ -15,21 +18,27 @@ import ( func authMiddleware(secret string, next http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - auth := r.Header.Get("Authorization") - if !strings.HasPrefix(auth, "Bearer ") || strings.TrimPrefix(auth, "Bearer ") != secret { - http.Error(w, `{"error":"unauthorized"}`, http.StatusUnauthorized) - return + // An empty shared_secret disables auth — convenient for a single-user + // runner reached only over a private SSH tunnel (no public exposure). 
+ if secret != "" { + auth := r.Header.Get("Authorization") + if !strings.HasPrefix(auth, "Bearer ") || strings.TrimPrefix(auth, "Bearer ") != secret { + http.Error(w, `{"error":"unauthorized"}`, http.StatusUnauthorized) + return + } } next(w, r) } } // POST /turn -func handleTurn(ag *agent.Agent, store *storage.Store) http.HandlerFunc { +func handleTurn(ag *agent.Agent, store *storage.Store, pushSendURL string) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { var req struct { Messages []agent.Message `json:"messages"` ConversationID string `json:"conversation_id"` + UserID string `json:"user_id"` + Async bool `json:"async"` } if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(w, fmt.Sprintf(`{"error":%q}`, err.Error()), http.StatusBadRequest) @@ -40,7 +49,32 @@ func handleTurn(ag *agent.Agent, store *storage.Store) http.HandlerFunc { return } - // Stream response via SSE + // Async (handoff) mode: persist the turn, return its id immediately, and + // run the agent loop in the background. The client (a backgrounding iOS + // app) disconnects right after the 202, so the loop MUST use a detached + // context — r.Context() is cancelled on disconnect. On completion we push + // an APNs alert back to the originating device via the central backend. + if req.Async { + turnID, err := ag.PrepareTurn(req.Messages, req.UserID, req.ConversationID) + if err != nil { + http.Error(w, fmt.Sprintf(`{"error":%q}`, err.Error()), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusAccepted) + fmt.Fprintf(w, `{"id":%q}`, turnID) + + go func() { + runErr := ag.RunPreparedTurn(context.Background(), turnID, req.Messages, nil) + if runErr != nil { + log.Printf("async turn %s failed: %v", turnID, runErr) + } + sendCompletionPush(pushSendURL, store, turnID) + }() + return + } + + // Interactive mode: stream the response via SSE. 
w.Header().Set("Content-Type", "text/event-stream") w.Header().Set("Cache-Control", "no-cache") w.Header().Set("Connection", "keep-alive") @@ -50,7 +84,7 @@ func handleTurn(ag *agent.Agent, store *storage.Store) http.HandlerFunc { return } - turnID, err := ag.RunTurn(r.Context(), req.Messages, req.ConversationID, func(token string) { + turnID, err := ag.RunTurn(r.Context(), req.Messages, req.UserID, req.ConversationID, func(token string) { fmt.Fprintf(w, "data: %s\n\n", token) flusher.Flush() }) @@ -64,6 +98,75 @@ func handleTurn(ag *agent.Agent, store *storage.Store) http.HandlerFunc { } } +// sendCompletionPush notifies the originating device that an async turn finished. +// It reads the completed turn (for user_id, conversation_id, and the response +// text) and POSTs an alert to the central push backend, targeting the user. A +// turn with no user_id (interactive turn) is skipped. Best-effort: failures are +// logged, not surfaced. +func sendCompletionPush(pushSendURL string, store *storage.Store, turnID string) { + if pushSendURL == "" { + return + } + turn, err := store.GetTurn(turnID) + if err != nil { + log.Printf("completion push: turn %s not found: %v", turnID, err) + return + } + if turn.UserID == "" { + return // interactive turn — nobody to push to + } + + title := "Loop" + body := truncate(turn.FinalResponse, 180) + if turn.Error != "" { + body = "Your agent hit an error finishing this reply." + } else if body == "" { + body = "Your agent finished." 
+ } + + payload := map[string]interface{}{ + "user_id": turn.UserID, + "title": title, + "body": body, + "data": map[string]interface{}{ + "type": "runner_turn", + "turn_id": turn.ID, + "conversation_id": turn.ConversationID, + }, + } + bodyJSON, err := json.Marshal(payload) + if err != nil { + log.Printf("completion push: marshal: %v", err) + return + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, "POST", pushSendURL, bytes.NewReader(bodyJSON)) + if err != nil { + log.Printf("completion push: new request: %v", err) + return + } + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + log.Printf("completion push: POST %s: %v", pushSendURL, err) + return + } + defer resp.Body.Close() + if resp.StatusCode >= 300 { + log.Printf("completion push: backend returned %d for turn %s", resp.StatusCode, turnID) + } +} + +func truncate(s string, n int) string { + r := []rune(s) + if len(r) <= n { + return s + } + return string(r[:n]) +} + // POST /result func handleResult(brg *bridge.Bridge) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { @@ -140,12 +243,13 @@ func parsePollParams(r *http.Request) (since *time.Time, status string, limit in } type turnPollItem struct { - ID string `json:"id"` - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` - Status string `json:"status"` - FinalResponse string `json:"final_response"` - Error string `json:"error,omitempty"` + ID string `json:"id"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + Status string `json:"status"` + FinalResponse string `json:"final_response"` + Error string `json:"error,omitempty"` + ConversationID string `json:"conversation_id,omitempty"` } type jobPollItem struct { @@ -173,12 +277,13 @@ func handleListTurns(store *storage.Store) http.HandlerFunc { items := 
make([]turnPollItem, len(turns)) for i, t := range turns { items[i] = turnPollItem{ - ID: t.ID, - CreatedAt: t.CreatedAt, - UpdatedAt: t.UpdatedAt, - Status: t.Status, - FinalResponse: t.FinalResponse, - Error: t.Error, + ID: t.ID, + CreatedAt: t.CreatedAt, + UpdatedAt: t.UpdatedAt, + Status: t.Status, + FinalResponse: t.FinalResponse, + Error: t.Error, + ConversationID: t.ConversationID, } } w.Header().Set("Content-Type", "application/json") diff --git a/runtime/go/integration_test.go b/runtime/go/integration_test.go index 58bcb76..2cc609c 100644 --- a/runtime/go/integration_test.go +++ b/runtime/go/integration_test.go @@ -47,6 +47,134 @@ data: [DONE] return agent.NewStreamReaderFromReader(io.NopCloser(strings.NewReader(sseData))), nil } +// simpleLLM returns a final message immediately (no tool calls). Safe for the +// async test where the loop runs once. +type simpleLLM struct{} + +func (m *simpleLLM) ChatCompletionStream(_ context.Context, _ []agent.Message, _ []agent.ToolDef) (*agent.StreamReader, error) { + sseData := "data: {\"choices\":[{\"delta\":{\"content\":\"hello there friend\"}}]}\n\ndata: [DONE]\n\n" + return agent.NewStreamReaderFromReader(io.NopCloser(strings.NewReader(sseData))), nil +} + +// TestAsyncHandoffTurn exercises the background-handoff path: POST /turn with +// async:true must return 202 + {id} immediately, run the turn in the background, +// and POST a completion push to the configured backend carrying user_id + +// data{type,turn_id,conversation_id}. +func TestAsyncHandoffTurn(t *testing.T) { + dbPath := fmt.Sprintf("/tmp/loop_async_test_%d.db", time.Now().UnixNano()) + defer os.Remove(dbPath) + + store, err := storage.New(dbPath) + if err != nil { + t.Fatalf("storage: %v", err) + } + defer store.Close() + + reg := registry.New(bridge.NewStubbed()) + localtools.Register(reg) + ag := agent.NewWithClient(&simpleLLM{}, reg, store) + + // Capture the completion push. 
+ type pushed struct { + UserID string `json:"user_id"` + Title string `json:"title"` + Body string `json:"body"` + Data map[string]interface{} `json:"data"` + } + pushCh := make(chan pushed, 1) + pushSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var p pushed + json.NewDecoder(r.Body).Decode(&p) + pushCh <- p + w.Write([]byte(`{"operation":"success","sent":1}`)) + })) + defer pushSrv.Close() + + secret := "test-secret" + mux := http.NewServeMux() + mux.HandleFunc("POST /turn", authMiddleware(secret, handleTurn(ag, store, pushSrv.URL))) + mux.HandleFunc("GET /turn/{id}", authMiddleware(secret, handleGetTurn(store))) + srv := httptest.NewServer(mux) + defer srv.Close() + + // Submit async. + turnBody, _ := json.Marshal(map[string]interface{}{ + "async": true, + "user_id": "test-user-123", + "conversation_id": "conv-abc", + "messages": []map[string]string{{"role": "user", "content": "hi"}}, + }) + req, _ := http.NewRequest("POST", srv.URL+"/turn", bytes.NewReader(turnBody)) + req.Header.Set("Authorization", "Bearer "+secret) + req.Header.Set("Content-Type", "application/json") + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("async turn request: %v", err) + } + if resp.StatusCode != http.StatusAccepted { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("expected 202, got %d: %s", resp.StatusCode, body) + } + var accepted struct { + ID string `json:"id"` + } + json.NewDecoder(resp.Body).Decode(&accepted) + resp.Body.Close() + if accepted.ID == "" { + t.Fatal("expected turn id in 202 response") + } + + // The completion push must arrive with the right targeting + data. 
+ select { + case p := <-pushCh: + if p.UserID != "test-user-123" { + t.Errorf("push user_id = %q, want test-user-123", p.UserID) + } + if p.Data["type"] != "runner_turn" { + t.Errorf("push data.type = %v, want runner_turn", p.Data["type"]) + } + if p.Data["turn_id"] != accepted.ID { + t.Errorf("push data.turn_id = %v, want %s", p.Data["turn_id"], accepted.ID) + } + if p.Data["conversation_id"] != "conv-abc" { + t.Errorf("push data.conversation_id = %v, want conv-abc", p.Data["conversation_id"]) + } + if !strings.Contains(p.Body, "hello there friend") { + t.Errorf("push body = %q, want it to contain the reply", p.Body) + } + case <-time.After(5 * time.Second): + t.Fatal("timed out waiting for completion push") + } + + // The persisted turn carries conversation_id + final_response. + req, _ = http.NewRequest("GET", srv.URL+"/turn/"+accepted.ID, nil) + req.Header.Set("Authorization", "Bearer "+secret) + resp, err = http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("get turn: %v", err) + } + defer resp.Body.Close() + var turn struct { + Status string `json:"status"` + FinalResponse string `json:"final_response"` + ConversationID string `json:"conversation_id"` + UserID string `json:"user_id"` + } + json.NewDecoder(resp.Body).Decode(&turn) + if turn.Status != "completed" { + t.Errorf("turn status = %q, want completed", turn.Status) + } + if turn.ConversationID != "conv-abc" { + t.Errorf("turn conversation_id = %q, want conv-abc", turn.ConversationID) + } + if turn.UserID != "test-user-123" { + t.Errorf("turn user_id = %q, want test-user-123", turn.UserID) + } + if !strings.Contains(turn.FinalResponse, "hello there friend") { + t.Errorf("turn final_response = %q", turn.FinalResponse) + } +} + func TestIntegrationEchoRoundTrip(t *testing.T) { // Setup storage with temp file dbPath := fmt.Sprintf("/tmp/loop_test_%d.db", time.Now().UnixNano()) @@ -77,7 +205,7 @@ func TestIntegrationEchoRoundTrip(t *testing.T) { w.Header().Set("Content-Type", "application/json") 
fmt.Fprintf(w, `{"ok":true,"version":"test","uptime_seconds":%.0f}`, time.Since(startTime).Seconds()) }) - mux.HandleFunc("POST /turn", authMiddleware(secret, handleTurn(ag, store))) + mux.HandleFunc("POST /turn", authMiddleware(secret, handleTurn(ag, store, ""))) mux.HandleFunc("GET /turn/{id}", authMiddleware(secret, handleGetTurn(store))) mux.HandleFunc("GET /job/{job_id}", authMiddleware(secret, handleGetJob(store))) diff --git a/runtime/go/main.go b/runtime/go/main.go index 58f60ef..a0740c1 100644 --- a/runtime/go/main.go +++ b/runtime/go/main.go @@ -54,6 +54,10 @@ func main() { localtools.Register(reg) devicetools.Register(reg) + if cfg.SharedSecret == "" { + log.Println("WARNING: shared_secret is empty — auth is DISABLED (only safe behind a private tunnel)") + } + // Agent ag := agent.New(cfg.ModelAPIKey, reg, store) @@ -68,7 +72,7 @@ func main() { }) // Auth-protected endpoints - mux.HandleFunc("POST /turn", authMiddleware(cfg.SharedSecret, handleTurn(ag, store))) + mux.HandleFunc("POST /turn", authMiddleware(cfg.SharedSecret, handleTurn(ag, store, cfg.PushSendURL))) mux.HandleFunc("POST /result", authMiddleware(cfg.SharedSecret, handleResult(brg))) mux.HandleFunc("GET /turn/{id}", authMiddleware(cfg.SharedSecret, handleGetTurn(store))) mux.HandleFunc("GET /job/{job_id}", authMiddleware(cfg.SharedSecret, handleGetJob(store))) diff --git a/runtime/go/polling_test.go b/runtime/go/polling_test.go index 0f685e2..374e97c 100644 --- a/runtime/go/polling_test.go +++ b/runtime/go/polling_test.go @@ -61,13 +61,13 @@ func TestPollTurnsSinceFilter(t *testing.T) { defer cleanup() // Insert turns at staggered timestamps - store.CreateTurn("t1", json.RawMessage(`[{"role":"user","content":"a"}]`)) + store.CreateTurn("t1", json.RawMessage(`[{"role":"user","content":"a"}]`), "", "") time.Sleep(10 * time.Millisecond) midpoint := time.Now().UTC() time.Sleep(10 * time.Millisecond) - store.CreateTurn("t2", json.RawMessage(`[{"role":"user","content":"b"}]`)) + 
store.CreateTurn("t2", json.RawMessage(`[{"role":"user","content":"b"}]`), "", "") time.Sleep(10 * time.Millisecond) - store.CreateTurn("t3", json.RawMessage(`[{"role":"user","content":"c"}]`)) + store.CreateTurn("t3", json.RawMessage(`[{"role":"user","content":"c"}]`), "", "") // Poll without since — should return all 3 result := doGet(t, srv, "/turns") @@ -96,9 +96,9 @@ func TestPollTurnsStatusFilter(t *testing.T) { store, srv, cleanup := setupPollingTest(t) defer cleanup() - store.CreateTurn("t1", json.RawMessage(`[]`)) - store.CreateTurn("t2", json.RawMessage(`[]`)) - store.CreateTurn("t3", json.RawMessage(`[]`)) + store.CreateTurn("t1", json.RawMessage(`[]`), "", "") + store.CreateTurn("t2", json.RawMessage(`[]`), "", "") + store.CreateTurn("t3", json.RawMessage(`[]`), "", "") store.CompleteTurn("t1", "done", "") store.CompleteTurn("t2", "", "oops") @@ -160,7 +160,7 @@ func TestPollTurnsLimit(t *testing.T) { defer cleanup() for i := 0; i < 5; i++ { - store.CreateTurn(fmt.Sprintf("t%d", i), json.RawMessage(`[]`)) + store.CreateTurn(fmt.Sprintf("t%d", i), json.RawMessage(`[]`), "", "") } result := doGet(t, srv, "/turns?limit=2") @@ -175,7 +175,7 @@ func TestPollJobsSinceFilter(t *testing.T) { store, srv, cleanup := setupPollingTest(t) defer cleanup() - store.CreateTurn("t1", json.RawMessage(`[]`)) + store.CreateTurn("t1", json.RawMessage(`[]`), "", "") store.CreateJob("j1", "t1", "echo", json.RawMessage(`{"text":"a"}`)) time.Sleep(10 * time.Millisecond) @@ -205,7 +205,7 @@ func TestPollJobsStatusFilter(t *testing.T) { store, srv, cleanup := setupPollingTest(t) defer cleanup() - store.CreateTurn("t1", json.RawMessage(`[]`)) + store.CreateTurn("t1", json.RawMessage(`[]`), "", "") store.CreateJob("j1", "t1", "echo", json.RawMessage(`{}`)) store.CreateJob("j2", "t1", "echo", json.RawMessage(`{}`)) store.CreateJob("j3", "t1", "echo", json.RawMessage(`{}`)) diff --git a/runtime/go/storage/storage.go b/runtime/go/storage/storage.go index a8a9001..b3b258b 100644 --- 
a/runtime/go/storage/storage.go +++ b/runtime/go/storage/storage.go @@ -4,6 +4,7 @@ import ( "database/sql" "encoding/json" "fmt" + "strings" "time" _ "modernc.org/sqlite" @@ -25,6 +26,13 @@ type Turn struct { MessagesJSON json.RawMessage `json:"messages_json"` FinalResponse string `json:"final_response"` Error string `json:"error,omitempty"` + // UserID / ConversationID identify the device + chat that a turn belongs + // to. They are only populated for async (handoff) turns — interactive SSE + // turns leave them empty. UserID lets the completion path push back to the + // originating device; ConversationID lets the client reconcile the reply + // into the right conversation. + UserID string `json:"user_id,omitempty"` + ConversationID string `json:"conversation_id,omitempty"` } type Job struct { @@ -85,19 +93,34 @@ func migrate(db *sql.DB) error { CREATE INDEX IF NOT EXISTS idx_turns_updated_at ON turns(updated_at); CREATE INDEX IF NOT EXISTS idx_jobs_updated_at ON jobs(updated_at); ` - _, err := db.Exec(schema) - return err + if _, err := db.Exec(schema); err != nil { + return err + } + + // Additive column migrations for already-deployed DBs. CREATE TABLE IF NOT + // EXISTS above never alters an existing table, so new columns must be added + // explicitly. SQLite has no "ADD COLUMN IF NOT EXISTS", so we tolerate the + // "duplicate column name" error to keep this idempotent. 
+ for _, stmt := range []string{ + "ALTER TABLE turns ADD COLUMN user_id TEXT NOT NULL DEFAULT ''", + "ALTER TABLE turns ADD COLUMN conversation_id TEXT NOT NULL DEFAULT ''", + } { + if _, err := db.Exec(stmt); err != nil && !strings.Contains(err.Error(), "duplicate column name") { + return err + } + } + return nil } func (s *Store) Close() error { return s.db.Close() } -func (s *Store) CreateTurn(id string, messagesJSON json.RawMessage) error { +func (s *Store) CreateTurn(id string, messagesJSON json.RawMessage, userID, conversationID string) error { now := time.Now().UTC().Format(tsFormat) _, err := s.db.Exec( - "INSERT INTO turns (id, created_at, updated_at, status, messages_json) VALUES (?, ?, ?, 'running', ?)", - id, now, now, string(messagesJSON), + "INSERT INTO turns (id, created_at, updated_at, status, messages_json, user_id, conversation_id) VALUES (?, ?, ?, 'running', ?, ?, ?)", + id, now, now, string(messagesJSON), userID, conversationID, ) return err } @@ -115,10 +138,10 @@ func (s *Store) CompleteTurn(id, finalResponse, errMsg string) error { } func (s *Store) GetTurn(id string) (*Turn, error) { - row := s.db.QueryRow("SELECT id, created_at, updated_at, status, messages_json, final_response, error FROM turns WHERE id = ?", id) + row := s.db.QueryRow("SELECT id, created_at, updated_at, status, messages_json, final_response, error, user_id, conversation_id FROM turns WHERE id = ?", id) var t Turn var createdStr, updatedStr, messagesStr string - if err := row.Scan(&t.ID, &createdStr, &updatedStr, &t.Status, &messagesStr, &t.FinalResponse, &t.Error); err != nil { + if err := row.Scan(&t.ID, &createdStr, &updatedStr, &t.Status, &messagesStr, &t.FinalResponse, &t.Error, &t.UserID, &t.ConversationID); err != nil { return nil, fmt.Errorf("turn not found: %w", err) } t.CreatedAt, _ = time.Parse(tsFormat, createdStr) @@ -180,7 +203,7 @@ func (s *Store) GetJob(jobID string) (*Job, error) { // ListTurns returns turns optionally filtered by since time and status, 
// ordered by updated_at descending, capped at limit. func (s *Store) ListTurns(since *time.Time, status string, limit int) ([]Turn, error) { - query := "SELECT id, created_at, updated_at, status, final_response, error FROM turns WHERE 1=1" + query := "SELECT id, created_at, updated_at, status, final_response, error, conversation_id FROM turns WHERE 1=1" var args []interface{} if since != nil { query += " AND updated_at > ?" @@ -203,7 +226,7 @@ func (s *Store) ListTurns(since *time.Time, status string, limit int) ([]Turn, e for rows.Next() { var t Turn var createdStr, updatedStr string - if err := rows.Scan(&t.ID, &createdStr, &updatedStr, &t.Status, &t.FinalResponse, &t.Error); err != nil { + if err := rows.Scan(&t.ID, &createdStr, &updatedStr, &t.Status, &t.FinalResponse, &t.Error, &t.ConversationID); err != nil { return nil, fmt.Errorf("scanning turn: %w", err) } t.CreatedAt, _ = time.Parse(tsFormat, createdStr) diff --git a/specs/LOOP_FEED_CARDS_SPEC.md b/specs/LOOP_FEED_CARDS_SPEC.md new file mode 100644 index 0000000..71ad281 --- /dev/null +++ b/specs/LOOP_FEED_CARDS_SPEC.md @@ -0,0 +1,90 @@ +# Loop Feed + Cards — v1 Spec + +## Overview + +A new **Feed** surfaces AI-generated visual "cards" the user can swipe through, +keep, or archive; it lives in the new-chat swipe stack, not a side-drawer tab (see §4). Cards are produced by a new +`generate_card` tool the agent calls during conversation. + +## 1. New Tool: `generate_card` + +Registered like other Loop tools. Inputs: + +| Field | Type | Required | Notes | +|-------|------|----------|-------| +| `kind` | `"image"` \| `"markdown"` | yes | Determines renderer | +| `title` | string | yes | Short (≤6 words) | +| `body` | string | yes | Content/subtitle | +| `image_prompt` | string | when kind=image | Vivid image generation prompt | +| `source` | string | no | Attribution | +| `tags` | string[] | no | Lowercase keywords | + +Output: a persisted Card (JSON at `workspace://cards/<id>.json`). + +## 2.
Card Schema + +```json +{ + "id": "uuid", + "kind": "image|markdown", + "title": "...", + "body": "...", + "image_url": "cards/assets/<id>.png", + "source": "calendar", + "tags": ["morning", "routine"], + "created_at": "2026-06-14T05:00:00.000Z", + "state": "new|kept|archived" +} +``` + +Image assets: `workspace://cards/assets/<id>.png` + +## 3. Pluggable Renderer Interface + +`CardRendering` protocol with `render(card:completion:)`. + +- **v1 `image` renderer**: pipes `image_prompt` through OpenAI image generation + at 4:3 landscape (1536×1024). +- **v1 `markdown` renderer**: renders title + body to a 4:3 poster PNG via + UIKit offscreen render. Dark background, clean typography, Loop-branded. + +Future backends (HTML→image, Higgsfield, vectors) conform and register without +changing the tool surface. + +## 4. Feed UX (no sidebar tab) + +The side drawer stays **Conversations | Files | Skills** — there is *no* Feed +tab. Cards surface in two places instead: + +**New-chat swipe stack** (`FeedCardStackView`, shown by `MainVC`): +- On a blank/new chat, the swipe stack *replaces* the hero orb (the orb steps + back to the nav bar). When the deck empties, the orb returns to the hero slot. +- Tinder-style: drag the top card, swipe **right → Keep**, **left → Archive**, + **tap → detail**. Cards behind peek out and rise as the top card flies off. +- The deck is dealt from `CardStore.feedCards` (`new` before `kept`, newest + first); `archived` excluded. A fresh deck is dealt each new chat. + +**Card detail** (`CardDetailViewController`): full poster, title, body, +metadata, Keep / Archive buttons. Presented modally (Done button) from the pill +or the stack; pushed when navigated. + +## 5. Pill Alert + +When `generate_card` completes mid-conversation, a lightweight pill appears: +"✨ new card". It **persists until tapped** (no auto-dismiss); multiple cards +stack downward. Tapping opens that card's **detail view**. + +## 6.
Out of Scope (v1) + +- External sharing/export +- Multi-page/scrolling cards +- Heartbeat-driven proactive card generation + +## Key Results + +- (A) A new chat with cards opens to the swipe stack (orb in nav bar), not a + bare input. No Feed tab in the sidebar. +- (B) "Generate a card on my day tomorrow" → markdown card from calendar. +- (C) "Generate a card of teaching Leo loose-leash walking" → image card. +- (D) Swipe to Keep/Archive; kept cards persist + sync via workspace. +- (E) `renderCard` cleanly factored for future kinds. diff --git a/specs/music_mini_player_banner.md b/specs/music_mini_player_banner.md new file mode 100644 index 0000000..9f4b97d --- /dev/null +++ b/specs/music_mini_player_banner.md @@ -0,0 +1,47 @@ +# Music Mini-Player Banner + +## Overview + +Convert the existing top banner area in the chat view (currently used for running-agent indicators) into a horizontally scrollable collection view. Add a music mini-player item that appears when music is actively playing via the Apple Music integration. + +## States + +### Minimized (Pill) +A compact horizontal pill showing small album art, track title, artist, and a play/pause icon. Tap to expand. + +### Expanded (Card) +A larger square/rounded card with bigger album art, full track info, progress/scrubber, and full controls (play/pause, skip, minimize chevron). Expands inline or as an overlay within the chat view. + +## Gestures + +- Swipe down on expanded card → collapse to minimized pill +- Swipe away or tap dismiss → hide entirely +- Tap on minimized pill → expand to card + +## Data Integration + +Wire up to the existing music status system (`get_music_status` / `MusicController.shared.status()`) to feed current track info, artwork, and playback state. Reflect changes from Control Center. 
+ +## Controls + +- Play/Pause/Skip commands back through `MusicController` +- Tap track/art to deep-link to Apple Music + +## Visibility Logic + +- Only show when music is playing or paused within last 5 minutes +- Auto-dismiss on stop +- Auto-minimize when voice recording begins +- Persist across conversation tabs if music is still playing + +## Design + +Feel like iOS Live Activity or Spotify mini-player — glanceable, ambient, lightweight. Should visually complement existing agent indicator pills. + +## Key Results + +- (A) Music pill appears in top banner alongside running-agent indicators when playing +- (B) Tap expands to full card with controls without leaving chat +- (C) Swipe down collapses back to pill; swipe away dismisses +- (D) Banner area scrolls horizontally when multiple agents + music coexist +- (E) Auto-hides on stop; auto-minimizes on voice record diff --git a/website/essay.html b/website/essay.html index 89ed134..b22bdfd 100644 --- a/website/essay.html +++ b/website/essay.html @@ -172,7 +172,7 @@ diff --git a/website/index.html b/website/index.html index 9705be9..173ad03 100644 --- a/website/index.html +++ b/website/index.html @@ -45,7 +45,7 @@ @@ -70,10 +70,10 @@