diff --git a/CHANGELOG.md b/CHANGELOG.md index f5489c4..6782590 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on Keep a Changelog, and Esh follows Semantic Versioning. ## [Unreleased] +### Added +- Backend capability reports for MLX and llama.cpp runtime feature detection. +- Normalized prompt cache keys on new cache manifests for future cache lookup and reuse policy. + ## [0.1.37] - 2026-04-30 ### Added diff --git a/README.md b/README.md index 19c154b..383f2cc 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ Inspect required and optional engines: ### External callers -Use `esh capabilities` to get a JSON map of supported backends, installed models, and whether each path supports direct inference, cache build, and cache load. +Use `esh capabilities` to get a JSON map of supported backends, installed models, and whether each path supports direct inference, cache build, and cache load. Internally, backends also expose capability reports for runtime readiness and feature support; MLX currently reports direct inference, token streaming, and prompt cache build/load, while llama.cpp reports direct inference and token streaming with GGUF cache features marked unavailable. Use `esh infer` for machine-friendly inference. It returns JSON for both MLX and GGUF models, and MLX cache load stays optional rather than being the only supported integration path. 
@@ -184,7 +184,7 @@ Supported routes in v1: Notes: - unsupported request fields are ignored when safe -- `stream` is not supported yet +- `stream: true` is supported for OpenAI-compatible chat/responses and Anthropic-compatible messages; backend token streaming remains runtime-dependent - text inputs are supported for chat/responses in v1 - `/v1/models` includes installed text models only for strict OpenAI-compatible clients such as Xcode - `/v1/audio/models` returns the reusable MLX TTS model catalog with voices, languages, output formats, and capabilities so external agents can present and reuse voice choices @@ -509,6 +509,7 @@ Resume from a saved cache: Important: - cache artifacts are backend-specific - cache artifacts are model-specific +- new cache artifacts include a normalized prompt cache key that is backend-, model-, tokenizer-, runtime-, and tool-signature-aware - Esh reuses one cache pipeline, but artifacts are not portable across runtimes/models ## Typical Use Cases diff --git a/Sources/EshCore/Backends/GGUF/LlamaCppBackend.swift b/Sources/EshCore/Backends/GGUF/LlamaCppBackend.swift index 537c528..fdab795 100644 --- a/Sources/EshCore/Backends/GGUF/LlamaCppBackend.swift +++ b/Sources/EshCore/Backends/GGUF/LlamaCppBackend.swift @@ -4,9 +4,16 @@ public struct LlamaCppBackend: InferenceBackend, Sendable { public let kind: BackendKind = .gguf public let runtimeVersion: String public static let runtimeNotFoundMessage = "llama.cpp runtime not found. Install it with `brew install llama.cpp`, or set ESH_LLAMA_CPP_CLI to your `llama-cli` path." + private let executableResolver: @Sendable () throws -> URL - public init(runtimeVersion: String = "llama.cpp-cli-v1") { + public init( + runtimeVersion: String = "llama.cpp-cli-v1", + executableResolver: (@Sendable () throws -> URL)? = nil + ) { self.runtimeVersion = runtimeVersion + self.executableResolver = executableResolver ?? 
{ + try LlamaCppBackend.defaultResolveExecutable() + } } public func loadRuntime(for install: ModelInstall) async throws -> BackendRuntime { @@ -26,6 +33,53 @@ public struct LlamaCppBackend: InferenceBackend, Sendable { return nil } + public func capabilityReport(for install: ModelInstall) -> BackendCapabilityReport { + var warnings: [String] = [] + var unavailable: [UnavailableBackendFeature] = [ + .init( + feature: .promptCacheBuild, + reason: "GGUF cache build is not supported by the llama.cpp backend yet." + ), + .init( + feature: .promptCacheLoad, + reason: "GGUF cache load is not supported by the llama.cpp backend yet." + ), + .init( + feature: .promptCacheBenchmark, + reason: "GGUF cache benchmarking hooks are not implemented yet." + ) + ] + + do { + _ = try locateModelFile(for: install) + _ = try resolveExecutable() + } catch { + let reason = error.localizedDescription + warnings.append(reason) + unavailable.append(.init(feature: .directInference, reason: reason)) + unavailable.append(.init(feature: .tokenStreaming, reason: reason)) + return BackendCapabilityReport( + backend: kind, + runtimeVersion: runtimeVersion, + ready: false, + supportedFeatures: [], + unavailableFeatures: unavailable, + warnings: warnings + ) + } + + return BackendCapabilityReport( + backend: kind, + runtimeVersion: runtimeVersion, + ready: true, + supportedFeatures: [ + .directInference, + .tokenStreaming + ], + unavailableFeatures: unavailable + ) + } + public func makeCompatibilityChecker(for install: ModelInstall) -> CompatibilityChecking { LlamaCppCompatibilityChecker(install: install, runtimeVersion: runtimeVersion) } @@ -53,6 +107,10 @@ public struct LlamaCppBackend: InferenceBackend, Sendable { } func resolveExecutable() throws -> URL { + try executableResolver() + } + + private static func defaultResolveExecutable() throws -> URL { let env = ProcessInfo.processInfo.environment let executable = URL(fileURLWithPath: CommandLine.arguments[0]).resolvingSymlinksInPath() let 
bundledCandidate = executable @@ -85,7 +143,6 @@ public struct LlamaCppBackend: InferenceBackend, Sendable { throw StoreError.invalidManifest(Self.runtimeNotFoundMessage) } - } private struct LlamaCppCompatibilityChecker: CompatibilityChecking, Sendable { diff --git a/Sources/EshCore/Backends/MLX/MLXBackend.swift b/Sources/EshCore/Backends/MLX/MLXBackend.swift index 462a18c..e4185d9 100644 --- a/Sources/EshCore/Backends/MLX/MLXBackend.swift +++ b/Sources/EshCore/Backends/MLX/MLXBackend.swift @@ -21,6 +21,44 @@ public struct MLXBackend: InferenceBackend, RemoteModelConfigValidating, Sendabl return MLXRuntime(bridge: bridge, install: install) } + public func capabilityReport(for install: ModelInstall) -> BackendCapabilityReport { + do { + _ = try locator.resolveModelPath(for: install) + return BackendCapabilityReport( + backend: kind, + runtimeVersion: runtimeVersion, + ready: true, + supportedFeatures: [ + .directInference, + .tokenStreaming, + .promptCacheBuild, + .promptCacheLoad + ], + unavailableFeatures: [ + UnavailableBackendFeature( + feature: .promptCacheBenchmark, + reason: "MLX prompt cache benchmarking is not exposed through the backend capability API yet." + ) + ] + ) + } catch { + let reason = error.localizedDescription + return BackendCapabilityReport( + backend: kind, + runtimeVersion: runtimeVersion, + ready: false, + supportedFeatures: [], + unavailableFeatures: [ + .init(feature: .directInference, reason: reason), + .init(feature: .tokenStreaming, reason: reason), + .init(feature: .promptCacheBuild, reason: reason), + .init(feature: .promptCacheLoad, reason: reason) + ], + warnings: [reason] + ) + } + } + public func validateChatModel(for install: ModelInstall) throws -> String? 
{ let path = try locator.resolveModelPath(for: install) let response: MLXModelValidationResponse = try bridge.run( diff --git a/Sources/EshCore/Domain/BackendCapabilities.swift b/Sources/EshCore/Domain/BackendCapabilities.swift new file mode 100644 index 0000000..87179a6 --- /dev/null +++ b/Sources/EshCore/Domain/BackendCapabilities.swift @@ -0,0 +1,59 @@ +import Foundation + +public enum BackendRuntimeFeature: String, Codable, Hashable, Sendable, CaseIterable { + case directInference = "direct-inference" + case tokenStreaming = "token-streaming" + case promptCacheBuild = "prompt-cache-build" + case promptCacheLoad = "prompt-cache-load" + case promptCacheBenchmark = "prompt-cache-benchmark" + case toolMessages = "tool-messages" + case multimodalInput = "multimodal-input" +} + +public struct UnavailableBackendFeature: Codable, Hashable, Sendable { + public var feature: BackendRuntimeFeature + public var reason: String + + public init(feature: BackendRuntimeFeature, reason: String) { + self.feature = feature + self.reason = reason + } +} + +public struct BackendCapabilityReport: Codable, Hashable, Sendable { + public var backend: BackendKind + public var runtimeVersion: String + public var ready: Bool + public var supportedFeatures: [BackendRuntimeFeature] + public var unavailableFeatures: [UnavailableBackendFeature] + public var warnings: [String] + + public init( + backend: BackendKind, + runtimeVersion: String, + ready: Bool, + supportedFeatures: [BackendRuntimeFeature], + unavailableFeatures: [UnavailableBackendFeature] = [], + warnings: [String] = [] + ) { + self.backend = backend + self.runtimeVersion = runtimeVersion + self.ready = ready + self.supportedFeatures = orderedUnique(supportedFeatures) + self.unavailableFeatures = unavailableFeatures + self.warnings = orderedUnique(warnings) + } + + public func supports(_ feature: BackendRuntimeFeature) -> Bool { + supportedFeatures.contains(feature) + } + + public func unavailableFeature(_ feature: 
BackendRuntimeFeature) -> UnavailableBackendFeature? { + unavailableFeatures.first { $0.feature == feature } + } +} + +private func orderedUnique<T: Hashable>(_ values: [T]) -> [T] { + var seen: Set<T> = [] + return values.filter { seen.insert($0).inserted } +} diff --git a/Sources/EshCore/Domain/CacheManifest.swift b/Sources/EshCore/Domain/CacheManifest.swift index a43ae0b..f010714 100644 --- a/Sources/EshCore/Domain/CacheManifest.swift +++ b/Sources/EshCore/Domain/CacheManifest.swift @@ -18,6 +18,7 @@ public struct CacheManifest: Codable, Hashable, Sendable { public var contextFileCount: Int? public var contextReused: Bool? public var policyReason: String? + public var promptCacheKey: PromptCacheKey? public init( backend: BackendKind, @@ -36,7 +37,8 @@ public struct CacheManifest: Codable, Hashable, Sendable { contextTaskFingerprint: String? = nil, contextFileCount: Int? = nil, contextReused: Bool? = nil, - policyReason: String? = nil + policyReason: String? = nil, + promptCacheKey: PromptCacheKey? = nil ) { self.backend = backend self.modelID = modelID @@ -55,5 +57,6 @@ public struct CacheManifest: Codable, Hashable, Sendable { self.contextFileCount = contextFileCount self.contextReused = contextReused self.policyReason = policyReason + self.promptCacheKey = promptCacheKey } } diff --git a/Sources/EshCore/Domain/PromptCacheKey.swift b/Sources/EshCore/Domain/PromptCacheKey.swift new file mode 100644 index 0000000..56e832f --- /dev/null +++ b/Sources/EshCore/Domain/PromptCacheKey.swift @@ -0,0 +1,89 @@ +import Foundation + +public struct PromptCacheKey: Codable, Hashable, Sendable { + public static let schemaVersion = "esh.prompt-cache-key.v1" + + public var schemaVersion: String + public var hash: String + public var backend: BackendKind + public var modelID: String + public var tokenizerID: String?
+ public var runtimeVersion: String + public var toolSignature: String + public var normalizedMessageCount: Int + + public init( + schemaVersion: String = PromptCacheKey.schemaVersion, + hash: String, + backend: BackendKind, + modelID: String, + tokenizerID: String? = nil, + runtimeVersion: String, + toolSignature: String, + normalizedMessageCount: Int + ) { + self.schemaVersion = schemaVersion + self.hash = hash + self.backend = backend + self.modelID = modelID + self.tokenizerID = tokenizerID + self.runtimeVersion = runtimeVersion + self.toolSignature = toolSignature + self.normalizedMessageCount = normalizedMessageCount + } + + static func make( + backend: BackendKind, + modelID: String, + tokenizerID: String?, + runtimeVersion: String, + toolSignature: String?, + messages: [Message] + ) -> PromptCacheKey { + let effectiveToolSignature = toolSignature?.trimmingCharacters(in: .whitespacesAndNewlines).nonEmpty + ?? "tools:none" + let payload = PromptCacheKeyPayload( + schemaVersion: schemaVersion, + backend: backend, + modelID: modelID, + tokenizerID: tokenizerID, + runtimeVersion: runtimeVersion, + toolSignature: effectiveToolSignature, + messages: messages.map { message in + PromptCacheKeyMessage(role: message.role, text: message.text) + } + ) + let data = (try? JSONCoding.encoder.encode(payload)) ?? Data() + let canonical = String(decoding: data, as: UTF8.self) + return PromptCacheKey( + hash: Fingerprint.sha256([canonical]), + backend: backend, + modelID: modelID, + tokenizerID: tokenizerID, + runtimeVersion: runtimeVersion, + toolSignature: effectiveToolSignature, + normalizedMessageCount: messages.count + ) + } +} + +private struct PromptCacheKeyPayload: Codable, Hashable, Sendable { + var schemaVersion: String + var backend: BackendKind + var modelID: String + var tokenizerID: String? 
+ var runtimeVersion: String + var toolSignature: String + var messages: [PromptCacheKeyMessage] +} + +private struct PromptCacheKeyMessage: Codable, Hashable, Sendable { + var role: Message.Role + var text: String +} + +private extension String { + var nonEmpty: String? { + isEmpty ? nil : self + } +} diff --git a/Sources/EshCore/Protocols/InferenceBackend.swift b/Sources/EshCore/Protocols/InferenceBackend.swift index be3d17d..44c84ba 100644 --- a/Sources/EshCore/Protocols/InferenceBackend.swift +++ b/Sources/EshCore/Protocols/InferenceBackend.swift @@ -4,6 +4,19 @@ public protocol InferenceBackend: Sendable { var kind: BackendKind { get } var runtimeVersion: String { get } + func capabilityReport(for install: ModelInstall) -> BackendCapabilityReport func loadRuntime(for install: ModelInstall) async throws -> BackendRuntime func makeCompatibilityChecker(for install: ModelInstall) -> CompatibilityChecking } + +public extension InferenceBackend { + func capabilityReport(for install: ModelInstall) -> BackendCapabilityReport { + _ = install + return BackendCapabilityReport( + backend: kind, + runtimeVersion: runtimeVersion, + ready: true, + supportedFeatures: [.directInference] + ) + } +} diff --git a/Sources/EshCore/Services/CacheService.swift b/Sources/EshCore/Services/CacheService.swift index b006aff..f413c86 100644 --- a/Sources/EshCore/Services/CacheService.swift +++ b/Sources/EshCore/Services/CacheService.swift @@ -55,12 +55,20 @@ public struct CacheService: Sendable { let snapshot = try await runtime.exportRuntimeCache() let encodedSnapshot = try codec.encode(snapshot: snapshot) let compression = try await compressor.compress(encodedSnapshot) + let manifestRuntimeVersion = install.runtimeVersion ?? 
"mlx-vlm-0.4.3+mlx-lm-bridge-v2" + let promptCacheKey = PromptSessionNormalizer().promptCacheKey( + for: session, + backend: runtime.backend, + modelID: install.id, + tokenizerID: install.spec.tokenizerID, + runtimeVersion: manifestRuntimeVersion + ) let manifest = CacheManifest( backend: runtime.backend, modelID: install.id, tokenizerID: install.spec.tokenizerID, architectureFingerprint: install.spec.architectureFingerprint ?? Fingerprint.sha256([install.id, install.backendFormat]), - runtimeVersion: install.runtimeVersion ?? "mlx-vlm-0.4.3+mlx-lm-bridge-v2", + runtimeVersion: manifestRuntimeVersion, cacheFormatVersion: codec.formatVersion, compressorVersion: compressor.version, cacheMode: artifactMode ?? compressor.mode, @@ -71,7 +79,8 @@ public struct CacheService: Sendable { contextTaskFingerprint: context?.taskFingerprint, contextFileCount: context?.fileCount, contextReused: context?.reused, - policyReason: context?.policyReason + policyReason: context?.policyReason, + promptCacheKey: promptCacheKey ) let artifact = CacheArtifact( manifest: manifest, diff --git a/Sources/EshCore/Services/PromptSessionNormalizer.swift b/Sources/EshCore/Services/PromptSessionNormalizer.swift index 7dcbde6..e190aba 100644 --- a/Sources/EshCore/Services/PromptSessionNormalizer.swift +++ b/Sources/EshCore/Services/PromptSessionNormalizer.swift @@ -36,4 +36,23 @@ public struct PromptSessionNormalizer: Sendable { .joined(separator: "\n") .trimmingCharacters(in: CharacterSet.whitespacesAndNewlines) } + + public func promptCacheKey( + for session: ChatSession, + backend: BackendKind, + modelID: String, + tokenizerID: String?, + runtimeVersion: String, + toolSignature: String? 
= nil + ) -> PromptCacheKey { + let normalizedSession = normalized(session: session) + return PromptCacheKey.make( + backend: backend, + modelID: modelID, + tokenizerID: tokenizerID, + runtimeVersion: runtimeVersion, + toolSignature: toolSignature, + messages: normalizedSession.messages + ) + } } diff --git a/Tests/EshCoreTests/BackendCapabilityTests.swift b/Tests/EshCoreTests/BackendCapabilityTests.swift new file mode 100644 index 0000000..7bb0b4a --- /dev/null +++ b/Tests/EshCoreTests/BackendCapabilityTests.swift @@ -0,0 +1,102 @@ +import Foundation +import Testing +@testable import EshCore + +@Suite +struct BackendCapabilityTests { + @Test + func mlxReportsStreamingAndPromptCacheCapabilitiesWhenInstallPathExists() { + let installURL = temporaryDirectory() + let install = modelInstall(id: "qwen-mlx", backend: .mlx, installURL: installURL, backendFormat: "mlx") + + let report = MLXBackend(runtimeVersion: "mlx-test").capabilityReport(for: install) + + #expect(report.backend == .mlx) + #expect(report.runtimeVersion == "mlx-test") + #expect(report.ready) + #expect(report.supports(.directInference)) + #expect(report.supports(.tokenStreaming)) + #expect(report.supports(.promptCacheBuild)) + #expect(report.supports(.promptCacheLoad)) + } + + @Test + func llamaCppReportsStreamingButMarksPromptCacheUnavailable() throws { + let installURL = temporaryDirectory() + try Data().write(to: installURL.appendingPathComponent("model.gguf")) + let executableURL = try executable(named: "llama-cli") + let install = modelInstall(id: "qwen-gguf", backend: .gguf, installURL: installURL, backendFormat: "gguf") + let backend = LlamaCppBackend( + runtimeVersion: "llama-test", + executableResolver: { executableURL } + ) + + let report = backend.capabilityReport(for: install) + + #expect(report.backend == .gguf) + #expect(report.runtimeVersion == "llama-test") + #expect(report.ready) + #expect(report.supports(.directInference)) + #expect(report.supports(.tokenStreaming)) + 
#expect(!report.supports(.promptCacheBuild)) + #expect(!report.supports(.promptCacheLoad)) + #expect(report.unavailableFeature(.promptCacheBuild)?.reason.contains("GGUF cache") == true) + } + + @Test + func llamaCppCapabilityReportKeepsErrorMessageWhenRuntimeIsMissing() throws { + let installURL = temporaryDirectory() + try Data().write(to: installURL.appendingPathComponent("model.gguf")) + let install = modelInstall(id: "qwen-gguf", backend: .gguf, installURL: installURL, backendFormat: "gguf") + let backend = LlamaCppBackend( + runtimeVersion: "llama-test", + executableResolver: { + throw StoreError.invalidManifest("missing llama-cli") + } + ) + + let report = backend.capabilityReport(for: install) + + #expect(!report.ready) + #expect(report.warnings.contains { $0.contains("missing llama-cli") }) + #expect(!report.supports(.directInference)) + #expect(report.unavailableFeature(.directInference)?.reason.contains("missing llama-cli") == true) + } + + private func modelInstall( + id: String, + backend: BackendKind, + installURL: URL, + backendFormat: String + ) -> ModelInstall { + ModelInstall( + id: id, + spec: ModelSpec( + id: id, + displayName: id, + backend: backend, + source: ModelSource(kind: .localPath, reference: installURL.path), + tokenizerID: "tok-\(id)", + architectureFingerprint: "arch-\(id)" + ), + installPath: installURL.path, + sizeBytes: 42, + backendFormat: backendFormat + ) + } + + private func temporaryDirectory() -> URL { + let url = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString, isDirectory: true) + try? 
FileManager.default.createDirectory(at: url, withIntermediateDirectories: true) + return url + } + + private func executable(named name: String) throws -> URL { + let directory = temporaryDirectory() + let url = directory.appendingPathComponent(name) + try Data("#!/bin/sh\nexit 0\n".utf8).write(to: url) + try FileManager.default.setAttributes([.posixPermissions: 0o755], ofItemAtPath: url.path) + return url + } +} diff --git a/Tests/EshCoreTests/CacheManifestTests.swift b/Tests/EshCoreTests/CacheManifestTests.swift index 05ba11d..6d0d82e 100644 --- a/Tests/EshCoreTests/CacheManifestTests.swift +++ b/Tests/EshCoreTests/CacheManifestTests.swift @@ -20,3 +20,54 @@ func cacheManifestCapturesModeAndVersioning() { #expect(manifest.cacheMode == .turbo) #expect(manifest.compressorVersion == "turboquant-1") } + +@Test +func cacheManifestCarriesOptionalPromptCacheKey() throws { + let key = PromptCacheKey( + hash: "abc123", + backend: .mlx, + modelID: "demo", + tokenizerID: "tok", + runtimeVersion: "1.0", + toolSignature: "tools:none", + normalizedMessageCount: 2 + ) + let manifest = CacheManifest( + backend: .mlx, + modelID: "demo", + tokenizerID: "tok", + architectureFingerprint: "abc", + runtimeVersion: "1.0", + cacheFormatVersion: "v1", + cacheMode: .raw, + sessionID: UUID(), + sessionName: "default", + promptCacheKey: key + ) + + let data = try JSONCoding.encoder.encode(manifest) + let decoded = try JSONCoding.decoder.decode(CacheManifest.self, from: data) + + #expect(decoded.promptCacheKey == key) +} + +@Test +func cacheManifestDecodesWithoutPromptCacheKeyForBackwardCompatibility() throws { + let json = """ + { + "backend" : "mlx", + "modelID" : "demo", + "architectureFingerprint" : "abc", + "runtimeVersion" : "1.0", + "cacheFormatVersion" : "v1", + "cacheMode" : "raw", + "createdAt" : "2026-04-30T00:00:00Z", + "sessionID" : "11111111-1111-1111-1111-111111111111", + "sessionName" : "default" + } + """ + + let decoded = try JSONCoding.decoder.decode(CacheManifest.self, 
from: Data(json.utf8)) + + #expect(decoded.promptCacheKey == nil) +} diff --git a/Tests/EshCoreTests/PromptSessionNormalizerTests.swift b/Tests/EshCoreTests/PromptSessionNormalizerTests.swift index 2b4e564..cf12b63 100644 --- a/Tests/EshCoreTests/PromptSessionNormalizerTests.swift +++ b/Tests/EshCoreTests/PromptSessionNormalizerTests.swift @@ -42,3 +42,55 @@ func promptNormalizerDropsEmptyMessages() { #expect(normalized.messages.count == 1) #expect(normalized.messages.first?.text == "hello") } + +@Test +func promptCacheKeyStabilizesEquivalentSessionsAndIncludesModelAndTools() { + let normalizer = PromptSessionNormalizer() + let left = ChatSession( + id: UUID(uuidString: "11111111-1111-1111-1111-111111111111")!, + name: "demo", + messages: [ + Message(role: .system, text: " system line \r\n"), + Message(role: .user, text: " hello\t") + ] + ) + let right = ChatSession( + id: UUID(uuidString: "22222222-2222-2222-2222-222222222222")!, + name: "renamed", + messages: [ + Message(role: .system, text: "system line"), + Message(role: .user, text: "hello") + ] + ) + + let leftKey = normalizer.promptCacheKey( + for: left, + backend: .mlx, + modelID: "qwen", + tokenizerID: "tok", + runtimeVersion: "mlx-test", + toolSignature: "tools:none" + ) + let rightKey = normalizer.promptCacheKey( + for: right, + backend: .mlx, + modelID: "qwen", + tokenizerID: "tok", + runtimeVersion: "mlx-test", + toolSignature: "tools:none" + ) + let differentToolsKey = normalizer.promptCacheKey( + for: right, + backend: .mlx, + modelID: "qwen", + tokenizerID: "tok", + runtimeVersion: "mlx-test", + toolSignature: "tools:read_file@v1" + ) + + #expect(leftKey == rightKey) + #expect(leftKey.hash != differentToolsKey.hash) + #expect(leftKey.normalizedMessageCount == 2) + #expect(leftKey.backend == .mlx) + #expect(leftKey.modelID == "qwen") +} diff --git a/docs/PLANNING.md b/docs/PLANNING.md index 384cf3f..9305916 100644 --- a/docs/PLANNING.md +++ b/docs/PLANNING.md @@ -10,10 +10,14 @@ This file is the 
index for durable engineering notes, audits, and implementation ## Near-Term Implementation Queue 1. Prompt cache normalization -2. Tool loop safety guard -3. Structured tool replay log -4. Scenario-style agent workflow tests -5. Thin MCP boundary design note + - foundation landed: normalized, model-aware, backend-aware, tokenizer-aware, runtime-aware, and tool-aware cache keys are recorded on new cache manifests + - remaining: cache lookup/reuse policy that can select artifacts by prompt key +2. Runtime capability reporting + - foundation landed: MLX and llama.cpp expose additive backend capability reports for readiness, token streaming, and prompt cache feature support + - remaining: surface richer capability details through user-facing commands where useful +3. Structured execution/request logs +4. Scenario workflow tests for local model runs with mocked runtimes +5. Thin MCP/provider boundary design note, only for tool-call/provider integration points that directly support local execution ## Autonomous Coding Agent Roadmap diff --git a/docs/USAGE.md b/docs/USAGE.md index 14d8c64..a32d49f 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -126,6 +126,8 @@ Check engine readiness: `llama-cli` detection is passive: Esh checks `ESH_LLAMA_CPP_CLI`, `LLAMA_CPP_CLI`, Homebrew paths, and `PATH`, but does not install llama.cpp automatically. Optional engines such as `llamafile`, Ollama, Transformers, and `llama.cpp_server` are listed as detection/configuration adapters only until explicitly enabled and wired for routing. +Backend capability reports are additive and model-aware. MLX reports direct inference, token streaming, and prompt cache build/load when the install path is available. llama.cpp reports direct inference and token streaming when `llama-cli` and a GGUF file are available, and explicitly marks GGUF prompt cache build/load/benchmark support unavailable until those hooks exist. 
+ Validate a local model path or installed model id: ```bash @@ -186,6 +188,7 @@ Notes: - direct inference works for both MLX and GGUF installs - `cacheArtifactID` is optional and keeps MLX cache-load as an extra capability, not the only integration path - `esh capabilities` reports which backends and installed models support direct inference versus cache build/load +- cache artifacts created from normalized prompts now carry a deterministic prompt cache key for future cache lookup and reuse policy ## 3. Find and Install Models diff --git a/docs/superpowers/plans/2026-04-30-esh-orchestrator-runtime.md b/docs/superpowers/plans/2026-04-30-esh-orchestrator-runtime.md index 5cdbfa7..358760f 100644 --- a/docs/superpowers/plans/2026-04-30-esh-orchestrator-runtime.md +++ b/docs/superpowers/plans/2026-04-30-esh-orchestrator-runtime.md @@ -19,11 +19,11 @@ - Modify: `Sources/esh/main.swift` - Test: `Tests/EshCoreTests/OrchestratorConfigTests.swift` -- [ ] Write failing tests for default TOML, parse round-trip, `config init`, `config show`, and `config path`. -- [ ] Implement `EshConfig` with conservative TOML parsing/writing for the roadmap keys only. -- [ ] Implement `EshConfigStore` rooted at `PersistenceRoot.rootURL/config.toml`. -- [ ] Add `ConfigCommand` and route `esh config init|show|path`. -- [ ] Run targeted config tests. +- [x] Write failing tests for default TOML, parse round-trip, `config init`, `config show`, and `config path`. +- [x] Implement `EshConfig` with conservative TOML parsing/writing for the roadmap keys only. +- [x] Implement `EshConfigStore` rooted at `PersistenceRoot.rootURL/config.toml`. +- [x] Add `ConfigCommand` and route `esh config init|show|path`. +- [x] Run targeted config tests. 
### Task 2: Engine Detection Surface @@ -36,12 +36,12 @@ - Test: `Tests/EshCoreTests/EngineOrchestratorTests.swift` - Test: `Tests/EshUITests/OrchestratorCommandTests.swift` -- [ ] Write failing tests for passive `llama-cli` detection, disabled optional engines, MLX doctor failure reporting, and no Homebrew install attempt from `LlamaCppBackend`. -- [ ] Implement required engine statuses for `llama.cpp` and `mlx`. -- [ ] Implement optional detection/config status for `llamafile`, `ollama`, `transformers`, and `llama.cpp_server`. -- [ ] Update `esh doctor` and add `esh engines list|doctor`. -- [ ] Remove automatic `brew install llama.cpp` fallback from runtime resolution. -- [ ] Run targeted engine tests. +- [x] Write failing tests for passive `llama-cli` detection, disabled optional engines, MLX doctor failure reporting, and no Homebrew install attempt from `LlamaCppBackend`. +- [x] Implement required engine statuses for `llama.cpp` and `mlx`. +- [x] Implement optional detection/config status for `llamafile`, `ollama`, `transformers`, and `llama.cpp_server`. +- [x] Update `esh doctor` and add `esh engines list|doctor`. +- [x] Remove automatic `brew install llama.cpp` fallback from runtime resolution. +- [x] Run targeted engine tests. ### Task 3: Local Model Validation @@ -53,12 +53,12 @@ - Test: `Tests/EshCoreTests/LocalModelValidationTests.swift` - Test: `Tests/EshUITests/OrchestratorCommandTests.swift` -- [ ] Write failing tests for GGUF file validation, MLX directory validation, engine filtering, JSON output, and missing dependency suggestions. -- [ ] Implement local path and installed-model resolution. -- [ ] Detect GGUF files and MLX directories without loading model weights. -- [ ] Report compatible engines, ready engine selection, warnings, and suggested fixes. -- [ ] Wire `esh validate [--engine llama.cpp|mlx] [--json]`. -- [ ] Run targeted validation tests. 
+- [x] Write failing tests for GGUF file validation, MLX directory validation, engine filtering, JSON output, and missing dependency suggestions. +- [x] Implement local path and installed-model resolution. +- [x] Detect GGUF files and MLX directories without loading model weights. +- [x] Report compatible engines, ready engine selection, warnings, and suggested fixes. +- [x] Wire `esh validate [--engine llama.cpp|mlx] [--json]`. +- [x] Run targeted validation tests. ### Task 4: Docs, Verification, Publish @@ -68,8 +68,8 @@ - Modify: `CHANGELOG.md` - Modify: `VERSION` only during release. -- [ ] Document the orchestrator commands and passive engine behavior. -- [ ] Run `swift test`. -- [ ] Run CLI smoke checks for `config`, `engines`, `doctor`, and `validate`. -- [ ] Run package smoke/release checks. -- [ ] Stage only in-scope files, commit, push branch, and create a release tag/artifact when verification passes. +- [x] Document the orchestrator commands and passive engine behavior. +- [x] Run `swift test`. +- [x] Run CLI smoke checks for `config`, `engines`, `doctor`, and `validate`. +- [x] Run package smoke/release checks. +- [x] Stage only in-scope files, commit, push branch, and create a release tag/artifact when verification passes. diff --git a/docs/superpowers/plans/2026-04-30-esh-runtime-foundations.md b/docs/superpowers/plans/2026-04-30-esh-runtime-foundations.md new file mode 100644 index 0000000..7519d0b --- /dev/null +++ b/docs/superpowers/plans/2026-04-30-esh-runtime-foundations.md @@ -0,0 +1,53 @@ +# Esh Runtime Foundations Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add small orchestration-focused foundations for backend capability detection and normalized prompt cache identities. + +**Architecture:** Keep esh as a local runtime orchestrator. 
Add additive domain types and protocol defaults so MLX and llama.cpp can report readiness and supported execution features without loading model weights. Add deterministic prompt cache keys to cache manifests while preserving compatibility with older manifests. + +**Tech Stack:** SwiftPM, Swift Testing, Foundation, existing EshCore backend/cache services. + +--- + +### Task 1: Backend Capability Reports + +**Files:** +- Create: `Sources/EshCore/Domain/BackendCapabilities.swift` +- Modify: `Sources/EshCore/Protocols/InferenceBackend.swift` +- Modify: `Sources/EshCore/Backends/MLX/MLXBackend.swift` +- Modify: `Sources/EshCore/Backends/GGUF/LlamaCppBackend.swift` +- Test: `Tests/EshCoreTests/BackendCapabilityTests.swift` + +- [x] Write failing tests for MLX and llama.cpp feature reporting. +- [x] Add stable feature enums and a capability report type. +- [x] Add a default protocol method so existing test backends remain source-compatible. +- [x] Implement MLX capability reporting for direct inference, token streaming, and prompt cache build/load. +- [x] Implement llama.cpp capability reporting for direct inference and token streaming, with cache features marked unavailable. +- [x] Run targeted capability tests. + +### Task 2: Prompt Cache Normalization + +**Files:** +- Create: `Sources/EshCore/Domain/PromptCacheKey.swift` +- Modify: `Sources/EshCore/Services/PromptSessionNormalizer.swift` +- Modify: `Sources/EshCore/Domain/CacheManifest.swift` +- Modify: `Sources/EshCore/Services/CacheService.swift` +- Test: `Tests/EshCoreTests/PromptSessionNormalizerTests.swift` +- Test: `Tests/EshCoreTests/CacheManifestTests.swift` + +- [x] Write failing tests for deterministic, model-aware, backend-aware, tool-aware prompt cache keys. +- [x] Add a canonical prompt cache key payload and SHA-256 hashing. +- [x] Include the key on new cache manifests as an optional field for backward compatibility. +- [x] Run targeted prompt/cache tests. 
+ +### Task 3: Documentation And Verification + +**Files:** +- Modify: `docs/PLANNING.md` +- Modify: `docs/USAGE.md` +- Modify: `README.md` + +- [x] Document backend capability reports and normalized prompt cache keys. +- [x] Run `swift test`. +- [x] Commit as `feat: add runtime capability and prompt cache foundations`.