Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 36 additions & 4 deletions Examples/CoreMLLLMChat/CoreMLLLMChat/ChatView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -235,9 +235,11 @@ struct ChatView: View {
if runner.isLoaded {
ToolbarItem(placement: .topBarTrailing) {
Menu("Bench") {
Button("5 min") { startBenchmark(minutes: 5) }
Button("10 min") { startBenchmark(minutes: 10) }
Button("30 min") { startBenchmark(minutes: 30) }
Button("2 min (speed)") { startBenchmark(minutes: 2) }
Button("5 min") { startBenchmark(minutes: 5) }
Button("15 min (power)") { startBenchmark(minutes: 15) }
Button("30 min") { startBenchmark(minutes: 30) }
Button("60 min") { startBenchmark(minutes: 60) }
}
.disabled(runner.isGenerating || benchmarkRunning)
}
Expand Down Expand Up @@ -565,16 +567,31 @@ struct ChatView: View {
let logLines = result.batteryLog.map { entry in
" \(String(format: "%5.0f", entry.0))s → \(Int(entry.1 * 100))%"
}.joined(separator: "\n")
let thermalLines = result.thermalTrajectory.map { s in
" \(String(format: "%5.0f", s.t))s → \(LLMRunner.thermalString(s.state)) bat=\(s.batteryLevel >= 0 ? "\(Int(s.batteryLevel * 100))%" : "?")"
}.joined(separator: "\n")
let ttf = result.timeToFair.map { "\(Int($0))s" } ?? "never"
let tts = result.timeToSerious.map { "\(Int($0))s" } ?? "never"
let mJ = result.mJPerToken
let mJStr = mJ > 0 ? String(format: "%.1f mJ/tok", mJ) : "n/a (gauge noise, need >=10 min run)"
let csvPath = saveBenchmarkCSV(result)
let csvLine = csvPath.map { "CSV : \($0)" } ?? "CSV : (save failed)"
let summary = """
[Benchmark RESULT]
Duration : \(Int(result.duration))s (\(String(format: "%.1f", result.duration / 60.0)) min)
Rounds : \(result.rounds)
Total tokens : \(result.totalTokens)
Avg tok/s : \(String(format: "%.2f", result.avgTokPerSec))
Battery : \(bs)% → \(be)% (Δ \(String(format: "%.2f", result.drainedPercent))%)
Drain rate : \(String(format: "%.3f", result.drainedPerMinute))%/min
Drain rate : \(String(format: "%.3f", result.drainedPerMinute))%/min (~\(String(format: "%.1f", result.drainedPerHour))%/hr)
Tokens/%SoC : \(String(format: "%.0f", result.tokensPerPercent))
Energy/token : \(mJStr)
Thermal : \(LLMRunner.thermalString(result.thermalStart)) → \(LLMRunner.thermalString(result.thermalEnd))\(abortNote)
Time→fair : \(ttf)
Time→serious : \(tts)
\(csvLine)
Thermal trajectory:
\(thermalLines)
Battery log:
\(logLines)
"""
Expand All @@ -589,6 +606,21 @@ struct ChatView: View {
}
}

/// Persists the CSV export of a benchmark run into the app's Documents directory.
///
/// The file is named `bench-<unix seconds>.csv`, using the time of the call.
/// Success and failure are both reported on the console.
///
/// - Parameter result: Benchmark result whose `csv()` text is written out.
/// - Returns: The file name (not the full path) on success; `nil` when the
///   Documents directory cannot be resolved or the write throws.
private func saveBenchmarkCSV(_ result: LLMRunner.BenchmarkResult) -> String? {
    let documentDirs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask)
    guard let documentsURL = documentDirs.first else { return nil }

    let unixSeconds = Int(Date().timeIntervalSince1970)
    let fileURL = documentsURL.appendingPathComponent("bench-\(unixSeconds).csv")

    do {
        let contents = result.csv()
        try contents.write(to: fileURL, atomically: true, encoding: .utf8)
    } catch {
        print("[Benchmark] CSV save failed: \(error)")
        return nil
    }

    print("[Benchmark] CSV saved: \(fileURL.path)")
    return fileURL.lastPathComponent
}

private func verifyANE() {
messages.append(ChatMessage(role: .system, content: "Checking MLComputePlan device placement..."))
Task.detached(priority: .userInitiated) {
Expand Down
72 changes: 70 additions & 2 deletions Examples/CoreMLLLMChat/CoreMLLLMChat/LLMRunner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1221,6 +1221,12 @@ final class LLMRunner {
var thermal: ProcessInfo.ThermalState
}

/// One point on the benchmark's thermal timeline.
struct ThermalSample {
/// Seconds elapsed since the benchmark started (the initial sample uses 0).
var t: TimeInterval
/// Thermal state recorded for this sample.
var state: ProcessInfo.ThermalState
/// Battery level recorded alongside the thermal state. Consumers multiply it
/// by 100 to display a percentage and treat negative values as "unknown".
var batteryLevel: Float
}

struct BenchmarkResult {
var duration: TimeInterval
var totalTokens: Int
Expand All @@ -1232,11 +1238,59 @@ final class LLMRunner {
var thermalEnd: ProcessInfo.ThermalState
var abortedThermal: Bool = false
var batteryLog: [(TimeInterval, Float)] = []
var thermalTrajectory: [LLMRunner.ThermalSample] = []

// iPhone 17 Pro nominal battery capacity. Override for other devices.
// Source: Apple spec sheet (14.03 Wh = 50508 J).
var batteryCapacityWh: Double = 14.03

/// Battery level consumed over the run (start minus end); negative if the level rose.
var batteryDelta: Float { batteryStart - batteryEnd }
/// `batteryDelta` scaled by 100, i.e. expressed in percentage points.
var drainedPercent: Double { Double(batteryDelta) * 100.0 }
/// Percentage points drained per minute of run time; 0 when `duration` is not positive.
var drainedPerMinute: Double { duration > 0 ? drainedPercent / (duration / 60.0) : 0 }
/// `drainedPerMinute` extrapolated linearly to one hour.
var drainedPerHour: Double { drainedPerMinute * 60.0 }
/// Tokens generated per percentage point drained; 0 when no positive drain was measured.
var tokensPerPercent: Double { drainedPercent > 0 ? Double(totalTokens) / drainedPercent : 0 }

/// Estimated energy cost of one decoded token, in millijoules.
///
/// Computed from the battery-gauge delta and `batteryCapacityWh`
/// (1 Wh = 3600 J). Returns 0 when no tokens were produced or no positive
/// drain was measured. The gauge is coarse (1% resolution); trust the
/// figure only for runs >= 10 min.
var mJPerToken: Double {
    let hasSignal = totalTokens > 0 && drainedPercent > 0
    if !hasSignal { return 0 }

    let consumedJoules = drainedPercent / 100.0 * batteryCapacityWh * 3600.0
    let consumedMillijoules = consumedJoules * 1000.0
    return consumedMillijoules / Double(totalTokens)
}

/// Timestamp (seconds into the run) of the first trajectory sample whose
/// thermal state is `.fair`, `.serious` or `.critical`; `nil` if none is.
var timeToFair: TimeInterval? {
    for sample in thermalTrajectory {
        switch sample.state {
        case .fair, .serious, .critical:
            return sample.t
        default:
            continue
        }
    }
    return nil
}
/// Timestamp (seconds into the run) of the first trajectory sample whose
/// thermal state is `.serious` or `.critical`; `nil` if none is.
var timeToSerious: TimeInterval? {
    for sample in thermalTrajectory {
        switch sample.state {
        case .serious, .critical:
            return sample.t
        default:
            continue
        }
    }
    return nil
}

/// Serializes the run as CSV text followed by a `#`-prefixed summary block.
///
/// Layout:
/// 1. Header row `t_seconds,battery_pct,thermal_state,source`.
/// 2. One `thermal` row per entry of `thermalTrajectory`.
/// 3. One `battery` row per entry of `batteryLog` (thermal column left empty).
/// 4. A blank line, then `# key=value` summary lines.
///
/// Battery levels are exported as whole percentages, rounded to nearest;
/// a negative level is exported as `-1`.
///
/// - Returns: The complete file contents, lines joined with `\n`.
func csv() -> String {
    // Round instead of truncating: the level is a Float fraction, and e.g.
    // Float(0.59) * 100 evaluates to 58.999996, which truncation would
    // export as 58 rather than 59.
    func percent(_ level: Float) -> Int {
        level >= 0 ? Int((level * 100).rounded()) : -1
    }

    var lines = ["t_seconds,battery_pct,thermal_state,source"]
    lines += thermalTrajectory.map { sample in
        "\(Int(sample.t)),\(percent(sample.batteryLevel)),\(LLMRunner.thermalString(sample.state)),thermal"
    }
    lines += batteryLog.map { entry in
        "\(Int(entry.0)),\(percent(entry.1)),,battery"
    }

    // Summary lines are '#'-prefixed so they can be told apart from data rows.
    lines.append("")
    lines.append("# summary")
    lines.append("# duration_s=\(Int(duration))")
    lines.append("# total_tokens=\(totalTokens)")
    lines.append("# avg_tok_per_sec=\(String(format: "%.2f", avgTokPerSec))")
    lines.append("# drained_percent=\(String(format: "%.2f", drainedPercent))")
    lines.append("# drained_per_hour=\(String(format: "%.2f", drainedPerHour))")
    lines.append("# mJ_per_token=\(String(format: "%.2f", mJPerToken))")
    lines.append("# time_to_fair_s=\(timeToFair.map { String(Int($0)) } ?? "never")")
    lines.append("# time_to_serious_s=\(timeToSerious.map { String(Int($0)) } ?? "never")")
    lines.append("# thermal_start=\(LLMRunner.thermalString(thermalStart))")
    lines.append("# thermal_end=\(LLMRunner.thermalString(thermalEnd))")
    lines.append("# aborted_thermal=\(abortedThermal)")
    lines.append("# battery_capacity_wh=\(batteryCapacityWh)")
    return lines.joined(separator: "\n")
}
}

private static let benchmarkPrompt =
Expand All @@ -1258,6 +1312,10 @@ final class LLMRunner {
var abortedThermal = false
var batteryLog: [(TimeInterval, Float)] = [(0, startBat)]
var lastLoggedLevel = startBat
var thermalTrajectory: [ThermalSample] = [
ThermalSample(t: 0, state: startThermal, batteryLevel: startBat)
]
var nextThermalSampleAt: TimeInterval = 30
let prompt = ChatMessage(role: .user, content: Self.benchmarkPrompt)

func isThermalUnsafe() -> Bool {
Expand All @@ -1277,6 +1335,13 @@ final class LLMRunner {
batteryLog.append((elapsed, currentLevel))
lastLoggedLevel = currentLevel
}
if elapsed >= nextThermalSampleAt {
thermalTrajectory.append(ThermalSample(
t: elapsed,
state: ProcessInfo.processInfo.thermalState,
batteryLevel: currentLevel))
nextThermalSampleAt += 30
}
if totalTokens % 20 == 0 {
onProgress(BenchmarkProgress(
elapsed: elapsed, totalTokens: totalTokens, round: round,
Expand All @@ -1293,14 +1358,17 @@ final class LLMRunner {

let endTime = Date()
let endBat = UIDevice.current.batteryLevel
let endThermal = ProcessInfo.processInfo.thermalState
let dur = endTime.timeIntervalSince(startTime)
batteryLog.append((dur, endBat))
thermalTrajectory.append(ThermalSample(t: dur, state: endThermal, batteryLevel: endBat))
return BenchmarkResult(
duration: dur, totalTokens: totalTokens, rounds: round,
avgTokPerSec: dur > 0 ? Double(totalTokens) / dur : 0,
batteryStart: startBat, batteryEnd: endBat,
thermalStart: startThermal, thermalEnd: ProcessInfo.processInfo.thermalState,
abortedThermal: abortedThermal, batteryLog: batteryLog)
thermalStart: startThermal, thermalEnd: endThermal,
abortedThermal: abortedThermal, batteryLog: batteryLog,
thermalTrajectory: thermalTrajectory)
}
#endif

Expand Down
33 changes: 32 additions & 1 deletion docs/BENCHMARKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,38 @@ We report **peak** in the README performance table because that matches how comp
- Mid-decode: ~981 MB
- Headroom (`os_proc_available`): ~5 GB

## Energy (`J/tok`)
## Energy (`mJ/tok`, `%/hour`, thermal trajectory)

The sample app's **Bench** menu now exposes three presets aimed at
power reporting:

- **2 min (speed)** — quick peak tok/s check
- **15 min (power)** — minimum duration for a defensible `mJ/tok`
number given the iOS battery gauge's 1 % resolution
- **60 min** — long-haul thermal profile, useful for "will this
throttle in a real session" questions

After each run the app writes a CSV to `Documents/bench-<unix_ts>.csv`
with the per-30s thermal trajectory, battery log, and a `# summary`
block. The CSV filename is printed in the in-app result and to the
console. Retrieve via Files app (the target already has
document-sharing entitlements).

`BenchmarkResult` exposes:

- `mJPerToken` — `drainedPercent × batteryCapacityWh × 36000 / totalTokens`.
iPhone 17 Pro nominal capacity is 14.03 Wh; override
`batteryCapacityWh` for other devices.
- `drainedPerHour` — extrapolated from the run duration.
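- `timeToFair`, `timeToSerious` — elapsed seconds at the first recorded
thermal sample whose `ProcessInfo.thermalState` was at least `fair` /
`serious` (`nil`, shown as "never", if no sample reached it). Samples are
taken roughly every 30 s, so these values have about 30 s resolution.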
- `thermalTrajectory` — array of `ThermalSample(t, state, batteryLevel)`
at 30-second intervals.

For the methodology, metric tiers, and head-to-head protocol against
other engines, see [POWER_BENCHMARK_PLAN.md](POWER_BENCHMARK_PLAN.md).

## Energy (`J/tok`) — legacy derivation

The ~0.07 J/tok figure in `docs/RESEARCH.md` is **derived**, not directly measured:

Expand Down
Loading
Loading