Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
cf0bf25
Fix multi-turn tool-calling checkpoint starvation for recurrent model…
claude Jul 2, 2026
7ddb792
patch 0005: extract checkpoint conditions into named booleans
claude Jul 3, 2026
926cd3c
server: add -b/-ub/-tb/-ctk/-ctv/--jinja/--chat-template-kwargs to Op…
claude Jul 3, 2026
3c8aeb8
server: add NativeServer — run the full llama.cpp server (WebUI) in-D…
claude Jul 3, 2026
8a1a68f
server: make NativeServer the default fat-jar Main-Class (keep OpenAi…
claude Jul 3, 2026
c288e43
server: add ServerLauncher — one fat-jar entry, pick mode via --open-…
claude Jul 3, 2026
b274520
server: simplify ServerLauncher dispatch to one primitive; rename fla…
claude Jul 3, 2026
8a4b1ff
Namespace the ServerLauncher selector flag as --jllama-openai-compat
claude Jul 3, 2026
58628fd
Upgrade llama.cpp from b9859 to b9862
claude Jul 3, 2026
e8abfc1
Surface model ftype (quantization) through the Java layer and /v1/models
claude Jul 3, 2026
14df14a
Add Linux Vulkan classifiers + Windows arm64 CPU to the build matrix
claude Jul 3, 2026
f76cb23
Emit a per-job sccache statistics table to the GitHub job summary
claude Jul 3, 2026
8eb55ff
Upgrade llama.cpp from b9862 to b9864
claude Jul 3, 2026
dca1b20
Expose sse_ping_interval + audited completion params on InferencePara…
claude Jul 3, 2026
5664957
CI fixes: SpotBugs suppressions, spirv-headers, clang-cl for Windows …
claude Jul 3, 2026
d9a6a83
Fix ArchUnit violations from the new server code (layering + no-sleep)
claude Jul 3, 2026
bfc766c
Windows arm64: disable OpenMP so the clang-cl build is self-contained
claude Jul 3, 2026
c6ac704
Upgrade llama.cpp from b9864 to b9866
claude Jul 3, 2026
19303d0
Automate llama.cpp version-bump target selection and chunking
claude Jul 3, 2026
2a84787
Upgrade llama.cpp from b9866 to b9867
claude Jul 3, 2026
215db79
Add REUSE license header to the version-bump runbook doc
claude Jul 3, 2026
890da0f
Upgrade llama.cpp from b9867 to b9870
claude Jul 4, 2026
fce103b
Add 8 GPU-backend classifiers: ROCm/HIP, SYCL, Win-arm64 OpenCL, Open…
claude Jul 4, 2026
4102826
Fix vendor-toolchain installs for the 4 failing GPU classifier jobs
claude Jul 4, 2026
3119efd
Fix GPU classifier build errors: HIP MSVC, SYCL/OpenVINO CRT, OpenVIN…
claude Jul 4, 2026
fb88b12
OpenVINO: bump to 2026.2.1 + fix OpenCL headers; raise cmake min to 3.22
claude Jul 4, 2026
c83bfe0
Linux OpenVINO: install 2026.2.1 from archive, not the (nonexistent) …
claude Jul 4, 2026
c0f2d1a
Add Linux s390x (big-endian) build with a qemu-user C++ test gate
claude Jul 4, 2026
9b0f50b
NativeServer.main: own the server in try/finally (SonarQube S2095)
claude Jul 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,28 @@ REM was wired in as the launcher.
if defined LAUNCH (
REM Print the raw sccache statistics to the job log first.
echo build.bat: sccache --show-stats
sccache --show-stats
REM KISS per-job cache summary in the GitHub Actions job summary (like upstream llama.cpp's
REM ccache-action table). Parse the text stats: the top-level "Compile requests" line is the
REM total and the top-level "Cache hits" line is the hits (the per-language "Cache hits (C/C++)"
REM line has "(" after the label, so the digit-anchored findstr regex skips it). Only in CI
REM (GITHUB_STEP_SUMMARY set); local runs are untouched. Best-effort -- skipped if the two
REM numbers can't be parsed or there were no requests. Integer math with rounding to one decimal.
if defined GITHUB_STEP_SUMMARY (
REM Clear both counters so the "if defined" checks below only pass when this run parsed a value.
set "SCC_REQ="
set "SCC_HITS="
REM tokens=3 selects the third whitespace-separated field of the matching line, which is the
REM number that follows the two-word label.
for /f "tokens=3" %%a in ('sccache --show-stats 2^>nul ^| findstr /r /c:"^Compile requests *[0-9]"') do set "SCC_REQ=%%a"
for /f "tokens=3" %%a in ('sccache --show-stats 2^>nul ^| findstr /r /c:"^Cache hits *[0-9]"') do set "SCC_HITS=%%a"
if defined SCC_REQ if defined SCC_HITS if !SCC_REQ! gtr 0 (
REM SCC_RATE10 is the hit rate in tenths of a percent. Adding half the divisor before the
REM integer division rounds to the nearest tenth. The carets keep the arithmetic parentheses
REM from being read as the end of the enclosing block.
set /a SCC_RATE10=^(!SCC_HITS! * 1000 + !SCC_REQ! / 2^) / !SCC_REQ!
REM Split the tenths value into the integer part and the single decimal digit.
set /a SCC_WHOLE=!SCC_RATE10! / 10
set /a SCC_DEC=!SCC_RATE10! %% 10
REM Append a small markdown table to the job summary file. The table pipes are caret-escaped
REM and the percent sign is doubled so that both are written literally.
>>"%GITHUB_STEP_SUMMARY%" echo ### sccache statistics
>>"%GITHUB_STEP_SUMMARY%" echo.
>>"%GITHUB_STEP_SUMMARY%" echo ^| Cache hits ^| Requests ^| Hit rate ^|
>>"%GITHUB_STEP_SUMMARY%" echo ^|------------^|----------^|----------^|
>>"%GITHUB_STEP_SUMMARY%" echo ^| !SCC_HITS! ^| !SCC_REQ! ^| !SCC_WHOLE!.!SCC_DEC!%% ^|
)
)
)

REM Propagate a build failure as a non-zero exit (a prior bug let a failed `cmake
Expand Down
23 changes: 22 additions & 1 deletion .github/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -160,5 +160,26 @@ rm -f "$build_log"
# crashing sccache (or the mid-build retry disabled it), re-invoking it here would just repeat
# the crash output (harmless but noisy).
#######################################
# Render sccache's `--show-stats` text as a small markdown table.
# Arguments: $1 - the full text printed by `sccache --show-stats`
# Outputs:   the table (heading, blank line, header row, separator row, data row) on stdout
# Returns:   0 when the table was printed; 1 (printing nothing) when the two counters cannot be
#            parsed as plain integers or there were no compile requests
# The body is a subshell so the working variables never leak into the calling script, and so the
# function stays usable from a plain POSIX sh (no `local`).
#######################################
sccache_summary_table() (
  stats_text="$1"
  # The top-level "Compile requests" line is the total and the top-level "Cache hits" line is the
  # hits. The per-language "Cache hits (C/C++)" line has "(" after the label, so the
  # digit-anchored regex skips it.
  requests="$(printf '%s\n' "$stats_text" | awk '/^Compile requests[[:space:]]+[0-9]/{print $NF; exit}')"
  hits="$(printf '%s\n' "$stats_text" | awk '/^Cache hits[[:space:]]+[0-9]/{print $NF; exit}')"
  # Best-effort: bail out quietly unless both values are non-empty runs of digits.
  case "$requests" in '' | *[!0-9]*) return 1 ;; esac
  case "$hits" in '' | *[!0-9]*) return 1 ;; esac
  [ "$requests" -gt 0 ] || return 1
  # Hand the numbers to awk as variables rather than splicing them into the program text.
  rate="$(awk -v h="$hits" -v r="$requests" 'BEGIN { printf "%.1f", (h / r) * 100 }')"
  printf '%s\n' \
    "### sccache statistics" \
    "" \
    "| Cache hits | Requests | Hit rate |" \
    "|------------|----------|----------|" \
    "| ${hits} | ${requests} | ${rate}% |"
)

if [ -n "${LAUNCH:-}" ] && command -v sccache >/dev/null 2>&1; then
  # Query sccache exactly once; the captured text feeds both the job log and the summary table.
  sccache_stats="$(sccache --show-stats 2>/dev/null || true)"
  printf '%s\n' "$sccache_stats"
  # KISS per-job cache summary in the GitHub Actions job summary (like upstream llama.cpp's
  # ccache-action table). Only runs in CI (GITHUB_STEP_SUMMARY set); local runs are untouched.
  # Best-effort: skips silently if the table cannot be built.
  if [ -n "${GITHUB_STEP_SUMMARY:-}" ] && [ -n "$sccache_stats" ]; then
    # Build the table first so the summary file is only touched when there is something to add.
    if sccache_table="$(sccache_summary_table "$sccache_stats")"; then
      printf '%s\n' "$sccache_table" >> "$GITHUB_STEP_SUMMARY"
    fi
  fi
fi
123 changes: 123 additions & 0 deletions .github/scripts/llama-next-version.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env bash
# SPDX-FileCopyrightText: 2026 Bernard Ladenthin <bernard.ladenthin@gmail.com>
#
# SPDX-License-Identifier: MIT
#
# Pick the NEXT llama.cpp tag to bump the pin to, one reviewable chunk at a time.
#
# The runbook this supports is docs/upgrade/llama-cpp-version-bump.md. Strategy:
# * TARGET = the newest RELEASE, i.e. the highest-numbered b<nnnn> tag listed in the GitHub release
# atom feed (normally the topmost entry on the releases page), or an explicit "b<nnnn>" passed as $1.
# * CURRENT = the pinned tag in llama/CMakeLists.txt (GIT_TAG b<nnnn>).
# * If `git diff CURRENT..TARGET` is smaller than the threshold (default 100 KiB), bump straight
# to TARGET. Otherwise CHUNK: pick the largest intermediate b<nnnn> tag whose diff from CURRENT
# is still under the threshold, so each bump stays a small, reviewable patch. Re-run after each
# bump to walk the remaining chunks up to TARGET.
#
# This tool only READS (a cached mirror clone + the pin file); it never edits the repo. Apply the
# bump by hand per the runbook. It prints the compare/.patch URLs for the chosen step.
#
# Env:
# LLAMA_BUMP_MAX_DIFF_KB per-step diff-size threshold in KiB (default 100)
# LLAMA_BUMP_EXCLUDE_WEBUI if "1", size the diff EXCLUDING tools/ui (the auto-followed WebUI, which
# does not need per-bump review); default 0 = the full diff you paste/review
# LLAMA_BUMP_CACHE mirror-clone location (default ~/.cache/jllama-llamacpp-mirror)
#
# Network: needs read access to github.com (git clone/fetch + the release atom feed). No token.

# Strict mode: stop on command failures, unset variables and failed pipeline stages.
set -euo pipefail

# Upstream repository coordinates.
REPO="ggml-org/llama.cpp"
GIT_URL="https://github.com/${REPO}.git"

# Tunables, each overridable through the environment (see the header for their meaning).
THRESHOLD_KB="${LLAMA_BUMP_MAX_DIFF_KB:-100}"
THRESHOLD=$(( THRESHOLD_KB * 1024 ))  # the same per-step limit expressed in bytes
EXCLUDE_WEBUI="${LLAMA_BUMP_EXCLUDE_WEBUI:-0}"
CACHE="${LLAMA_BUMP_CACHE:-$HOME/.cache/jllama-llamacpp-mirror}"

# The repository root sits two directories above this script; the pin is read from
# llama/CMakeLists.txt underneath it.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT="$(cd "$script_dir/../.." && pwd)"
CMAKELISTS="$ROOT/llama/CMakeLists.txt"

# --- current pinned tag number, e.g. "GIT_TAG b9866" -> 9866 -----------------------------------
# The trailing "|| true" keeps a non-matching grep from aborting under `set -e`, so the explicit
# error below is what the user sees.
cur="$(grep -oE 'GIT_TAG[[:space:]]+b[0-9]+' "$CMAKELISTS" | grep -oE '[0-9]+' | head -1 || true)"
if [ -z "$cur" ]; then
  echo "ERROR: could not read 'GIT_TAG b<nnnn>' from $CMAKELISTS" >&2
  exit 1
fi

# --- cached blobless mirror of llama.cpp (clone once, then fetch tags) --------------------------
if [ -d "$CACHE/.git" ]; then
  # Refreshing stays best-effort so the tool can still run against the cached tags, but a failed
  # refresh is reported: a stale mirror is a likely cause of a later
  # "b<nnnn> is not a tag in the mirror" error, and silence would hide that connection.
  if ! git -C "$CACHE" fetch --quiet --tags --prune origin; then
    echo "WARNING: could not refresh $CACHE from ${GIT_URL}; continuing with the cached tags (they may be stale)." >&2
  fi
else
  # First run: blobless, no-checkout clone. A clone failure is fatal (strict mode), since nothing
  # below can work without the mirror.
  echo "cloning ${REPO} (blobless) into $CACHE (one-time) ..." >&2
  git clone --filter=blob:none --no-checkout --quiet "$GIT_URL" "$CACHE"
fi

# --- target: explicit "$1" (b<nnnn>) or the latest RELEASE from the atom feed -------------------
# Both extraction pipelines end in "|| true". Under `set -euo pipefail`, a grep that matches
# nothing makes the whole pipeline fail, and that would abort the script at the assignment,
# before the explicit error message and its dedicated exit code are reached.
if [ "${1:-}" != "" ]; then
  # Take the first run of digits from the argument, so "b9870" and "9870" both work.
  target="$(printf '%s' "$1" | grep -oE '[0-9]+' | head -1 || true)"
  [ -n "$target" ] || { echo "ERROR: '$1' is not a b<nnnn> tag" >&2; exit 1; }
else
  # A failed download is turned into an empty feed and handled by the check on the next line.
  feed="$(curl -sSL --fail --retry 4 --retry-delay 2 "https://github.com/${REPO}/releases.atom" 2>/dev/null || true)"
  [ -n "$feed" ] || { echo "ERROR: cannot fetch the releases feed (network/rate limit). Read the topmost release at https://github.com/${REPO}/releases and pass it: $0 b<nnnn>" >&2; exit 2; }
  # The highest-numbered release tag mentioned in the feed.
  target="$(printf '%s' "$feed" | grep -oE 'releases/tag/b[0-9]+' | grep -oE '[0-9]+' | sort -un | tail -1 || true)"
  [ -n "$target" ] || { echo "ERROR: parsed no release tags from the feed." >&2; exit 3; }
fi

# Both ends of the range must resolve to a commit in the mirror before any diff is sized.
git -C "$CACHE" rev-parse -q --verify "b${cur}^{commit}" >/dev/null 2>&1 || { echo "ERROR: b$cur is not a tag in the mirror" >&2; exit 3; }
git -C "$CACHE" rev-parse -q --verify "b${target}^{commit}" >/dev/null 2>&1 || { echo "ERROR: b$target is not a tag in the mirror" >&2; exit 3; }

#######################################
# Print the byte size of the diff between two tag numbers, honoring the WebUI-exclusion toggle.
# Globals:   CACHE (read)         - path of the mirror clone
#            EXCLUDE_WEBUI (read) - "1" sizes the diff without tools/ui
# Arguments: $1 - base tag number, $2 - head tag number (both without the "b" prefix)
# Outputs:   the size in bytes on stdout; a diagnostic on stderr when git fails
# Returns:   0 on success. When `git diff` fails (for example an unknown tag, or a failed
#            on-demand blob fetch in the blobless clone) it returns 1 and prints NOTHING on
#            stdout. A failed diff must not be reported as "0 bytes", because 0 passes every
#            "under the threshold" check. With empty output, a numeric `[ ... -lt ... ]` test in
#            the caller is false, so the range is treated as not fitting.
#######################################
diffsize() {
  local bytes
  # pipefail is enabled inside the substitution so git's exit status is not masked by `wc`,
  # whatever the calling shell's own options are.
  if [ "$EXCLUDE_WEBUI" = "1" ]; then
    bytes="$(set -o pipefail; git -C "$CACHE" diff "b$1" "b$2" -- . ':(exclude)tools/ui' 2>/dev/null | wc -c)" || bytes=""
  else
    bytes="$(set -o pipefail; git -C "$CACHE" diff "b$1" "b$2" 2>/dev/null | wc -c)" || bytes=""
  fi
  if [ -z "$bytes" ]; then
    echo "ERROR: git diff b$1..b$2 failed in $CACHE; its size is unknown" >&2
    return 1
  fi
  printf '%s\n' "$bytes"
}

# Name the kind of diff being sized, then print the run header.
if [ "$EXCLUDE_WEBUI" = "1" ]; then
  scope="diff excluding tools/ui"
else
  scope="full diff"
fi
printf '%s\n' \
  "current pin : b$cur" \
  "latest release : b$target" \
  "threshold : ${THRESHOLD_KB} KiB per step (${scope})"

# Nothing to do when the pin is already at (or beyond) the target.
if [ "$cur" -ge "$target" ]; then
  printf '%s\n' "=> up to date — no bump needed."
  exit 0
fi

# --- choose next step: TARGET if it fits, else the largest intermediate tag under the threshold -
if [ "$(diffsize "$cur" "$target")" -lt "$THRESHOLD" ]; then
  next="$target"
else
  # Existing b<nnnn> tags strictly after cur, up to and including target, ascending.
  # The sed expression only accepts tag names that are EXACTLY "b" followed by digits, so a tag
  # such as "b1234-foo" is ignored instead of being shortened to 1234, a number that may not
  # exist as a tag of its own and therefore cannot be diffed.
  # shellcheck disable=SC2207
  cands=($(git -C "$CACHE" tag -l 'b*' | sed -nE 's/^b([0-9]+)$/\1/p' | sort -un \
    | awk -v c="$cur" -v t="$target" '$1 > c && $1 <= t'))
  # Without candidates there is nothing to search and no fallback; stop with a clear message
  # rather than an "unbound variable" failure further down.
  if [ "${#cands[@]}" -eq 0 ]; then
    echo "ERROR: no b<nnnn> tags found after b$cur up to b$target in the mirror" >&2
    exit 3
  fi
  # Binary search for the largest candidate whose diff from cur is under the threshold
  # (diff size grows monotonically enough with the tag number for this to be a safe heuristic).
  lo=0
  hi=$(( ${#cands[@]} - 1 ))
  best=""
  while [ "$lo" -le "$hi" ]; do
    mid=$(( (lo + hi) / 2 ))
    probe="${cands[$mid]}"
    if [ "$(diffsize "$cur" "$probe")" -lt "$THRESHOLD" ]; then
      # Fits: remember it and look for a later tag that still fits.
      best="$probe"
      lo=$(( mid + 1 ))
    else
      # Too large: only earlier tags can fit.
      hi=$(( mid - 1 ))
    fi
  done
  if [ -n "$best" ]; then
    next="$best"
  else
    # Not even the first tag after cur fits; take it anyway so the walk can make progress.
    next="${cands[0]}"
    echo "NOTE: even b$cur..b$next exceeds ${THRESHOLD_KB} KiB — a single-commit step this large is unavoidable." >&2
  fi
fi

# Measure the chosen step both ways (with and without tools/ui) and count its commits.
full="$(git -C "$CACHE" diff "b$cur" "b$next" | wc -c)"
noui="$(git -C "$CACHE" diff "b$cur" "b$next" -- . ':(exclude)tools/ui' | wc -c)"
commits="$(git -C "$CACHE" rev-list --count "b$cur".."b$next")"

# Say whether this step lands on the target or is one chunk of a longer walk.
if [ "$next" -eq "$target" ]; then
  progress="reaches the latest release — final chunk"
else
  progress="intermediate chunk — re-run this script after the bump for the next one"
fi
compare_url="https://github.com/${REPO}/compare/b$cur...b$next"

# Summary of the step, framed by blank lines, followed by the pointer to the runbook.
printf '\n'
printf '%s\n' \
  "next step : b$cur -> b$next" \
  " diff size : $((full / 1024)) KiB full / $((noui / 1024)) KiB excluding tools/ui (auto-followed WebUI)" \
  " commits : $commits" \
  " progress : ${progress}" \
  " review diff : ${compare_url}" \
  " raw .patch : ${compare_url}.patch"
printf '\n'
printf '%s\n' "Apply this bump per docs/upgrade/llama-cpp-version-bump.md (b$cur -> b$next)."
Loading
Loading