From 1b8ea195282eab3a2567f197a5859a08d22cd1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Andr=C3=A9=20Gomes=20Marques?= Date: Tue, 16 Jun 2026 19:16:09 +0200 Subject: [PATCH] Add sub-1.5bpe frontier + entropy-bpe Kaggle kernels and result sidecars (Mistral/Llama/Qwen3 + Mistral NIAH) Lands four reproducibility kernels + six result JSONs: - nq_mistral_subbpe: Mistral-7B-Inst-v0.3 AQUA-iso PPL + live zstd-L22/Shannon bpe (K3V2 to K1V1 pb=0) - nq_llama31_subbpe: Llama-3.1-8B-Inst same protocol; rope_scaling propagated via prepare_rope_scaling() - nq_qwen3_subbpe_entropy: Qwen3-8B (NF4 weights) K3V2/K2V2 pb=0/pb=1 with entropy-coded bpe measurement - nq_mistral_niah_frontier: Mistral-7B-Inst-v0.3 chat-template NIAH at 4K+8K across six quant configs Result sidecars in experiments/kaggle/results/ confirm the sub-1.5bpe PPL frontier and NIAH cliff data cited in the paper. All kernels use ungated model mirrors; no HF_TOKEN required. --- .../nq_llama31_subbpe/kernel-metadata.json | 20 + .../nq_llama31_subbpe/nq_llama31_subbpe.py | 602 ++++++ .../kernel-metadata.json | 20 + .../nq_mistral_niah_frontier.py | 445 +++++ .../nq_mistral_subbpe/kernel-metadata.json | 20 + .../nq_mistral_subbpe/nq_mistral_subbpe.py | 597 ++++++ .../kernel-metadata.json | 20 + .../nq_qwen3_subbpe_entropy.py | 611 ++++++ .../kaggle/results/nq_hqmq_qwen3_v3.json | 621 ++++++ .../kaggle/results/nq_llama31_subbpe.json | 1003 ++++++++++ .../results/nq_mistral_niah_frontier.json | 621 ++++++ .../results/nq_mistral_subbpe_frontier.json | 1094 +++++++++++ .../results/nq_qwen3_subbpe_entropy.json | 1031 ++++++++++ .../kaggle/results/nq_yi_subbpe_niah.json | 1736 +++++++++++++++++ 14 files changed, 8441 insertions(+) create mode 100644 experiments/kaggle/nq_llama31_subbpe/kernel-metadata.json create mode 100644 experiments/kaggle/nq_llama31_subbpe/nq_llama31_subbpe.py create mode 100644 experiments/kaggle/nq_mistral_niah_frontier/kernel-metadata.json create mode 100644 
experiments/kaggle/nq_mistral_niah_frontier/nq_mistral_niah_frontier.py create mode 100644 experiments/kaggle/nq_mistral_subbpe/kernel-metadata.json create mode 100644 experiments/kaggle/nq_mistral_subbpe/nq_mistral_subbpe.py create mode 100644 experiments/kaggle/nq_qwen3_subbpe_entropy/kernel-metadata.json create mode 100644 experiments/kaggle/nq_qwen3_subbpe_entropy/nq_qwen3_subbpe_entropy.py create mode 100644 experiments/kaggle/results/nq_hqmq_qwen3_v3.json create mode 100644 experiments/kaggle/results/nq_llama31_subbpe.json create mode 100644 experiments/kaggle/results/nq_mistral_niah_frontier.json create mode 100644 experiments/kaggle/results/nq_mistral_subbpe_frontier.json create mode 100644 experiments/kaggle/results/nq_qwen3_subbpe_entropy.json create mode 100644 experiments/kaggle/results/nq_yi_subbpe_niah.json diff --git a/experiments/kaggle/nq_llama31_subbpe/kernel-metadata.json b/experiments/kaggle/nq_llama31_subbpe/kernel-metadata.json new file mode 100644 index 0000000..f76aedc --- /dev/null +++ b/experiments/kaggle/nq_llama31_subbpe/kernel-metadata.json @@ -0,0 +1,20 @@ +{ + "id": "jagmardrop/nq-llama31-subbpe", + "title": "nq-llama31-subbpe", + "code_file": "nq_llama31_subbpe.py", + "language": "python", + "kernel_type": "script", + "is_private": true, + "enable_gpu": true, + "enable_tpu": false, + "enable_internet": true, + "keywords": [ + "gpu" + ], + "dataset_sources": [], + "kernel_sources": [], + "competition_sources": [], + "model_sources": [], + "docker_image": "gcr.io/kaggle-private-byod/python@sha256:00377cd1b3d470a605bc5b0ceca79969e369644e9b36802242a1c70e627372f9", + "machine_shape": "NvidiaTeslaT4" +} diff --git a/experiments/kaggle/nq_llama31_subbpe/nq_llama31_subbpe.py b/experiments/kaggle/nq_llama31_subbpe/nq_llama31_subbpe.py new file mode 100644 index 0000000..2ad1b08 --- /dev/null +++ b/experiments/kaggle/nq_llama31_subbpe/nq_llama31_subbpe.py @@ -0,0 +1,602 @@ +# NexusQuant Llama-3.1-8B-Instruct sub-1.5bpe frontier sweep. 
+# Metric: paired AQUA-iso PPL (n>=60 chunks, prefix=1024, cont=1024) + +# live entropy-coded bpe (zstd-L22 + Shannon) for each config. +# Sub-bpe frontier: K4V2, K3V2, K2V2, K2V1, K1V2 all at pb=0. +# Honest framing: K1* configs expected to be catastrophic; that is a valid +# frontier datapoint, not a failure. NIAH cliff at <4-bit keys is expected. +# Weights: FP16 via NousResearch/Meta-Llama-3.1-8B-Instruct (ungated). +# No HF_TOKEN required: model is ungated. +# rope_scaling: Llama-3.1 uses rope_type=llama3 piecewise; loaded from +# model config via prepare_rope_scaling() and propagated to all +# inverse_rope/forward_rope calls. Never stripped. + +import sys, os, gc, math, time, json, traceback, subprocess + +os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") + +print("Installing deps + nexusquant ...", flush=True) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "numpy<2"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", + "transformers>=5.5.3", "accelerate>=1.1.1", "datasets", "zstandard"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", + "git+https://github.com/jagmarques/nexusquant.git@main"]) + +import numpy as np +import torch +import torch.nn.functional as F +from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, DynamicCache +from datasets import load_dataset + +from nexusquant.core.e8_lattice import E8Lattice +from nexusquant.core.hadamard import hadamard_matrix +from nexusquant.core.rope_utils import inverse_rope, forward_rope, prepare_rope_scaling + +torch.manual_seed(42) + +# Model: ungated (NousResearch mirror, no HF_TOKEN). Primary choice; on load +# failure the except block in main() will print the error and exit loudly. +MODEL_ID = "NousResearch/Meta-Llama-3.1-8B-Instruct" +MODEL_NAME = "Llama-3.1-8B-Instruct" + +# rope_scaling for Llama-3.1: loaded from config via prepare_rope_scaling(). 
+# Do NOT hardcode; let the live config drive it to stay accurate. +ROPE_THETA = 500_000.0 # Llama-3.1 rope_theta (overridden from config if present) +ROPE_SCALING = None # populated after config load + +PREFIX_LEN = 1024 +CONT_LEN = 1024 +SEG_LEN = PREFIX_LEN + CONT_LEN +N_SEGS_TARGET = 161 +N_SEGS_MIN = 60 +LOGIT_CHUNK = 256 +CONT_CHUNK = 256 +MAX_MEMORY = {0: "11GiB", 1: "14GiB"} +OUT_PATH = "/kaggle/working/nq_llama31_subbpe.json" + +# Configs: (label, k_bits, v_bits, pb). +# K4V2 added because Llama-3.1 needs >=4-bit keys for stable NIAH; +# K1* configs are honest frontier datapoints (expected cliff). +CONFIGS = [ + ("FP16", 0, 0, 0), # baseline; bpe=16.0 + ("K4V2_pb0", 4, 2, 0), # bpe ~3.0 raw + ("K3V2_pb0", 3, 2, 0), # bpe ~2.5 raw + ("K2V2_pb0", 2, 2, 0), # bpe ~2.0 raw + ("K2V1_pb0", 2, 1, 0), # bpe ~1.5 raw + ("K1V2_pb0", 1, 2, 0), # bpe ~1.5 raw +] + +result = { + "model_name": MODEL_NAME, + "model_id": MODEL_ID, + "weights": "FP16 (NousResearch/Meta-Llama-3.1-8B-Instruct, ungated)", + "protocol": ( + "AQUA-iso paired-PPL: wikitext-2-raw-v1 test split sliced into " + f"{SEG_LEN}-token windows (prefix={PREFIX_LEN}, cont={CONT_LEN}). " + "Per window: prefill prefix ONCE (FP16 KV), score cont -> base PPL. " + "Then for each quant config re-quantize SAME cached prefix KV and " + "rescore SAME cont. Paired delta% = (quant_ppl - base_ppl)/base_ppl*100. " + "rope_scaling loaded from model config via prepare_rope_scaling(), " + "propagated to inverse_rope/forward_rope for bit-exact llama3-type " + "piecewise frequency scaling." + ), + "frontier_note": ( + "Sub-1.5bpe frontier on Llama-3.1-8B-Instruct. K1* configs expected " + "to be catastrophic (PPL cliff at <4-bit keys). Catastrophic PPL is a " + "valid frontier datapoint. NIAH viability requires >=4-bit keys on this " + "architecture. Lead with bpe<->PPL frontier chart." 
+ ), + "prefix_len": PREFIX_LEN, + "cont_len": CONT_LEN, + "n_segs_target": N_SEGS_TARGET, + "n_segs_min": N_SEGS_MIN, + "run_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "configs_meta": [], + "ppl_results": {}, + "bpe_results": {}, + "frontier": [], + "base_fp16_ppl": None, + "n_segments": 0, + "model_config": {}, + "rope_scaling_loaded": None, + "gpu": None, + "transformers": None, + "errors": {}, +} + + +def save(): + with open(OUT_PATH, "w") as f: + json.dump(result, f, indent=2) + + +def free(): + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +def to_dyn(c): + return DynamicCache.from_legacy_cache(c) if isinstance(c, tuple) else c + + +def get_kv(c, l): + # transformers >=5.12 DynamicCache may use .layers; <5.12 uses .key_cache. + if hasattr(c, "key_cache"): + return c.key_cache[l], c.value_cache[l] + return c.layers[l].keys, c.layers[l].values + + +def set_kv(c, l, k, v): + if hasattr(c, "key_cache"): + c.key_cache[l] = k + c.value_cache[l] = v + else: + c.layers[l].keys = k + c.layers[l].values = v + + +def n_layers_kv(c): + return len(c.layers) if hasattr(c, "layers") else len(c.key_cache) + + +def cache_len(c): + lens = [] + for l in range(n_layers_kv(c)): + kl, _ = get_kv(c, l) + if kl is not None: + lens.append(kl.shape[2]) + return max(lens) if lens else 0 + + +def clone_cache(kv): + new = DynamicCache() + nl = n_layers_kv(kv) + for l in range(nl): + k, v = get_kv(kv, l) + kk = k.detach().clone() if k is not None else None + vv = v.detach().clone() if v is not None else None + if hasattr(new, "key_cache"): + new.key_cache.append(kk) + new.value_cache.append(vv) + else: + new.update(kk, vv, l) + return new + + +def nq_quantize_kv(kv, kb, vb, rope_theta, rope_scaling): + """Hadamard + E8 per-head with rope_scaling propagated (llama3-type).""" + Hcache = {} + + def H(d): + p2 = 1 + while p2 < d: + p2 *= 2 + if p2 not in Hcache: + Hcache[p2] = hadamard_matrix(p2).cpu() + return Hcache[p2], p2 + + nl = 
n_layers_kv(kv) + quant_count = 0 + codeword_streams = {"K": [], "V": []} + + for l in range(nl): + kl, vl = get_kv(kv, l) + if kl is None: + continue + d_l = kl.shape[-1] + nk = kl.shape[1] + Hm, p2 = H(d_l) + h_pad = p2 - d_l + + for is_k, t_in, b_bits, stream_key in [ + (True, kl, kb, "K"), + (False, vl, vb, "V"), + ]: + t = t_in[0].float().cpu().clone() + lvl = 2 ** b_bits + layer_codes = [] + for h in range(nk): + if is_k: + # Remove RoPE with llama3-type piecewise scaling propagated. + th = inverse_rope(t[h:h+1], base=rope_theta, + rope_scaling=rope_scaling)[0] + else: + th = t[h] + pad_in = F.pad(th, (0, h_pad)) if h_pad > 0 else th + rot = pad_in @ Hm.T + amax = rot.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) + sc = amax / (lvl / 2) + norm = rot / sc + e8p = (8 - p2 % 8) % 8 + if e8p > 0: + norm = F.pad(norm, (0, e8p)) + grp = norm.reshape(-1, 8) + qnt = E8Lattice.nearest_point(grp).clamp(-lvl / 2, lvl / 2) + codes_np = qnt.to(torch.int8).flatten().cpu().numpy() + layer_codes.append(codes_np) + co = qnt.reshape(-1, norm.shape[-1]) + if e8p > 0: + co = co[..., :p2] + q = (co * sc @ Hm)[..., :d_l] + if is_k: + # Re-apply RoPE with llama3-type piecewise scaling. 
def measure_bpe_live(codeword_streams, kb, vb, n_kv_elems, head_dim=128):
    """Measure bits-per-element (bpe) of a quantized codeword stream.

    Args:
        codeword_streams: mapping with "K" and "V" entries, each a list of
            arrays of integer codes. All K chunks are laid out first, then
            all V chunks, and the result is treated as one int8 byte stream.
        kb: nominal key bit-width, used only for the raw bpe figures.
        vb: nominal value bit-width, used only for the raw bpe figures.
        n_kv_elems: unused; kept so existing callers keep working.
        head_dim: vector length that shares one 16-bit scale. The default of
            128 reproduces the original 16/128 = 0.125 bpe overhead.

    Returns:
        ``{"error": "empty codeword stream"}`` when there are no codes at
        all, otherwise a dict with ``n_codewords``, ``bpe_nominal_raw``,
        ``bpe_honest_raw``, ``bpe_zstd_l22``, ``bpe_shannon``,
        ``zstd_bytes`` and ``raw_int8_bytes``.
    """
    # Collect the chunks before concatenating: np.concatenate raises
    # ValueError on an empty list, which previously made the empty-stream
    # guard below unreachable when nothing had been collected.
    chunks = [
        np.asarray(chunk).astype(np.int8).ravel()
        for stream_key in ("K", "V")
        for chunk in codeword_streams.get(stream_key, ())
    ]
    n_total = sum(chunk.size for chunk in chunks)
    if n_total == 0:
        return {"error": "empty codeword stream"}

    # Imported only once there is something to compress, so the error path
    # does not depend on the optional compressor being installed.
    import zstandard

    all_codes = np.concatenate(chunks)
    stream_bytes = all_codes.tobytes()

    # zstd level 22 over the raw int8 bytes, expressed in bits per code.
    zstd_bytes = len(zstandard.ZstdCompressor(level=22).compress(stream_bytes))
    bpe_zstd = zstd_bytes * 8.0 / n_total

    # Zeroth-order Shannon entropy of the empirical code histogram.
    # np.unique only reports symbols that occur, so every p is > 0; the
    # epsilon is kept so reported values stay identical to earlier runs.
    _, counts = np.unique(all_codes, return_counts=True)
    p = counts / counts.sum()
    bpe_shannon = float(-(p * np.log2(p + 1e-12)).sum())

    # "Honest" raw bpe adds the per-vector scale cost to both K and V.
    scale_overhead = 16.0 / head_dim
    bpe_nominal_raw = (kb + vb) / 2.0
    bpe_honest_raw = (kb + scale_overhead + vb + scale_overhead) / 2.0

    return {
        "n_codewords": int(n_total),
        "bpe_nominal_raw": round(bpe_nominal_raw, 4),
        "bpe_honest_raw": round(bpe_honest_raw, 4),
        "bpe_zstd_l22": round(bpe_zstd, 4),
        "bpe_shannon": round(bpe_shannon, 4),
        "zstd_bytes": int(zstd_bytes),
        "raw_int8_bytes": int(len(stream_bytes)),
    }
def ppl_stats(deltas):
    """Summarize paired per-segment PPL deltas (in percent).

    Args:
        deltas: iterable of per-segment delta percentages.

    Returns:
        A dict with the same keys for every input size:
        ``n``, ``mean_delta_pct``, ``paired_sigma_pct`` (sample standard
        deviation), ``sem_pct`` (sigma / sqrt(n)), ``z`` (mean / sem),
        ``significant_at_2sigma`` and ``n_negative_segments``.
        For empty input the numeric summaries are ``None``; with a single
        sample, or zero spread, ``z`` is ``None`` because the standard
        error is not positive.
    """
    import statistics as st

    deltas = list(deltas)  # accept any iterable, not only sized sequences
    n = len(deltas)
    if n == 0:
        # Same key set as the populated case so every config row in the
        # result JSON shares one schema.
        return {
            "n": 0,
            "mean_delta_pct": None,
            "paired_sigma_pct": None,
            "sem_pct": None,
            "z": None,
            "significant_at_2sigma": False,
            "n_negative_segments": 0,
        }

    mean = sum(deltas) / n
    # Sample standard deviation needs at least two points.
    sigma = st.stdev(deltas) if n > 1 else 0.0
    sem = sigma / math.sqrt(n) if n > 1 else 0.0
    z = (mean / sem) if sem > 0 else None
    n_neg = sum(1 for d in deltas if d < 0)
    return {
        "n": n,
        "mean_delta_pct": round(mean, 4),
        "paired_sigma_pct": round(sigma, 4),
        "sem_pct": round(sem, 4),
        "z": round(z, 2) if z is not None else None,
        # Significance is decided on the unrounded z.
        "significant_at_2sigma": (abs(z) >= 2.0) if z is not None else False,
        "n_negative_segments": n_neg,
    }
transformers + import zstandard # verify present before any GPU work + + gpu_info = None + if torch.cuda.is_available(): + p0 = torch.cuda.get_device_properties(0) + gpu_info = { + "name": p0.name, + "sm": f"{p0.major}{p0.minor}", + "n_gpus": torch.cuda.device_count(), + "total_gb": round(p0.total_memory / 1024**3, 1), + } + print(f"[start] GPU={gpu_info} transformers={transformers.__version__}", flush=True) + result["gpu"] = gpu_info + result["transformers"] = transformers.__version__ + save() + + # Load model config; read rope_theta and rope_scaling from it (never hardcode). + print(f"[config] resolving {MODEL_ID} ...", flush=True) + _cfg = AutoConfig.from_pretrained(MODEL_ID) + n_kv = getattr(_cfg, "num_key_value_heads", 8) + head_dim = (getattr(_cfg, "head_dim", None) + or _cfg.hidden_size // _cfg.num_attention_heads) + rope_theta_cfg = float(getattr(_cfg, "rope_theta", ROPE_THETA)) + rope_scaling_cfg = prepare_rope_scaling(_cfg) + n_layers = _cfg.num_hidden_layers + ROPE_THETA = rope_theta_cfg + ROPE_SCALING = rope_scaling_cfg + rs_type = (rope_scaling_cfg.get("rope_type") or rope_scaling_cfg.get("type") + if rope_scaling_cfg else None) + print(f" n_kv_heads={n_kv} head_dim={head_dim} rope_theta={rope_theta_cfg} " + f"rope_scaling_type={rs_type} n_layers={n_layers}", flush=True) + result["model_config"] = { + "n_kv_heads": n_kv, "head_dim": head_dim, + "rope_theta": rope_theta_cfg, "n_layers": n_layers, + "rope_scaling_type": rs_type, + } + result["rope_scaling_loaded"] = rope_scaling_cfg + save() + + # Configs metadata. + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + meta = {"label": label, "kb": 0, "vb": 0, "pb": 0, + "bpe_nominal_raw": 16.0, "bpe_honest_raw": 16.0} + else: + meta = {"label": label, "kb": kb, "vb": vb, "pb": pb, + "bpe_nominal_raw": (kb + vb) / 2.0, + "bpe_honest_raw": (kb + 0.125 + vb + 0.125) / 2.0} + result["configs_meta"].append(meta) + save() + + # Load wikitext-2 test set. 
+ print("[data] loading wikitext-2-raw test ...", flush=True) + tok = AutoTokenizer.from_pretrained(MODEL_ID) + ds = load_dataset("Salesforce/wikitext", "wikitext-2-raw-v1", split="test") + text = "\n\n".join(r["text"] for r in ds if r["text"].strip()) + bos_id = tok.bos_token_id + all_ids = tok(text, return_tensors="pt", add_special_tokens=False).input_ids[0] + n_possible = all_ids.shape[0] // SEG_LEN + n_use = min(N_SEGS_TARGET, n_possible) + if n_use < N_SEGS_MIN: + msg = (f"only {n_possible} non-overlapping {SEG_LEN}-token windows; " + f"need >= {N_SEGS_MIN}") + result["errors"]["window_count"] = msg + save() + sys.exit(f"FAIL: {msg}") + segs = [all_ids[i * SEG_LEN:(i + 1) * SEG_LEN] for i in range(n_use)] + segs = [s for s in segs if s.shape[0] == SEG_LEN] + print(f" {all_ids.shape[0]} tokens -> {n_possible} windows, " + f"using {len(segs)}", flush=True) + result["n_segments"] = len(segs) + save() + + # Load model FP16. + print(f"[model] loading {MODEL_ID} fp16 device_map=auto ...", flush=True) + t0 = time.time() + model = AutoModelForCausalLM.from_pretrained( + MODEL_ID, + torch_dtype=torch.float16, + device_map="auto", + max_memory=MAX_MEMORY, + attn_implementation="eager", + low_cpu_mem_usage=True, + ) + model.eval() + print(f" loaded in {time.time()-t0:.1f}s " + f"mem0={torch.cuda.memory_allocated(0)/1e9:.1f}GB " + f"mem1={torch.cuda.memory_allocated(1)/1e9:.1f}GB", flush=True) + device = next(model.parameters()).device + dmap = getattr(model, "hf_device_map", None) + result["device_map"] = ({str(k): str(v) for k, v in dmap.items()} + if dmap else None) + save() + + # PPL sweep. 
+ base_nll_total, base_tok_total = 0.0, 0 + base_ppls = [] + quant_deltas = {lbl: [] for lbl, kb, vb, pb in CONFIGS if lbl != "FP16"} + bpe_codewords_collected = {lbl: None for lbl, kb, vb, pb in CONFIGS if lbl != "FP16"} + bpe_segment_done = False + diag_done = False + + for si, seg in enumerate(segs): + try: + prefix = seg[:PREFIX_LEN] + cont = seg[PREFIX_LEN:] + if bos_id is not None: + prefix = torch.cat([torch.tensor([bos_id], dtype=prefix.dtype), prefix]) + pre = prefix.unsqueeze(0).to(device) + cont_t = cont.unsqueeze(0).to(device) + if cont_t.shape[1] <= 1: + continue + + with torch.no_grad(): + out_pre = model(pre, use_cache=True) + base_kv_master = to_dyn(out_pre.past_key_values) + del out_pre + free() + + b_kv = clone_cache(base_kv_master) + b_nll, b_tok = score_continuation(model, b_kv, cont_t, device) + del b_kv + free() + if b_tok == 0: + del base_kv_master + free() + continue + base_seg_ppl = math.exp(b_nll / b_tok) + base_ppls.append(base_seg_ppl) + base_nll_total += b_nll + base_tok_total += b_tok + + if not diag_done: + print(f" [diag] prefix_with_bos={pre.shape[1]} cont={cont_t.shape[1]} " + f"b_tok={b_tok} base_seg_ppl={base_seg_ppl:.3f}", flush=True) + + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + continue + qkv = clone_cache(base_kv_master) + qkv, qc, cw_streams = nq_quantize_kv( + qkv, kb, vb, ROPE_THETA, ROPE_SCALING) + if not diag_done: + print(f" [{label}] quantized {qc} layers", flush=True) + q_nll, q_tok = score_continuation(model, qkv, cont_t, device) + del qkv + free() + if q_tok == 0: + continue + q_seg_ppl = math.exp(q_nll / q_tok) + delta = 100.0 * (q_seg_ppl - base_seg_ppl) / base_seg_ppl + quant_deltas[label].append(delta) + if not bpe_segment_done and bpe_codewords_collected[label] is None: + n_kv_elems = (sum(c.size for c in cw_streams["K"]) + + sum(c.size for c in cw_streams["V"])) + bpe_codewords_collected[label] = (cw_streams, kb, vb, n_kv_elems) + + diag_done = True + if not bpe_segment_done: + bpe_segment_done 
= True + + del base_kv_master + free() + + if si % 10 == 0 or si == len(segs) - 1: + parts = [f"seg {si}: base_ppl={base_seg_ppl:.3f}"] + for lbl, *_ in CONFIGS: + if lbl == "FP16": + continue + dlist = quant_deltas[lbl] + last_d = (f"{dlist[-1]:.3f}%" if dlist else "n/a") + parts.append(f"{lbl}={last_d}") + print(" " + " ".join(parts), flush=True) + + if si % 20 == 0: + if base_tok_total > 0: + result["base_fp16_ppl"] = round( + math.exp(base_nll_total / base_tok_total), 4) + result["n_segments"] = len(base_ppls) + for lbl, *_ in CONFIGS: + if lbl == "FP16": + continue + result["ppl_results"][lbl] = ppl_stats(quant_deltas[lbl]) + save() + + except Exception: + traceback.print_exc() + result["errors"][f"seg_{si}"] = traceback.format_exc()[-600:] + free() + + # Final PPL stats. + if base_tok_total > 0: + result["base_fp16_ppl"] = round(math.exp(base_nll_total / base_tok_total), 4) + result["n_segments"] = len(base_ppls) + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + continue + st_r = ppl_stats(quant_deltas[label]) + st_r["per_segment_delta_pct"] = quant_deltas[label] + result["ppl_results"][label] = st_r + save() + + # BPE measurement. 
+ print("\n[phase] live bpe measurement ...", flush=True) + result["bpe_results"]["FP16"] = {"bpe_nominal_raw": 16.0, "bpe_honest_raw": 16.0, + "bpe_zstd_l22": 16.0, "bpe_shannon": 16.0} + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + continue + collected = bpe_codewords_collected.get(label) + if collected is None: + result["bpe_results"][label] = {"error": "no codewords collected"} + continue + cw_streams, kb_, vb_, n_kv_elems = collected + try: + bpe_m = measure_bpe_live(cw_streams, kb_, vb_, n_kv_elems) + result["bpe_results"][label] = bpe_m + print(f" [{label}] bpe_honest_raw={bpe_m['bpe_honest_raw']:.3f} " + f"bpe_zstd={bpe_m['bpe_zstd_l22']:.3f} " + f"bpe_shannon={bpe_m['bpe_shannon']:.3f}", flush=True) + except Exception: + traceback.print_exc() + result["bpe_results"][label] = {"error": traceback.format_exc()[-300:]} + save() + + # Build frontier table. + print("\n[phase] building frontier table ...", flush=True) + frontier = [] + for label, kb, vb, pb in CONFIGS: + bpe_m = result["bpe_results"].get(label, {}) + ppl_m = result["ppl_results"].get(label, {}) + bpe_h = bpe_m.get("bpe_honest_raw", 16.0 if label == "FP16" else None) + bpe_z = bpe_m.get("bpe_zstd_l22") + bpe_s = bpe_m.get("bpe_shannon") + mean_d = ppl_m.get("mean_delta_pct") if label != "FP16" else 0.0 + sem_d = ppl_m.get("sem_pct") if label != "FP16" else 0.0 + row = { + "label": label, + "kb": kb, + "vb": vb, + "bpe_honest_raw": bpe_h, + "bpe_zstd_l22": bpe_z, + "bpe_shannon": bpe_s, + "mean_ppl_delta_pct": mean_d, + "sem_pct": sem_d, + } + frontier.append(row) + bpe_str = f"{bpe_h:.3f}" if bpe_h is not None else "n/a" + bpez_str = f"{bpe_z:.3f}" if bpe_z is not None else "n/a" + bpes_str = f"{bpe_s:.3f}" if bpe_s is not None else "n/a" + dmean_str = f"{mean_d:.4f}%" if mean_d is not None else "n/a" + dsem_str = f"{sem_d:.4f}%" if sem_d is not None else "n/a" + print(f" {label:12s} bpe_honest={bpe_str} " + f"bpe_zstd={bpez_str} bpe_shannon={bpes_str} " + f"ppl_delta={dmean_str} 
+/-{dsem_str}", flush=True) + result["frontier"] = frontier + result["run_complete_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + # SAVE JSON before any summary print (robustness requirement). + save() + + print("\n========== LLAMA-3.1-8B-INSTRUCT SUB-BPE FRONTIER ==========", flush=True) + print(json.dumps({ + k: v for k, v in result.items() + if k not in ("errors", "ppl_results") + }, indent=2), flush=True) + print(f"\n[done] -> {OUT_PATH} n_segs={result['n_segments']} " + f"base_fp16_ppl={result['base_fp16_ppl']}", flush=True) + + +if __name__ == "__main__": + main() diff --git a/experiments/kaggle/nq_mistral_niah_frontier/kernel-metadata.json b/experiments/kaggle/nq_mistral_niah_frontier/kernel-metadata.json new file mode 100644 index 0000000..6585ef2 --- /dev/null +++ b/experiments/kaggle/nq_mistral_niah_frontier/kernel-metadata.json @@ -0,0 +1,20 @@ +{ + "id": "jagmarques/nq-mistral-niah-frontier", + "title": "nq-mistral-niah-frontier", + "code_file": "nq_mistral_niah_frontier.py", + "language": "python", + "kernel_type": "script", + "is_private": true, + "enable_gpu": true, + "enable_tpu": false, + "enable_internet": true, + "keywords": [ + "gpu" + ], + "dataset_sources": [], + "kernel_sources": [], + "competition_sources": [], + "model_sources": [], + "docker_image": "gcr.io/kaggle-private-byod/python@sha256:00377cd1b3d470a605bc5b0ceca79969e369644e9b36802242a1c70e627372f9", + "machine_shape": "NvidiaTeslaT4" +} diff --git a/experiments/kaggle/nq_mistral_niah_frontier/nq_mistral_niah_frontier.py b/experiments/kaggle/nq_mistral_niah_frontier/nq_mistral_niah_frontier.py new file mode 100644 index 0000000..74fc6d8 --- /dev/null +++ b/experiments/kaggle/nq_mistral_niah_frontier/nq_mistral_niah_frontier.py @@ -0,0 +1,445 @@ +# NexusQuant NIAH frontier: Mistral-7B-Instruct-v0.3, Kaggle T4x2 (sm_75). +# Configs: FP16, K4V2, K3V2, K2V2, K2V1, K1V2 all at pb=0. +# Harness: chat-template NIAH, ctx=4K and 8K, 5 depths each. 
+# Gate: FP16 must score >=4/5 at 4K before quant configs run. +# Mistral-Inst-v0.3: 8 KV heads, head_dim=128, rope_theta=1e6. +# Weights: ungated unsloth/mistral-7b-instruct-v0.3 (no HF_TOKEN required). + +import sys, os, gc, json, math, time, re, random, traceback, subprocess + +os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") + +print("Installing deps + nexusquant ...", flush=True) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "numpy<2"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", + "transformers>=5.5.3", "accelerate>=1.1.1"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", + "git+https://github.com/jagmarques/nexusquant.git@main"]) + +import torch +import torch.nn.functional as F +from transformers import AutoModelForCausalLM, AutoTokenizer, DynamicCache +from datasets import load_dataset + +from nexusquant.core.e8_lattice import E8Lattice +from nexusquant.core.hadamard import hadamard_matrix +from nexusquant.core.rope_utils import inverse_rope, forward_rope + +SEED = 42 +random.seed(SEED) +torch.manual_seed(SEED) + +MODEL_ID = "unsloth/mistral-7b-instruct-v0.3" +MODEL_NAME = "Mistral-7B-Instruct-v0.3" + +# Mistral-Inst-v0.3: 8 KV heads, head_dim=128, standard RoPE theta=1e6. +ROPE_THETA = 1_000_000.0 +ROPE_PRF = None # full rotary (not partial) + +CONTEXTS = [4096, 8192] # 4K gate first, then 8K +N_DEPTHS = 5 # 5 depth positions per context +MAX_NEW_TOKENS = 64 +MAX_MEMORY = {0: "11GiB", 1: "14GiB"} +OUT_PATH = "/kaggle/working/nq_mistral_niah_frontier.json" + +# Configs: (label, k_bits, v_bits, do_quant). +# Honest raw bpe = (k_bits + v_bits) / 2; FP16 bpe = 16.0 by definition. +# K1* configs are expected to cliff; that is a valid frontier datapoint. 
+CONFIGS = [ + ("FP16", 0, 0, False), + ("K4V2_pb0", 4, 2, True), + ("K3V2_pb0", 3, 2, True), + ("K2V2_pb0", 2, 2, True), + ("K2V1_pb0", 2, 1, True), + ("K1V2_pb0", 1, 2, True), +] + +# Honest bpe per config: raw (k+v)/2; FP16=16.0. +BPE_RAW = { + "FP16": 16.0, + "K4V2_pb0": (4 + 2) / 2.0, # 3.0 + "K3V2_pb0": (3 + 2) / 2.0, # 2.5 + "K2V2_pb0": (2 + 2) / 2.0, # 2.0 + "K2V1_pb0": (2 + 1) / 2.0, # 1.5 + "K1V2_pb0": (1 + 2) / 2.0, # 1.5 +} + +result = { + "model_name": MODEL_NAME, + "model_id": MODEL_ID, + "weights": "FP16 (unsloth/mistral-7b-instruct-v0.3, ungated)", + "harness": "chat-template NIAH, 5 depths per context, prefill+generate", + "contexts": CONTEXTS, + "n_depths": N_DEPTHS, + "bpe_raw": BPE_RAW, + "frontier_note": ( + "K1* configs expected to cliff. Catastrophic NIAH is a valid " + "frontier datapoint; label and report honestly." + ), + "run_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "niah_results": {}, + "fp16_gate": None, + "errors": {}, +} + + +def save(): + with open(OUT_PATH, "w") as f: + json.dump(result, f, indent=2) + + +def free(): + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +def to_dyn(c): + return DynamicCache.from_legacy_cache(c) if isinstance(c, tuple) else c + + +def get_kv(c, l): + if hasattr(c, "key_cache"): + return c.key_cache[l], c.value_cache[l] + return c.layers[l].keys, c.layers[l].values + + +def set_kv(c, l, k, v): + if hasattr(c, "key_cache"): + c.key_cache[l] = k + c.value_cache[l] = v + else: + c.layers[l].keys = k + c.layers[l].values = v + + +def n_layers_kv(c): + return len(c.layers) if hasattr(c, "layers") else len(c.key_cache) + + +def cache_seq_len(c): + lens = [] + for l in range(n_layers_kv(c)): + kl, _ = get_kv(c, l) + if kl is not None: + lens.append(kl.shape[2]) + return max(lens) if lens else 0 + + +def quantize_kv(kv_cache, k_bits, v_bits): + """Hadamard + E8 VQ, all layers, no boundary protection (pb=0).""" + Hcache = {} + + def get_H(d): + p2 = 1 + while p2 < 
def run_niah_ctx(model, tok, label, k_bits, v_bits, do_quant,
                 hay_ids, device, ctx, rng_base):
    """Run needle-in-a-haystack recall at one context length.

    For each of N_DEPTHS evenly spaced depths, a "The value of <key> is
    <val>." sentence is spliced into the haystack tokens, the prefix is
    prefilled, the KV cache is optionally quantized via quantize_kv, and a
    chat-templated question is answered with greedy generation.

    Args:
        model: causal LM used for prefill and generation.
        tok: tokenizer matching ``model``.
        label: config label; also feeds the needle RNG seed.
        k_bits: key bit-width passed to quantize_kv.
        v_bits: value bit-width passed to quantize_kv.
        do_quant: when false the cache is left untouched.
        hay_ids: 1-D tensor of haystack token ids.
        device: device for the input tensors.
        ctx: total token budget for haystack + needle + question.
        rng_base: unused; kept for call-site compatibility.

    Returns:
        Dict with ``config``, ``ctx``, ``hits``, ``n``, ``bpe_raw``,
        ``elapsed_s`` and the per-depth ``cells``. A cell's ``recall`` is
        True/False, or None when the depth errored or did not fit.
    """
    import zlib  # function-scope import: not among the file's top-level imports

    depths = [round((i + 1) / (N_DEPTHS + 1), 4) for i in range(N_DEPTHS)]
    cells = []
    t0 = time.time()

    # hash() of a str is salted per interpreter process, so it cannot be part
    # of a reproducible seed. CRC32 keeps the per-config offset in [0, 999]
    # while staying stable across runs.
    label_offset = zlib.crc32(label.encode("utf-8")) % 1000

    for di, depth in enumerate(depths):
        kv = None
        # Draw the needle before the try block so the logging after the
        # except handler always has target_key/target_val bound.
        rng = random.Random(SEED + di * 1000 + label_offset)

        # 7-digit key, 3-digit value (avoids haystack substring collision).
        target_key = str(rng.randint(1_000_000, 9_999_999))
        target_val = str(rng.randint(100, 999))
        try:
            needle_str = f"The value of {target_key} is {target_val}."
            needle_ids = tok(needle_str, return_tensors="pt",
                             add_special_tokens=False).input_ids[0]

            question = f"What is the value of {target_key}?"
            msgs = [{"role": "user", "content": question}]
            try:
                q_text = tok.apply_chat_template(
                    msgs, add_generation_prompt=True, tokenize=False)
            except Exception:
                # Best-effort fallback: ask the bare question.
                q_text = question + "\n"
            qids = tok(q_text, return_tensors="pt",
                       add_special_tokens=False).input_ids
            bos = tok.bos_token_id
            # Drop a leading BOS from the question tokens if the template
            # emitted one; they are appended after the cached prefix.
            if bos is not None and qids.shape[1] > 0 and qids[0, 0].item() == bos:
                qids = qids[:, 1:]
            qids = qids.to(device)

            hay_budget = ctx - needle_ids.shape[0] - qids.shape[1] - 4
            if hay_budget <= 0:
                cells.append({"depth": depth, "error": "context_too_small",
                              "recall": None})
                continue

            hay = hay_ids[:hay_budget]
            cut = int(depth * hay.shape[0])
            prefix = torch.cat([hay[:cut], needle_ids, hay[cut:]]).unsqueeze(0).to(device)

            with torch.no_grad():
                out1 = model(prefix, use_cache=True)
            kv = to_dyn(out1.past_key_values)
            del out1
            free()

            if do_quant:
                kv, qc = quantize_kv(kv, k_bits, v_bits)
                if di == 0:
                    print(f" [{label}] ctx={ctx} quantized {qc} layers "
                          f"k_bits={k_bits} v_bits={v_bits}", flush=True)

            past_len = cache_seq_len(kv)
            # The mask must span the cached prefix plus the new question tokens.
            am = torch.ones(1, past_len + qids.shape[1],
                            dtype=torch.long, device=device)
            # A pad id of 0 is valid, so test for None rather than falsiness.
            pad_id = (tok.pad_token_id if tok.pad_token_id is not None
                      else tok.eos_token_id)
            with torch.no_grad():
                gen = model.generate(
                    qids, past_key_values=kv, attention_mask=am,
                    max_new_tokens=MAX_NEW_TOKENS, min_new_tokens=4,
                    do_sample=False,
                    pad_token_id=pad_id,
                    use_cache=True,
                )
            gen_ids = gen[0, qids.shape[1]:]
            ans = tok.decode(gen_ids, skip_special_tokens=True).strip()
            al = ans.lower()
            # Prefer "...is <3-digit>" to avoid collision with 7-digit key substrings.
            m = re.search(r"\bis\s+(\d{3})\b", al)
            if m is not None:
                recall = (m.group(1) == target_val)
            else:
                recall = target_val in re.findall(r"\b\d{3}\b", al)
            del gen
        except Exception as e:
            # Record the failure as an ERR cell and keep sweeping depths.
            traceback.print_exc()
            recall = None
            ans = f"{type(e).__name__}: {str(e)[:120]}"

        tag = "YES" if recall is True else ("NO" if recall is False else "ERR")
        print(f" [{label}] ctx={ctx} depth={depth:.2f} key={target_key} "
              f"val={target_val} {tag} ans={ans[:60]!r}", flush=True)
        cells.append({
            "depth": depth,
            "target_key": target_key,
            "target_value": target_val,
            "answer": ans[:160],
            "recall": recall,
        })
        if kv is not None:
            del kv
        free()

    hits = sum(1 for c in cells if c.get("recall") is True)
    elapsed = time.time() - t0
    # Guard None in bpe formatting.
    bpe_val = BPE_RAW.get(label)
    bpe_str = f"{bpe_val:.3f}" if bpe_val is not None else "n/a"
    print(f" [{label}] ctx={ctx} hits={hits}/{N_DEPTHS} bpe_raw={bpe_str} "
          f"elapsed={elapsed:.0f}s", flush=True)
    return {
        "config": label,
        "ctx": ctx,
        "hits": hits,
        "n": N_DEPTHS,
        "bpe_raw": bpe_val,
        "elapsed_s": int(elapsed),
        "cells": cells,
    }
attn_implementation="eager", + low_cpu_mem_usage=True, + ) + model.eval() + print(f" loaded in {time.time()-t0:.1f}s " + f"mem0={torch.cuda.memory_allocated(0)/1e9:.2f}GB " + f"mem1={torch.cuda.memory_allocated(1)/1e9:.2f}GB", flush=True) + + dmap = getattr(model, "hf_device_map", None) + result["device_map"] = ( + {str(k): str(v) for k, v in dmap.items()} if dmap else None) + device = next(model.parameters()).device + + cfg = getattr(model.config, "text_config", model.config) + n_kv = getattr(cfg, "num_key_value_heads", None) + hd = getattr(cfg, "head_dim", None) + theta = getattr(cfg, "rope_theta", None) + n_lay = getattr(cfg, "num_hidden_layers", None) + print(f"[config] n_kv_heads={n_kv} head_dim={hd} " + f"rope_theta={theta} n_layers={n_lay}", flush=True) + result["model_config"] = { + "n_kv_heads": n_kv, "head_dim": hd, + "rope_theta": theta, "n_layers": n_lay, + } + save() + + print("[data] loading wikitext-2 haystack ...", flush=True) + ds = load_dataset("Salesforce/wikitext", "wikitext-2-raw-v1", split="train") + hay_text = "\n\n".join(r["text"] for r in ds if r["text"].strip()) + hay_ids = tok(hay_text, return_tensors="pt", + truncation=True, max_length=80_000).input_ids[0] + print(f" haystack tokenized: {hay_ids.shape[0]} tokens", flush=True) + save() + + # ----------------------------------------------------------------------- + # Phase 1: FP16 gate at 4K. Fail loud if <4/5. 
+ # ----------------------------------------------------------------------- + print("\n[phase 1] FP16 gate at ctx=4096 ...", flush=True) + fp16_4k_res = run_niah_ctx(model, tok, "FP16", 0, 0, False, + hay_ids, device, 4096, SEED) + result["niah_results"]["FP16_4k"] = fp16_4k_res + save() # JSON written BEFORE any summary or gate check + + fp16_4k_hits = fp16_4k_res.get("hits", 0) + gate_pass = (fp16_4k_hits is not None and fp16_4k_hits >= 4) + result["fp16_gate"] = { + "hits_4k": fp16_4k_hits, + "required": 4, + "pass": gate_pass, + } + save() + + if not gate_pass: + msg = (f"GATE FAIL: FP16 ctx=4K scored {fp16_4k_hits}/{N_DEPTHS} " + f"(required >=4). Harness is broken. Aborting quant configs.") + print(f"\n*** {msg} ***", flush=True) + result["errors"]["gate_fail"] = msg + save() + print(json.dumps(result, indent=2), flush=True) + raise SystemExit(1) + + print(f" FP16 gate PASS: {fp16_4k_hits}/{N_DEPTHS} at 4K. " + "Proceeding with full sweep.", flush=True) + + # ----------------------------------------------------------------------- + # Phase 2: Full sweep — all configs at ctx=4K, then ctx=8K. + # ----------------------------------------------------------------------- + print("\n[phase 2] Full NIAH sweep ...", flush=True) + for ctx in CONTEXTS: + for label, k_bits, v_bits, do_quant in CONFIGS: + key = f"{label}_{ctx // 1024}k" + # FP16 at 4K already done above — reuse. 
+ if label == "FP16" and ctx == 4096: + result["niah_results"][key] = fp16_4k_res + save() + continue + + print(f"\n--- {label} ctx={ctx} ---", flush=True) + try: + res = run_niah_ctx(model, tok, label, k_bits, v_bits, do_quant, + hay_ids, device, ctx, SEED + ctx) + result["niah_results"][key] = res + except Exception: + traceback.print_exc() + tb = traceback.format_exc() + result["niah_results"][key] = {"error": tb[-400:]} + result["errors"][f"niah_{key}"] = tb[-400:] + save() # JSON written after every config + free() + + # ----------------------------------------------------------------------- + # Summary (printed AFTER all saves). + # ----------------------------------------------------------------------- + print("\n========== NIAH FRONTIER SUMMARY ==========", flush=True) + print(f" {'Config':<14} {'bpe_raw':>7} {'4K hits':>7} {'8K hits':>7}", flush=True) + for label, k_bits, v_bits, do_quant in CONFIGS: + bpe_val = BPE_RAW.get(label) + bpe_str = f"{bpe_val:.1f}" if bpe_val is not None else "n/a" + h4k_res = result["niah_results"].get(f"{label}_4k", {}) + h8k_res = result["niah_results"].get(f"{label}_8k", {}) + h4k = h4k_res.get("hits") + h8k = h8k_res.get("hits") + h4k_str = f"{h4k}/{N_DEPTHS}" if h4k is not None else "err" + h8k_str = f"{h8k}/{N_DEPTHS}" if h8k is not None else "err" + print(f" {label:<14} {bpe_str:>7} {h4k_str:>7} {h8k_str:>7}", flush=True) + + result["run_complete_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + save() + print(f"\n[done] -> {OUT_PATH}", flush=True) + print(json.dumps(result, indent=2), flush=True) + + +if __name__ == "__main__": + main() diff --git a/experiments/kaggle/nq_mistral_subbpe/kernel-metadata.json b/experiments/kaggle/nq_mistral_subbpe/kernel-metadata.json new file mode 100644 index 0000000..3f2dfac --- /dev/null +++ b/experiments/kaggle/nq_mistral_subbpe/kernel-metadata.json @@ -0,0 +1,20 @@ +{ + "id": "jagmardrop/nq-mistral-subbpe", + "title": "nq-mistral-subbpe", + "code_file": 
"nq_mistral_subbpe.py", + "language": "python", + "kernel_type": "script", + "is_private": true, + "enable_gpu": true, + "enable_tpu": false, + "enable_internet": true, + "keywords": [ + "gpu" + ], + "dataset_sources": [], + "kernel_sources": [], + "competition_sources": [], + "model_sources": [], + "docker_image": "gcr.io/kaggle-private-byod/python@sha256:00377cd1b3d470a605bc5b0ceca79969e369644e9b36802242a1c70e627372f9", + "machine_shape": "NvidiaTeslaT4" +} diff --git a/experiments/kaggle/nq_mistral_subbpe/nq_mistral_subbpe.py b/experiments/kaggle/nq_mistral_subbpe/nq_mistral_subbpe.py new file mode 100644 index 0000000..2351b8b --- /dev/null +++ b/experiments/kaggle/nq_mistral_subbpe/nq_mistral_subbpe.py @@ -0,0 +1,597 @@ +# NexusQuant Mistral-7B-Instruct-v0.3 sub-1.5bpe frontier sweep. +# Metric: paired AQUA-iso PPL (n>=60 chunks, prefix=1024, cont=1024) + +# live entropy-coded bpe (zstd-L22 + Shannon) for each config. +# Sub-bpe frontier: K3V2, K2V2, K2V1, K1V2, K1V1 all at pb=0. +# Honest framing: K1* configs expected to be catastrophic; that is a valid +# frontier datapoint, not a failure. Lead with bpe<->PPL frontier. +# Weights: FP16 (no NF4) via ungated unsloth/mistral-7b-instruct-v0.3. +# No HF_TOKEN required: model is ungated. 
+ +import sys, os, gc, math, time, json, traceback, subprocess + +os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") + +print("Installing deps + nexusquant ...", flush=True) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "numpy<2"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", + "transformers>=5.5.3", "accelerate>=1.1.1", "datasets", "zstandard"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", + "git+https://github.com/jagmarques/nexusquant.git@main"]) + +import numpy as np +import torch +import torch.nn.functional as F +from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, DynamicCache +from datasets import load_dataset + +from nexusquant.core.e8_lattice import E8Lattice +from nexusquant.core.hadamard import hadamard_matrix +from nexusquant.core.rope_utils import inverse_rope, forward_rope + +torch.manual_seed(42) + +# Model: ungated mirror; no HF_TOKEN required. +MODEL_ID = "unsloth/mistral-7b-instruct-v0.3" +MODEL_NAME = "Mistral-7B-Instruct-v0.3" + +# Mistral-Inst-v0.3: 8 KV heads, head_dim=128, rope_theta=1e6, standard RoPE. +ROPE_THETA = 1_000_000.0 +ROPE_SCALING = None + +PREFIX_LEN = 1024 +CONT_LEN = 1024 +SEG_LEN = PREFIX_LEN + CONT_LEN +N_SEGS_TARGET = 161 +N_SEGS_MIN = 60 +LOGIT_CHUNK = 256 +CONT_CHUNK = 256 +MAX_MEMORY = {0: "11GiB", 1: "14GiB"} +OUT_PATH = "/kaggle/working/nq_mistral_subbpe.json" + +# Configs: (label, k_bits, v_bits, pb). +# Theoretical raw bpe (before entropy coding) = (kb + vb) / 2. +# Entropy-coded bpe measured live below for honest reporting. +# FP16 bpe = 16.0 by definition. 
+CONFIGS = [ + ("FP16", 0, 0, 0), # baseline; bpe=16.0 + ("K3V2_pb0", 3, 2, 0), # bpe ~2.5 raw + ("K2V2_pb0", 2, 2, 0), # bpe ~2.0 raw + ("K2V1_pb0", 2, 1, 0), # bpe ~1.5 raw + ("K1V2_pb0", 1, 2, 0), # bpe ~1.5 raw + ("K1V1_pb0", 1, 1, 0), # bpe ~1.0 raw +] + +result = { + "model_name": MODEL_NAME, + "model_id": MODEL_ID, + "weights": "FP16 (unsloth/mistral-7b-instruct-v0.3, ungated)", + "protocol": ( + "AQUA-iso paired-PPL: wikitext-2-raw-v1 test split sliced into " + f"{SEG_LEN}-token windows (prefix={PREFIX_LEN}, cont={CONT_LEN}). " + "Per window: prefill prefix ONCE (FP16 KV), score cont -> base PPL. " + "Then for each quant config re-quantize SAME cached prefix KV and " + "rescore SAME cont. Paired delta% = (quant_ppl - base_ppl)/base_ppl*100." + ), + "frontier_note": ( + "This kernel maps the sub-1.5bpe quality frontier on Mistral-Inst-v0.3. " + "K1* configs are expected to be catastrophic; catastrophic PPL is a valid " + "frontier datapoint, not a failure. Lead with bpe<->PPL frontier." 
+ ), + "prefix_len": PREFIX_LEN, + "cont_len": CONT_LEN, + "n_segs_target": N_SEGS_TARGET, + "n_segs_min": N_SEGS_MIN, + "run_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "configs_meta": [], + "ppl_results": {}, + "bpe_results": {}, + "frontier": [], + "base_fp16_ppl": None, + "n_segments": 0, + "model_config": {}, + "gpu": None, + "transformers": None, + "errors": {}, +} + + +def save(): + with open(OUT_PATH, "w") as f: + json.dump(result, f, indent=2) + + +def free(): + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +def to_dyn(c): + return DynamicCache.from_legacy_cache(c) if isinstance(c, tuple) else c + + +def get_kv(c, l): + if hasattr(c, "key_cache"): + return c.key_cache[l], c.value_cache[l] + return c.layers[l].keys, c.layers[l].values + + +def set_kv(c, l, k, v): + if hasattr(c, "key_cache"): + c.key_cache[l] = k + c.value_cache[l] = v + else: + c.layers[l].keys = k + c.layers[l].values = v + + +def n_layers_kv(c): + return len(c.layers) if hasattr(c, "layers") else len(c.key_cache) + + +def cache_len(c): + lens = [] + for l in range(n_layers_kv(c)): + kl, _ = get_kv(c, l) + if kl is not None: + lens.append(kl.shape[2]) + return max(lens) if lens else 0 + + +def clone_cache(kv): + new = DynamicCache() + nl = n_layers_kv(kv) + for l in range(nl): + k, v = get_kv(kv, l) + kk = k.detach().clone() if k is not None else None + vv = v.detach().clone() if v is not None else None + if hasattr(new, "key_cache"): + new.key_cache.append(kk) + new.value_cache.append(vv) + else: + new.update(kk, vv, l) + return new + + +def nq_quantize_kv(kv, kb, vb): + """Hadamard + E8 per-head, standard RoPE (no pb: all layers quantized).""" + Hcache = {} + + def H(d): + p2 = 1 + while p2 < d: + p2 *= 2 + if p2 not in Hcache: + Hcache[p2] = hadamard_matrix(p2).cpu() + return Hcache[p2], p2 + + nl = n_layers_kv(kv) + quant_count = 0 + codeword_streams = {"K": [], "V": []} # collect for bpe measurement + + for l in range(nl): + kl, vl = 
get_kv(kv, l) + if kl is None: + continue + d_l = kl.shape[-1] + nk = kl.shape[1] + Hm, p2 = H(d_l) + h_pad = p2 - d_l + + for is_k, t_in, b_bits, stream_key in [ + (True, kl, kb, "K"), + (False, vl, vb, "V"), + ]: + t = t_in[0].float().cpu().clone() + lvl = 2 ** b_bits + layer_codes = [] + for h in range(nk): + if is_k: + th = inverse_rope(t[h:h+1], base=ROPE_THETA, + rope_scaling=ROPE_SCALING)[0] + else: + th = t[h] + pad_in = F.pad(th, (0, h_pad)) if h_pad > 0 else th + rot = pad_in @ Hm.T + amax = rot.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8) + sc = amax / (lvl / 2) + norm = rot / sc + e8p = (8 - p2 % 8) % 8 + if e8p > 0: + norm = F.pad(norm, (0, e8p)) + grp = norm.reshape(-1, 8) + qnt = E8Lattice.nearest_point(grp).clamp(-lvl / 2, lvl / 2) + codes_np = qnt.to(torch.int8).flatten().cpu().numpy() + layer_codes.append(codes_np) + co = qnt.reshape(-1, norm.shape[-1]) + if e8p > 0: + co = co[..., :p2] + q = (co * sc @ Hm)[..., :d_l] + if is_k: + t[h] = forward_rope(q.unsqueeze(0), base=ROPE_THETA, + rope_scaling=ROPE_SCALING)[0] + else: + t[h] = q + codeword_streams[stream_key].extend(layer_codes) + tdev = t_in.device + tdt = t_in.dtype + if is_k: + set_kv(kv, l, t.unsqueeze(0).to(dtype=tdt, device=tdev), vl) + else: + kn, _ = get_kv(kv, l) + set_kv(kv, l, kn, t.unsqueeze(0).to(dtype=tdt, device=tdev)) + quant_count += 1 + + return kv, quant_count, codeword_streams + + +def measure_bpe_live(codeword_streams, kb, vb, n_kv_elems): + """Compute live entropy-coded bpe: zstd-L22 + Shannon over the codeword stream.""" + import zstandard + all_codes_k = np.concatenate(codeword_streams["K"]).astype(np.int8) + all_codes_v = np.concatenate(codeword_streams["V"]).astype(np.int8) + all_codes = np.concatenate([all_codes_k, all_codes_v]).astype(np.int8) + n_total = all_codes.size + + if n_total == 0: + return {"error": "empty codeword stream"} + + # Raw byte stream for zstd. + stream_bytes = all_codes.tobytes() + + # zstd-L22 bpe. 
+ cctx = zstandard.ZstdCompressor(level=22) + zstd_bytes = len(cctx.compress(stream_bytes)) + bpe_zstd = zstd_bytes * 8.0 / n_total + + # Shannon entropy bpe. + vals, counts = np.unique(all_codes.astype(np.int32) + 128, return_counts=True) + p = counts / counts.sum() + bpe_shannon = float(-(p * np.log2(p + 1e-12)).sum()) + + # Nominal raw bpe = (kb + vb) / 2 (no scales/indices overhead). + # Honest bpe (scales as fp16 one per head per token): computed from n_kv_elems. + # Scale overhead: one fp16 (2 bytes = 16 bits) per (seq_len * n_kv_heads) group of head_dim elems. + # Each E8 group is 8 elements so scales_per_elem = 1 / head_dim (one per head per position). + # Honest bpe includes the scale bits amortized over elements. + # head_dim=128 => each scale covers 128 elems => scale bits per elem = 16/128 = 0.125 + # bpe_honest = (kb_or_vb + 0.125) averaged over K and V. + bpe_nominal_raw = (kb + vb) / 2.0 + bpe_honest_raw = (kb + 0.125 + vb + 0.125) / 2.0 + + return { + "n_codewords": int(n_total), + "bpe_nominal_raw": round(bpe_nominal_raw, 4), + "bpe_honest_raw": round(bpe_honest_raw, 4), + "bpe_zstd_l22": round(bpe_zstd, 4), + "bpe_shannon": round(bpe_shannon, 4), + "zstd_bytes": int(zstd_bytes), + "raw_int8_bytes": int(len(stream_bytes)), + } + + +def score_continuation(model, kv, cont_t, device): + seg_nll, seg_tok = 0.0, 0 + cur = 0 + while cur < cont_t.shape[1] - 1: + end = min(cur + CONT_CHUNK, cont_t.shape[1]) + chunk = cont_t[:, cur:end] + past_len = (kv.get_seq_length() if hasattr(kv, "get_seq_length") else cache_len(kv)) + cache_pos = torch.arange(past_len, past_len + chunk.shape[1], device=device) + with torch.no_grad(): + out2 = model(chunk, past_key_values=kv, cache_position=cache_pos, use_cache=True) + logits = out2.logits[0, :-1, :] + kv = to_dyn(out2.past_key_values) + tgts = chunk[0, 1:] + m = tgts.shape[0] + pos = 0 + while pos < m: + e2 = min(pos + LOGIT_CHUNK, m) + lc = logits[pos:e2].float() + wsum = F.cross_entropy(lc, tgts[pos:e2].to(device), 
reduction="sum").item() + if math.isfinite(wsum): + seg_nll += wsum + seg_tok += (e2 - pos) + del lc + pos = e2 + del out2, logits + cur = end + free() + return seg_nll, seg_tok + + +def ppl_stats(deltas): + import statistics as st + n = len(deltas) + if n == 0: + return {"n": 0, "mean_delta_pct": None, "sem_pct": None, "z": None} + mean = sum(deltas) / n + sigma = st.stdev(deltas) if n > 1 else 0.0 + sem = sigma / math.sqrt(n) if n > 1 else 0.0 + z = (mean / sem) if sem > 0 else None + n_neg = sum(1 for d in deltas if d < 0) + return { + "n": n, + "mean_delta_pct": round(mean, 4), + "paired_sigma_pct": round(sigma, 4), + "sem_pct": round(sem, 4), + "z": round(z, 2) if z is not None else None, + "significant_at_2sigma": (abs(z) >= 2.0) if z is not None else False, + "n_negative_segments": n_neg, + } + + +def main(): + import transformers + import zstandard # verify present before any GPU work + + gpu_info = None + if torch.cuda.is_available(): + p0 = torch.cuda.get_device_properties(0) + gpu_info = { + "name": p0.name, + "sm": f"{p0.major}{p0.minor}", + "n_gpus": torch.cuda.device_count(), + "total_gb": round(p0.total_memory / 1024**3, 1), + } + print(f"[start] GPU={gpu_info} transformers={transformers.__version__}", flush=True) + result["gpu"] = gpu_info + result["transformers"] = transformers.__version__ + save() + + # Populate configs metadata. + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + meta = {"label": label, "kb": 0, "vb": 0, "pb": 0, + "bpe_nominal_raw": 16.0, "bpe_honest_raw": 16.0} + else: + meta = {"label": label, "kb": kb, "vb": vb, "pb": pb, + "bpe_nominal_raw": (kb + vb) / 2.0, + "bpe_honest_raw": (kb + 0.125 + vb + 0.125) / 2.0} + result["configs_meta"].append(meta) + save() + + # Resolve model config. 
+ print(f"[config] resolving {MODEL_ID} ...", flush=True) + _cfg = AutoConfig.from_pretrained(MODEL_ID) + n_kv = getattr(_cfg, "num_key_value_heads", 8) + head_dim = (getattr(_cfg, "head_dim", None) + or _cfg.hidden_size // _cfg.num_attention_heads) + rope_theta = float(getattr(_cfg, "rope_theta", ROPE_THETA)) + n_layers = _cfg.num_hidden_layers + print(f" n_kv_heads={n_kv} head_dim={head_dim} rope_theta={rope_theta} " + f"n_layers={n_layers}", flush=True) + result["model_config"] = { + "n_kv_heads": n_kv, "head_dim": head_dim, + "rope_theta": rope_theta, "n_layers": n_layers, + } + save() + + # Load wikitext-2 test set. + print("[data] loading wikitext-2-raw test ...", flush=True) + tok = AutoTokenizer.from_pretrained(MODEL_ID) + ds = load_dataset("Salesforce/wikitext", "wikitext-2-raw-v1", split="test") + text = "\n\n".join(r["text"] for r in ds if r["text"].strip()) + bos_id = tok.bos_token_id + all_ids = tok(text, return_tensors="pt", add_special_tokens=False).input_ids[0] + n_possible = all_ids.shape[0] // SEG_LEN + n_use = min(N_SEGS_TARGET, n_possible) + if n_use < N_SEGS_MIN: + msg = (f"only {n_possible} non-overlapping {SEG_LEN}-token windows; " + f"need >= {N_SEGS_MIN}") + result["errors"]["window_count"] = msg + save() + sys.exit(f"FAIL: {msg}") + segs = [all_ids[i * SEG_LEN:(i + 1) * SEG_LEN] for i in range(n_use)] + segs = [s for s in segs if s.shape[0] == SEG_LEN] + print(f" {all_ids.shape[0]} tokens -> {n_possible} windows possible, " + f"using {len(segs)}", flush=True) + result["n_segments"] = len(segs) + save() + + # Load model FP16. 
+ print(f"[model] loading {MODEL_ID} fp16 device_map=auto ...", flush=True) + t0 = time.time() + model = AutoModelForCausalLM.from_pretrained( + MODEL_ID, + torch_dtype=torch.float16, + device_map="auto", + max_memory=MAX_MEMORY, + attn_implementation="eager", + low_cpu_mem_usage=True, + ) + model.eval() + print(f" loaded in {time.time()-t0:.1f}s " + f"mem0={torch.cuda.memory_allocated(0)/1e9:.1f}GB " + f"mem1={torch.cuda.memory_allocated(1)/1e9:.1f}GB", flush=True) + device = next(model.parameters()).device + dmap = getattr(model, "hf_device_map", None) + result["device_map"] = ({str(k): str(v) for k, v in dmap.items()} + if dmap else None) + save() + + # PPL sweep: per segment, prefill once, score all quant configs. + base_nll_total, base_tok_total = 0.0, 0 + base_ppls = [] + quant_deltas = {lbl: [] for lbl, kb, vb, pb in CONFIGS if lbl != "FP16"} + # Collect bpe codewords from FIRST segment only (fast, representative). + bpe_codewords_collected = {lbl: None for lbl, kb, vb, pb in CONFIGS if lbl != "FP16"} + bpe_segment_done = False + + diag_done = False + for si, seg in enumerate(segs): + try: + prefix = seg[:PREFIX_LEN] + cont = seg[PREFIX_LEN:] + if bos_id is not None: + prefix = torch.cat([torch.tensor([bos_id], dtype=prefix.dtype), prefix]) + pre = prefix.unsqueeze(0).to(device) + cont_t = cont.unsqueeze(0).to(device) + if cont_t.shape[1] <= 1: + continue + + with torch.no_grad(): + out_pre = model(pre, use_cache=True) + base_kv_master = to_dyn(out_pre.past_key_values) + del out_pre + free() + + # Score baseline continuation. 
+ b_kv = clone_cache(base_kv_master) + b_nll, b_tok = score_continuation(model, b_kv, cont_t, device) + del b_kv + free() + if b_tok == 0: + del base_kv_master + free() + continue + base_seg_ppl = math.exp(b_nll / b_tok) + base_ppls.append(base_seg_ppl) + base_nll_total += b_nll + base_tok_total += b_tok + + if not diag_done: + print(f" [diag] prefix_with_bos={pre.shape[1]} cont={cont_t.shape[1]} " + f"b_tok={b_tok} base_seg_ppl={base_seg_ppl:.3f}", flush=True) + + # Score each quant config. + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + continue + qkv = clone_cache(base_kv_master) + qkv, qc, cw_streams = nq_quantize_kv(qkv, kb, vb) + if not diag_done: + print(f" [{label}] quantized {qc} layers", flush=True) + q_nll, q_tok = score_continuation(model, qkv, cont_t, device) + del qkv + free() + if q_tok == 0: + continue + q_seg_ppl = math.exp(q_nll / q_tok) + delta = 100.0 * (q_seg_ppl - base_seg_ppl) / base_seg_ppl + quant_deltas[label].append(delta) + # Collect bpe codewords from first segment. + if not bpe_segment_done and bpe_codewords_collected[label] is None: + n_kv_elems = (sum(c.size for c in cw_streams["K"]) + + sum(c.size for c in cw_streams["V"])) + bpe_codewords_collected[label] = (cw_streams, kb, vb, n_kv_elems) + + diag_done = True + if not bpe_segment_done: + bpe_segment_done = True + + del base_kv_master + free() + + if si % 10 == 0 or si == len(segs) - 1: + parts = [f"seg {si}: base_ppl={base_seg_ppl:.3f}"] + for lbl, *_ in CONFIGS: + if lbl == "FP16": + continue + dlist = quant_deltas[lbl] + # None-guard: only format if list is non-empty. 
+ last_d = (f"{dlist[-1]:.3f}%" if dlist else "n/a") + parts.append(f"{lbl}={last_d}") + print(" " + " ".join(parts), flush=True) + + if si % 20 == 0: + if base_tok_total > 0: + result["base_fp16_ppl"] = round( + math.exp(base_nll_total / base_tok_total), 4) + result["n_segments"] = len(base_ppls) + for lbl, *_ in CONFIGS: + if lbl == "FP16": + continue + result["ppl_results"][lbl] = ppl_stats(quant_deltas[lbl]) + save() + + except Exception: + traceback.print_exc() + result["errors"][f"seg_{si}"] = traceback.format_exc()[-600:] + free() + + # Final PPL stats. + if base_tok_total > 0: + result["base_fp16_ppl"] = round(math.exp(base_nll_total / base_tok_total), 4) + result["n_segments"] = len(base_ppls) + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + continue + st = ppl_stats(quant_deltas[label]) + st["per_segment_delta_pct"] = quant_deltas[label] + result["ppl_results"][label] = st + save() + + # BPE measurement from collected codewords (first segment). + print("\n[phase] live bpe measurement ...", flush=True) + # FP16 bpe is always 16.0 by definition. + result["bpe_results"]["FP16"] = {"bpe_nominal_raw": 16.0, "bpe_honest_raw": 16.0, + "bpe_zstd_l22": 16.0, "bpe_shannon": 16.0} + for label, kb, vb, pb in CONFIGS: + if label == "FP16": + continue + collected = bpe_codewords_collected.get(label) + if collected is None: + result["bpe_results"][label] = {"error": "no codewords collected"} + continue + cw_streams, kb_, vb_, n_kv_elems = collected + try: + bpe_m = measure_bpe_live(cw_streams, kb_, vb_, n_kv_elems) + result["bpe_results"][label] = bpe_m + print(f" [{label}] bpe_honest_raw={bpe_m['bpe_honest_raw']:.3f} " + f"bpe_zstd={bpe_m['bpe_zstd_l22']:.3f} " + f"bpe_shannon={bpe_m['bpe_shannon']:.3f}", flush=True) + except Exception: + traceback.print_exc() + result["bpe_results"][label] = {"error": traceback.format_exc()[-300:]} + save() + + # Build frontier table: bpe (honest_raw) vs mean PPL delta. 
+ print("\n[phase] building frontier table ...", flush=True) + frontier = [] + for label, kb, vb, pb in CONFIGS: + bpe_m = result["bpe_results"].get(label, {}) + ppl_m = result["ppl_results"].get(label, {}) + bpe_h = bpe_m.get("bpe_honest_raw", 16.0) + bpe_z = bpe_m.get("bpe_zstd_l22") + bpe_s = bpe_m.get("bpe_shannon") + mean_d = ppl_m.get("mean_delta_pct") if label != "FP16" else 0.0 + sem_d = ppl_m.get("sem_pct") if label != "FP16" else 0.0 + # None-safe formatting: only emit if bpe_h is numeric. + row = { + "label": label, + "kb": kb, + "vb": vb, + "bpe_honest_raw": bpe_h, + "bpe_zstd_l22": bpe_z, + "bpe_shannon": bpe_s, + "mean_ppl_delta_pct": mean_d, + "sem_pct": sem_d, + } + frontier.append(row) + bpe_str = f"{bpe_h:.3f}" if bpe_h is not None else "n/a" + bpez_str = f"{bpe_z:.3f}" if bpe_z is not None else "n/a" + bpes_str = f"{bpe_s:.3f}" if bpe_s is not None else "n/a" + dmean_str = f"{mean_d:.4f}%" if mean_d is not None else "n/a" + dsem_str = f"{sem_d:.4f}%" if sem_d is not None else "n/a" + print(f" {label:12s} bpe_honest={bpe_str} " + f"bpe_zstd={bpez_str} bpe_shannon={bpes_str} " + f"ppl_delta={dmean_str} +/-{dsem_str}", flush=True) + result["frontier"] = frontier + result["run_complete_utc"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + + # SAVE JSON before any summary print (robustness requirement). + save() + + # Summary print after save. 
+ print("\n========== MISTRAL SUB-BPE FRONTIER ==========", flush=True) + print(json.dumps({ + k: v for k, v in result.items() + if k not in ("errors", "ppl_results") + }, indent=2), flush=True) + print(f"\n[done] -> {OUT_PATH} n_segs={result['n_segments']} " + f"base_fp16_ppl={result['base_fp16_ppl']}", flush=True) + + +if __name__ == "__main__": + main() diff --git a/experiments/kaggle/nq_qwen3_subbpe_entropy/kernel-metadata.json b/experiments/kaggle/nq_qwen3_subbpe_entropy/kernel-metadata.json new file mode 100644 index 0000000..c7287c6 --- /dev/null +++ b/experiments/kaggle/nq_qwen3_subbpe_entropy/kernel-metadata.json @@ -0,0 +1,20 @@ +{ + "id": "jagmarques/nq-qwen3-subbpe-entropy", + "title": "nq-qwen3-subbpe-entropy", + "code_file": "nq_qwen3_subbpe_entropy.py", + "language": "python", + "kernel_type": "script", + "is_private": true, + "enable_gpu": true, + "enable_tpu": false, + "enable_internet": true, + "keywords": [ + "gpu" + ], + "dataset_sources": [], + "kernel_sources": [], + "competition_sources": [], + "model_sources": [], + "docker_image": "gcr.io/kaggle-private-byod/python@sha256:00377cd1b3d470a605bc5b0ceca79969e369644e9b36802242a1c70e627372f9", + "machine_shape": "NvidiaTeslaT4" +} diff --git a/experiments/kaggle/nq_qwen3_subbpe_entropy/nq_qwen3_subbpe_entropy.py b/experiments/kaggle/nq_qwen3_subbpe_entropy/nq_qwen3_subbpe_entropy.py new file mode 100644 index 0000000..3c4bd24 --- /dev/null +++ b/experiments/kaggle/nq_qwen3_subbpe_entropy/nq_qwen3_subbpe_entropy.py @@ -0,0 +1,611 @@ +# NexusQuant Qwen3-8B sub-bpe frontier + entropy-coded bpe table, Kaggle T4x2 (sm_75). +# Adds Qwen3-8B as a new architecture row to both the sub-1.5bpe frontier table +# and the entropy-coded bpe cross-arch table. +# +# Model: Qwen/Qwen3-8B (ungated; no HF token required). +# Weights: NF4 (bitsandbytes load_in_4bit) -- 8B fp16 fills T4x2; NF4 keeps it safe. +# Baseline: NF4-weights PPL. 
Per-segment KV-quant deltas are paired on identical tokens +# so the NF4 weight floor cancels in the delta. +# +# Protocol: AQUA-iso, n>=60 segments, prefix=1024, continuation=512. +# Configs: FP16(NF4-weights), K3V2_pb0, K3V2_pb1, K2V2_pb0, K2V2_pb1. +# For each quant config: raw bpw, live entropy-coded bpe (zstd-L22 + Shannon over +# the real quantized codeword stream), and paired PPL delta (mean/SEM/z). +# +# E8 quant: E8Lattice.quantize_perhead (per-head amax) or manual Hadamard+nearest_point. +# DynamicCache: use cache.layers[i].keys/.values if .key_cache absent (transformers 5.12). +# JSON saved to disk BEFORE any summary print. All None numbers guarded before formatting. + +import sys, os, math, time, gc, json, traceback, subprocess + +os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True") + +print("Installing transformers/accelerate/bitsandbytes + nexusquant ...", flush=True) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "numpy<2"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-U", + "transformers>=5.5.3", "accelerate>=1.1.1", "bitsandbytes>=0.43.0"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", + "git+https://github.com/jagmarques/nexusquant.git@main"]) +subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "zstandard"]) + +import numpy as np +import torch +import torch.nn.functional as F +import zstandard +from transformers import (AutoModelForCausalLM, AutoTokenizer, AutoConfig, + BitsAndBytesConfig, DynamicCache) +from datasets import load_dataset + +from nexusquant.core.e8_lattice import E8Lattice +from nexusquant.core.hadamard import hadamard_matrix +from nexusquant.core.rope_utils import inverse_rope, forward_rope +from nexusquant.core.entropy_coder import ( + encode_stream, build_cdf_from_samples, measure_entropy_bps, +) + +torch.manual_seed(42) + +MODEL_ID = "Qwen/Qwen3-8B" # non-gated; no HF token needed +MODEL_NAME = "Qwen3-8B" 
+PREFIX_LEN = 1024 # canonical AQUA-iso prefix length (BOS-prepended -> 1025 fed) +CONT_LEN = 512 # continuation tokens scored per segment +SEG_LEN = PREFIX_LEN + CONT_LEN +N_SEGS_TARGET = 161 # project paired-noise-floor target; use fewer if dataset short +N_SEGS_MIN = 60 # contract floor +LOGIT_CHUNK = 256 # vocab-CE token chunk +CONT_CHUNK = 512 # continuation forward chunk +MAX_MEMORY = {0: "11GiB", 1: "14GiB", "cpu": "24GiB"} +OUT_PATH = "/kaggle/working/nq_qwen3_subbpe_entropy.json" + +# Rope config for Qwen3-8B: standard split-half, theta=1e6, no partial-rotary. +ROPE_THETA = 1_000_000.0 +ROPE_SCALING = None # resolved from model config below; set at runtime + +HF_TOKEN = os.environ.get("HF_TOKEN") # ungated model -- None is fine + +# AQUA-iso configs: (label, k_bits, v_bits, protect_boundary_layers). +# FP16 = NF4-weights baseline (no KV quant). Listed first so it runs first. +CONFIGS = [ + ("K3V2_pb0", 3, 2, 0), + ("K3V2_pb1", 3, 2, 1), + ("K2V2_pb0", 2, 2, 0), + ("K2V2_pb1", 2, 2, 1), +] + +cfg_state = {} + + +def free(): + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + + +def honest_bpw(kb, vb): + # raw bpw: (k_bits + index_scale_overhead + v_bits + index_scale_overhead) / 2 + # 0.125 bpw overhead per tensor for per-head amax (1 fp16 scale per head per token, + # amortized over head_dim=128 elements: 16 bits / 128 = 0.125 bpw). + return (kb + 0.125 + vb + 0.125) / 2.0 + + +def fp16_bpe(): + # fp16 baseline: 16 bits per KV element. 
+ return 16.0 + + +def to_dyn(c): + return DynamicCache.from_legacy_cache(c) if isinstance(c, tuple) else c + + +def get_kv(c, l): + if hasattr(c, "key_cache"): + return c.key_cache[l], c.value_cache[l] + return c.layers[l].keys, c.layers[l].values + + +def set_kv(c, l, k, v): + if hasattr(c, "key_cache"): + c.key_cache[l] = k + c.value_cache[l] = v + else: + c.layers[l].keys = k + c.layers[l].values = v + + +def n_layers_kv(c): + return len(c.layers) if hasattr(c, "layers") else len(c.key_cache) + + +def cache_len(c): + lens = [] + for l in range(n_layers_kv(c)): + kl, _ = get_kv(c, l) + if kl is not None: + lens.append(kl.shape[2]) + return max(lens) if lens else 0 + + +def clone_cache(kv): + new = DynamicCache() + nl = n_layers_kv(kv) + for l in range(nl): + k, v = get_kv(kv, l) + kk = k.detach().clone() if k is not None else None + vv = v.detach().clone() if v is not None else None + if hasattr(new, "key_cache"): + new.key_cache.append(kk) + new.value_cache.append(vv) + else: + new.update(kk, vv, l) + return new + + +def resolve_model_config(model_id): + _cfg = AutoConfig.from_pretrained(model_id, token=HF_TOKEN) + n_layers = _cfg.num_hidden_layers + n_kv = getattr(_cfg, "num_key_value_heads", 8) + head_dim = getattr(_cfg, "head_dim", None) or _cfg.hidden_size // _cfg.num_attention_heads + r_theta = float(getattr(_cfg, "rope_theta", 1_000_000.0)) + r_scaling = getattr(_cfg, "rope_scaling", None) + cfg_state.update({ + "n_layers": n_layers, "n_kv_heads": n_kv, "head_dim": head_dim, + "rope_theta": r_theta, "rope_scaling": r_scaling, + "hidden_size": getattr(_cfg, "hidden_size", None), + "vocab_size": getattr(_cfg, "vocab_size", None), + }) + # Update module-level rope constants to match actual config. 
def nq_quantize_kv_and_collect(kv, kb, vb, pb):
    """Quantize a prefix KV cache in place (Hadamard rotation + E8 VQ) and collect codes.

    Args:
        kv: cache object understood by get_kv / set_kv; it is MUTATED in place.
        kb: bit budget for keys (quantizer range is +/- 2**kb / 2).
        vb: bit budget for values.
        pb: number of layers at EACH end of the stack that are skipped
            (left unquantized).  ``pb <= 0`` protects nothing.

    Returns:
        A 3-tuple ``(kv, quant_count, combined)``:
          * ``kv`` -- the same cache object, now holding dequantized tensors;
          * ``quant_count`` -- number of layers that were quantized;
          * ``combined`` -- 1-D int8 numpy array with every collected codeword
            (keys then values, head by head, layer by layer); empty int8 array
            when nothing was quantized.

    Layers are also skipped when their key tensor is ``None`` or its
    ``shape[2]`` differs from the cache-wide length.
    """
    hadamard_by_size = {}

    def H(d):
        # Smallest power of two >= d; one Hadamard matrix is cached per size.
        p2 = 1
        while p2 < d:
            p2 *= 2
        if p2 not in hadamard_by_size:
            hadamard_by_size[p2] = hadamard_matrix(p2).cpu()
        return hadamard_by_size[p2], p2

    nl = n_layers_kv(kv)
    plen = cache_len(kv)
    if pb > 0:
        protected = set(range(min(pb, nl))) | set(range(max(0, nl - pb), nl))
    else:
        protected = set()
    all_codes = []
    quant_count = 0

    for l in range(nl):
        if l in protected:
            continue
        kl, vl = get_kv(kv, l)
        if kl is None or kl.shape[2] != plen:
            continue
        d_l = kl.shape[-1]
        nk = kl.shape[1]
        Hm, p2 = H(d_l)
        h_pad = p2 - d_l
        # Extra padding so the rotated width splits into groups of 8.
        # Depends only on p2, so it is computed once per layer.
        e8p = (8 - p2 % 8) % 8

        for is_k, t_in, b_bits in [(True, kl, kb), (False, vl, vb)]:
            # Work on a float32 CPU copy of batch element 0.
            t = t_in[0].float().cpu().clone()
            half = (2 ** b_bits) / 2
            for h in range(nk):
                if is_k:
                    # Keys go through inverse_rope before rotation and
                    # forward_rope after dequantization (presumably so they are
                    # quantized in the pre-RoPE basis -- helper semantics are
                    # defined outside this function).
                    th = inverse_rope(t[h:h+1], base=ROPE_THETA,
                                      rope_scaling=ROPE_SCALING)[0]
                else:
                    th = t[h]
                pad_in = F.pad(th, (0, h_pad)) if h_pad > 0 else th
                rot = pad_in @ Hm.T
                # Per-row amax scale, floored to avoid division by zero.
                amax = rot.abs().amax(dim=-1, keepdim=True).clamp(min=1e-8)
                sc = amax / half
                norm = rot / sc
                if e8p > 0:
                    norm = F.pad(norm, (0, e8p))
                grp = norm.reshape(-1, 8)
                lp = E8Lattice.nearest_point(grp).clamp(-half, half)
                # Collect raw int8 codewords before inverse transform.
                # NOTE(review): .to(torch.int8) discards any fractional part;
                # if E8Lattice.nearest_point can return half-integer points the
                # collected stream differs from the values dequantized below --
                # confirm against the lattice implementation.
                codes = lp.to(torch.int8).flatten().cpu().numpy()
                all_codes.append(codes)
                co = lp.reshape(-1, norm.shape[-1])
                if e8p > 0:
                    co = co[..., :p2]
                q = (co * sc @ Hm)[..., :d_l]
                if is_k:
                    t[h] = forward_rope(q.unsqueeze(0), base=ROPE_THETA,
                                        rope_scaling=ROPE_SCALING)[0]
                else:
                    t[h] = q
            # Write back with the original dtype/device of the source tensor.
            restored = t.unsqueeze(0).to(dtype=t_in.dtype, device=t_in.device)
            if is_k:
                set_kv(kv, l, restored, vl)
            else:
                # Re-read keys: they were replaced by the K pass just above.
                kn, _ = get_kv(kv, l)
                set_kv(kv, l, kn, restored)
        # One increment per quantized layer (the caller logs this as "layers").
        quant_count += 1

    combined = np.concatenate(all_codes) if all_codes else np.array([], dtype=np.int8)
    return kv, quant_count, combined
def score_continuation(model, kv, cont_t, device):
    """Sum next-token NLL over a continuation, conditioned on a prefilled cache.

    The continuation is fed in slices of ``CONT_CHUNK`` tokens.  Token 0 is
    never scored here (the logits that predict it belong to the prefix
    forward pass, which this function does not see); every later token is
    scored exactly once.

    The logits row produced for the LAST token of a chunk predicts the FIRST
    token of the next chunk, so that row is carried across the chunk
    boundary.  Without the carry, the first token of each chunk after the
    first would be silently dropped whenever the continuation is longer than
    ``CONT_CHUNK``.  When the continuation fits in one chunk the computation
    is identical to scoring ``logits[:-1]`` against ``tokens[1:]``.

    Args:
        model: callable returning an object with ``.logits`` and
            ``.past_key_values`` when invoked with ``past_key_values``,
            ``cache_position`` and ``use_cache``.
        kv: prefilled cache; it is extended by the forward passes, so pass a
            clone if the original must be kept.
        cont_t: token ids indexed as ``cont_t[:, a:b]`` (batch of one is
            assumed -- only row 0 is scored).
        device: device for ``cache_position`` and the target ids.

    Returns:
        ``(nll_sum, n_tokens)`` -- summed cross-entropy and the number of
        tokens that contributed to it.
    """
    n_cont = cont_t.shape[1]
    seg_nll_sum, seg_tok = 0.0, 0
    if n_cont < 2:
        # Nothing can be scored without at least one (input, target) pair.
        return seg_nll_sum, seg_tok

    carry = None  # (1, vocab) logits predicting the first token of the next chunk
    cur = 0
    while cur < n_cont:
        end = min(cur + CONT_CHUNK, n_cont)
        chunk = cont_t[:, cur:end]
        is_last = end == n_cont

        if is_last and chunk.shape[1] == 1:
            # A trailing single token only needs the carried row; its own
            # logits would predict a token outside the continuation.
            logits = carry
            tgts = chunk[0]
            carry = None
        else:
            past_len = (kv.get_seq_length() if hasattr(kv, "get_seq_length")
                        else cache_len(kv))
            cache_pos = torch.arange(past_len, past_len + chunk.shape[1], device=device)
            with torch.no_grad():
                out2 = model(chunk, past_key_values=kv, cache_position=cache_pos,
                             use_cache=True)
            rows = out2.logits[0]
            kv = to_dyn(out2.past_key_values)
            if carry is None:
                logits = rows[:-1, :]
                tgts = chunk[0, 1:]
            else:
                logits = torch.cat([carry.to(rows.device), rows[:-1, :]], dim=0)
                tgts = chunk[0]
            carry = None if is_last else rows[-1:, :].detach().clone()
            del out2, rows

        m = tgts.shape[0]
        pos = 0
        while pos < m:
            e2 = min(pos + LOGIT_CHUNK, m)
            lc = logits[pos:e2].float()
            wsum = F.cross_entropy(lc, tgts[pos:e2].to(device),
                                   reduction="sum").item()
            # NOTE(review): a non-finite sub-chunk is excluded from BOTH the
            # sum and the token count, so a diverged config can look better
            # than it is; kept as-is to preserve existing results.
            if math.isfinite(wsum):
                seg_nll_sum += wsum
                seg_tok += (e2 - pos)
            del lc
            pos = e2
        del logits
        cur = end
        free()
    return seg_nll_sum, seg_tok


def compute_stats(deltas):
    """Summarize paired per-segment deltas (in percent).

    Args:
        deltas: sequence of per-segment relative PPL changes.

    Returns:
        ``{"n": 0}`` for empty input; otherwise a dict with the sample count,
        mean, sample standard deviation, standard error of the mean,
        ``z = mean / sem`` (``None`` when the SEM is zero, e.g. a single
        sample or identical samples), a 2-sigma significance flag and the
        number of negative entries.
    """
    import statistics as st
    n = len(deltas)
    if n == 0:
        return {"n": 0}
    mean = sum(deltas) / n
    sigma = st.stdev(deltas) if n > 1 else 0.0
    sem = sigma / math.sqrt(n) if n > 1 else 0.0
    z = (mean / sem) if sem > 0 else None
    n_neg = sum(1 for d in deltas if d < 0)
    return {
        "n": n,
        "mean_delta_pct": mean,
        "paired_sigma_pct": sigma,
        "sem_pct": sem,
        "z": z,
        "significant_at_2sigma": (abs(z) >= 2.0) if z is not None else False,
        "n_negative_segments": n_neg,
    }
def load_model(model_id):
    """Load *model_id* with NF4 (bitsandbytes 4-bit) weights and return it in eval mode.

    The model is placed with ``device_map="auto"`` under the ``MAX_MEMORY``
    budget and uses eager attention.  After loading, allocated memory is
    printed for every CUDA device that is actually visible; querying a fixed
    device index would raise on a single-GPU (or CPU-only) host after the
    expensive load had already succeeded.

    Args:
        model_id: Hugging Face model identifier.

    Returns:
        The loaded causal-LM model, switched to ``eval()``.
    """
    bnb = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
    print(f"[model] loading {model_id} NF4 device_map=auto ...", flush=True)
    t0 = time.time()
    model = AutoModelForCausalLM.from_pretrained(
        model_id, quantization_config=bnb, device_map="auto",
        max_memory=MAX_MEMORY, attn_implementation="eager",
        low_cpu_mem_usage=True, token=HF_TOKEN)
    model.eval()
    # Report only devices that exist: mem0, mem1, ... (same text as before on
    # a two-GPU host).
    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    mem_report = " ".join(
        f"mem{i}={torch.cuda.memory_allocated(i)/1e9:.1f}GB" for i in range(n_gpus))
    print(f" loaded in {time.time()-t0:.1f}s {mem_report}", flush=True)
    return model
Absolute base PPL is " + "NF4-weight PPL, not strict FP16-weights."), + "protocol": "AQUA-iso: non-overlapping (prefix=1024 + cont=512) windows on wikitext-2-raw test", + "prefix_len": PREFIX_LEN, + "cont_len": CONT_LEN, + "seg_len": SEG_LEN, + "n_segs_target": N_SEGS_TARGET, + "n_segs_min": N_SEGS_MIN, + "rope_theta": ROPE_THETA, + "bpe_note": ( + "bpe_raw=8.0 (int8 floor), bpe_zstd=zstd-L22 over real E8 codeword stream, " + "bpe_shannon=order-0 Shannon (rANS lower bound), bpe_rans=live rANS bytes. " + "Measured on the codewords from the FIRST segment's prefix KV (calibration-free)."), + "cross_arch_note": ( + "Qwen3-8B K3V2 pb=0 iso-reference = +0.378% (nq-mm-qwen3-iso-v3, NF4-weights, " + "AQUA-iso n=6 seq_len=8192). This kernel extends to n>=60 paired segments + " + "adds K3V2 pb=1, K2V2 pb=0, K2V2 pb=1. Confirms Qwen3 is NOT the Qwen2.5 " + "catastrophe (+539x). K2V2 boundary protection expected: pb=1 ~+3.48% << pb=0 ~+8.37%."), + "sub_bpe_frontier_configs": [ + {"label": "FP16_baseline", "bpw": 16.0, "bpe_raw": 16.0, + "note": "fp16: 16 bpe, included as frontier anchor"}, + {"label": "K3V2_pb0", "bpw": honest_bpw(3, 2), + "note": "sub-1.5bpe frontier target"}, + {"label": "K3V2_pb1", "bpw": honest_bpw(3, 2), + "note": "K3V2 with 1-layer boundary protection"}, + {"label": "K2V2_pb0", "bpw": honest_bpw(2, 2), + "note": "sub-1.25bpe target"}, + {"label": "K2V2_pb1", "bpw": honest_bpw(2, 2), + "note": "K2V2 with 1-layer boundary protection"}, + ], + "transformers": transformers.__version__, + "gpu": gpu_info, + "run_at_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + "errors": {}, + # PPL results filled below. + "base_nf4w_ppl": None, + "n_segments": 0, + "ppl_results": {}, + # Entropy/bpe results filled below (from first segment's prefix KV). + "entropy_bpe": {}, + } + + def save(): + with open(OUT_PATH, "w") as f: + json.dump(result, f, indent=2) + + # Save early so JSON exists on disk before any potential crash. 
+ save() + + # Resolve model config (rope_theta, rope_scaling) before loading weights. + resolve_model_config(MODEL_ID) + result["model_config"] = { + k: (sorted(v) if isinstance(v, set) else v) for k, v in cfg_state.items()} + save() + + tok = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN) + bos_id = tok.bos_token_id + print(f"[tok] bos={bos_id} vocab={tok.vocab_size}", flush=True) + + all_ids = tok(text, return_tensors="pt", add_special_tokens=False).input_ids[0] + n_possible = all_ids.shape[0] // SEG_LEN + n_use = min(N_SEGS_TARGET, n_possible) + if n_use < N_SEGS_MIN: + result["errors"]["window_count"] = ( + f"only {n_possible} non-overlapping {SEG_LEN}-token windows; need >= {N_SEGS_MIN}") + save() + sys.exit(f"FAIL: only {n_possible} windows ({all_ids.shape[0]} tokens); " + f"need >= {N_SEGS_MIN}") + segments = [all_ids[i * SEG_LEN:(i + 1) * SEG_LEN] for i in range(n_use)] + segments = [s for s in segments if s.shape[0] == SEG_LEN] + print(f"[data] {all_ids.shape[0]} tokens -> {n_possible} windows, using {len(segments)}", + flush=True) + result["n_segments"] = len(segments) + save() + + model = load_model(MODEL_ID) + device = next(model.parameters()).device + dmap = getattr(model, "hf_device_map", None) + result["device_map"] = {str(k): str(v) for k, v in dmap.items()} if dmap else None + save() + + base_nll_total, base_tok_total = 0.0, 0 + base_ppls = [] + quant_deltas = {lbl: [] for lbl, *_ in CONFIGS} + entropy_measured = False # measure entropy bpe on first segment only + diag_logged = False + + for si, seg in enumerate(segments): + try: + prefix = seg[:PREFIX_LEN] + cont = seg[PREFIX_LEN:] + if bos_id is not None: + prefix = torch.cat([torch.tensor([bos_id], dtype=prefix.dtype), prefix]) + pre = prefix.unsqueeze(0).to(device) + cont_t = cont.unsqueeze(0).to(device) + if cont_t.shape[1] <= 1: + continue + + with torch.no_grad(): + out_pre = model(pre, use_cache=True) + base_kv_master = to_dyn(out_pre.past_key_values) + del out_pre + free() + + # 
Baseline (NF4-weights, no KV quant). + b_kv = clone_cache(base_kv_master) + b_nll, b_tok = score_continuation(model, b_kv, cont_t, device) + del b_kv + free() + if b_tok == 0: + del base_kv_master + free() + continue + base_seg_ppl = math.exp(b_nll / b_tok) + base_ppls.append(base_seg_ppl) + base_nll_total += b_nll + base_tok_total += b_tok + + if not diag_logged: + print(f" [diag] prefix_with_bos={pre.shape[1]} cont={cont_t.shape[1]} " + f"base_tok={b_tok} base_seg_ppl={base_seg_ppl:.3f}", flush=True) + + # Quant configs. + for label, kb, vb, pb in CONFIGS: + qkv = clone_cache(base_kv_master) + qkv, qc, codes = nq_quantize_kv_and_collect(qkv, kb, vb, pb) + if not diag_logged: + print(f" [{label}] quantized {qc} layers (pb={pb})", flush=True) + + # Measure entropy bpe on first segment's prefix KV (all configs). + if not entropy_measured or si == 0: + ebpe = measure_entropy_bpe(codes) + ebpe["label"] = label + ebpe["kb"] = kb + ebpe["vb"] = vb + ebpe["pb"] = pb + ebpe["bpw_raw"] = honest_bpw(kb, vb) + ebpe["from_seg_idx"] = si + result["entropy_bpe"][label] = ebpe + print(f" [{label}] entropy bpe: raw=8.0 " + f"zstd={_fmt(ebpe['bpe_zstd'])} " + f"shannon={_fmt(ebpe['bpe_shannon'])} " + f"rans={_fmt(ebpe['bpe_rans'])} " + f"n_codewords={ebpe['n_codewords']}", flush=True) + + q_nll, q_tok = score_continuation(model, qkv, cont_t, device) + del qkv + free() + if q_tok == 0: + continue + q_seg_ppl = math.exp(q_nll / q_tok) + delta = 100.0 * (q_seg_ppl - base_seg_ppl) / base_seg_ppl + quant_deltas[label].append(delta) + + if not entropy_measured: + entropy_measured = True # first-segment entropy pass complete + + diag_logged = True + del base_kv_master + free() + + if si % 10 == 0 or si == len(segments) - 1: + k3p0 = quant_deltas.get("K3V2_pb0", []) + k3p1 = quant_deltas.get("K3V2_pb1", []) + k2p0 = quant_deltas.get("K2V2_pb0", []) + k2p1 = quant_deltas.get("K2V2_pb1", []) + print(f" seg {si}: base_ppl={base_seg_ppl:.3f} " + f"K3p0={_fmt(k3p0[-1] if k3p0 else None)} " + 
f"K3p1={_fmt(k3p1[-1] if k3p1 else None)} " + f"K2p0={_fmt(k2p0[-1] if k2p0 else None)} " + f"K2p1={_fmt(k2p1[-1] if k2p1 else None)}", flush=True) + + if si % 20 == 0: + for lbl in [c[0] for c in CONFIGS]: + result["ppl_results"][lbl] = compute_stats(quant_deltas[lbl]) + if base_tok_total > 0: + result["base_nf4w_ppl"] = math.exp(base_nll_total / base_tok_total) + result["per_segment_base_ppl"] = base_ppls + save() + + except Exception: + traceback.print_exc() + result["errors"][f"seg_{si}"] = traceback.format_exc()[-500:] + free() + + # Final stats. + for lbl in [c[0] for c in CONFIGS]: + st = compute_stats(quant_deltas[lbl]) + st["per_segment_delta_pct"] = quant_deltas[lbl] + result["ppl_results"][lbl] = st + result["per_segment_base_ppl"] = base_ppls + if base_tok_total > 0: + result["base_nf4w_ppl"] = math.exp(base_nll_total / base_tok_total) + + # Cross-arch table anchor: fp16 bpe = 16.0 (always). + result["entropy_bpe"]["FP16"] = { + "label": "FP16", "kb": 16, "vb": 16, "pb": 0, "bpw_raw": 16.0, + "bpe_raw": 16.0, "bpe_zstd": None, "bpe_shannon": None, "bpe_rans": None, + "note": "fp16 baseline: 16 bits/element; no compression.", + } + + # SAVE JSON before any summary print (contract requirement). + save() + + # Summary print (all None-guarded via _fmt). 
+ print("\n========== PPL SUMMARY ==========", flush=True) + bnf4 = result["base_nf4w_ppl"] + print(f" base NF4w PPL = {_fmt(bnf4)}", flush=True) + for lbl, kb, vb, pb in CONFIGS: + st = result["ppl_results"].get(lbl, {}) + print(f" {lbl}(bpw={honest_bpw(kb,vb):.3f}): " + f"n={st.get('n','?')} mean={_fmt(st.get('mean_delta_pct'))}% " + f"sem={_fmt(st.get('sem_pct'))}% z={_fmt(st.get('z'),':.2f')}", flush=True) + + print("\n========== ENTROPY BPE SUMMARY ==========", flush=True) + for lbl, kb, vb, pb in CONFIGS: + ebpe = result["entropy_bpe"].get(lbl, {}) + print(f" {lbl}: raw=8.0 zstd={_fmt(ebpe.get('bpe_zstd'))} " + f"shannon={_fmt(ebpe.get('bpe_shannon'))} " + f"rans={_fmt(ebpe.get('bpe_rans'))} " + f"bpw_raw={honest_bpw(kb,vb):.3f}", flush=True) + + print(f"\n[done] -> {OUT_PATH}", flush=True) + print(json.dumps(result, indent=2, default=str), flush=True) + + +if __name__ == "__main__": + main() diff --git a/experiments/kaggle/results/nq_hqmq_qwen3_v3.json b/experiments/kaggle/results/nq_hqmq_qwen3_v3.json new file mode 100644 index 0000000..b7c02ab --- /dev/null +++ b/experiments/kaggle/results/nq_hqmq_qwen3_v3.json @@ -0,0 +1,621 @@ +{ + "configs": { + "fp16": { + "ppls": [ + 5.448348308054142, + 11.092286978670202, + 16.91371785382974, + 11.807678025521156, + 7.743664305075443, + 11.26696508157019, + 6.571962538556527, + 3.8333918047584103, + 3.1532620864771905, + 7.050686584819912, + 5.709820002693286, + 10.42023928423861, + 7.161718742493711, + 6.833759763883972, + 6.369764508102507, + 14.926303617194517, + 8.638681785041102, + 10.42023928423861, + 9.487735836358526, + 9.053260780008058, + 13.379852088930456, + 16.39333746248769, + 9.943061786014486, + 14.467069533669397, + 6.030771824634841, + 8.504751437796486, + 6.125742661881986, + 9.487735836358526, + 12.968197316969134, + 6.67545588945633, + 9.340642126143496, + 9.487735836358526, + 8.372897488127265, + 9.943061786014486, + 9.788908901255894, + 16.91371785382974, + 21.380942759123343, + 
18.00455893510183, + 6.571962538556527, + 9.340642126143496, + 5.491080233448797, + 6.727812138894691, + 4.660219266837412, + 5.491080233448797, + 8.243087744472826, + 6.67545588945633, + 9.053260780008058, + 10.258688448076699, + 6.030771824634841, + 3.955076722920577, + 6.222209072198332, + 5.491080233448797, + 4.3099982582618726, + 7.38905609893065, + 5.363879496843748, + 13.804574186067095, + 11.624616945432633, + 15.400115355370453, + 17.45061689198871, + 9.340642126143496 + ], + "mean": 9.333464255122076, + "sem": 0.521520044949058, + "bpe": 16.0, + "z": 17.896654875525957, + "n": 60 + }, + "hqmq_s96r6_med3x": { + "ppls": [ + 5.448348308054142, + 11.092286978670202, + 16.91371785382974, + 11.807678025521156, + 7.743664305075443, + 11.26696508157019, + 6.571962538556527, + 3.8333918047584103, + 3.128723206238592, + 7.161718742493711, + 5.709820002693286, + 10.42023928423861, + 7.21788879875633, + 6.833759763883972, + 6.419723190321737, + 14.694892728788941, + 8.638681785041102, + 10.42023928423861, + 9.637145935456955, + 9.053260780008058, + 13.379852088930456, + 16.39333746248769, + 10.258688448076699, + 14.467069533669397, + 6.030771824634841, + 8.504751437796486, + 6.125742661881986, + 9.487735836358526, + 12.968197316969134, + 6.727812138894691, + 9.340642126143496, + 9.487735836358526, + 8.372897488127265, + 10.099642225480054, + 9.788908901255894, + 16.91371785382974, + 21.380942759123343, + 18.00455893510183, + 6.571962538556527, + 9.340642126143496, + 5.491080233448797, + 6.833759763883972, + 4.660219266837412, + 5.491080233448797, + 8.243087744472826, + 6.67545588945633, + 9.053260780008058, + 10.258688448076699, + 6.030771824634841, + 3.955076722920577, + 6.222209072198332, + 5.491080233448797, + 4.343801993561042, + 7.38905609893065, + 5.405948925141167, + 13.804574186067095, + 11.624616945432633, + 15.400115355370453, + 17.45061689198871, + 9.487735836358526 + ], + "mean": 9.34953257306169, + "sem": 0.520443543240864, + "bpe": 4.996148960336898, + 
"z": 17.9645471530707, + "n": 60, + "delta_pct_per_seg": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -0.7782061739756374, + 1.5747708586685727, + 0.0, + 0.0, + 0.7843097206448046, + 0.0, + 0.7843097206448046, + -1.5503562994591547, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 3.174340749910254, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7843097206447824, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7843097206448046, + 0.0, + 0.7843097206447824, + 0.0, + 0.0, + 0.0, + 0.0, + 1.574770858668595 + ], + "delta_pct_mean": 0.21068635288403878, + "delta_pct_sem": 0.08553181804560647 + }, + "hqmq_s96r4_med3x": { + "ppls": [ + 5.448348308054142, + 11.092286978670202, + 16.91371785382974, + 11.807678025521156, + 7.743664305075443, + 11.26696508157019, + 6.623507079583559, + 3.8333918047584103, + 3.128723206238592, + 7.050686584819912, + 5.754602676005731, + 10.42023928423861, + 7.21788879875633, + 6.833759763883972, + 6.470073703341919, + 14.926303617194517, + 8.638681785041102, + 10.42023928423861, + 9.637145935456955, + 9.195828892530896, + 13.379852088930456, + 16.39333746248769, + 10.258688448076699, + 14.242778409807016, + 5.983839985957149, + 8.504751437796486, + 6.125742661881986, + 9.487735836358526, + 12.968197316969134, + 6.727812138894691, + 9.340642126143496, + 9.487735836358526, + 8.372897488127265, + 9.943061786014486, + 9.788908901255894, + 16.91371785382974, + 21.380942759123343, + 18.28808948244362, + 6.571962538556527, + 9.340642126143496, + 5.491080233448797, + 6.727812138894691, + 4.69676981955058, + 5.491080233448797, + 8.115290514356445, + 6.727812138894691, + 9.053260780008058, + 10.258688448076699, + 6.07807175428536, + 4.017360118591115, + 6.271010462790426, + 5.491080233448797, + 4.3099982582618726, + 7.38905609893065, + 5.363879496843748, + 13.804574186067095, + 
11.807678025521156, + 15.400115355370453, + 17.45061689198871, + 9.487735836358526 + ], + "mean": 9.355967344651713, + "sem": 0.5219034282904834, + "bpe": 4.515524018352086, + "z": 17.926625573810803, + "n": 60, + "delta_pct_per_seg": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7843097206447824, + 0.0, + -0.7782061739756374, + 0.0, + 0.7843097206448046, + 0.0, + 0.7843097206448046, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 1.5747708586685727, + 0.0, + 0.0, + 3.174340749910254, + -1.5503562994591658, + -0.7782061739756374, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7843097206447824, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7843097206448046, + 0.0, + -1.5503562994591547, + 0.7843097206447824, + 0.0, + 0.0, + 0.7843097206448046, + 1.5747708586685727, + 0.7843097206448046, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 1.574770858668595 + ], + "delta_pct_mean": 0.26358482631465097, + "delta_pct_sem": 0.09830009579525308 + }, + "e8_k3v3": { + "ppls": [ + 5.57755216479126, + 11.807678025521156, + 17.45061689198871, + 11.624616945432633, + 7.623609917712736, + 11.26696508157019, + 6.5208191203301125, + 3.893758941798335, + 3.1779934275388384, + 7.161718742493711, + 5.621297448593752, + 10.584334175890335, + 7.161718742493711, + 6.833759763883972, + 6.369764508102507, + 14.926303617194517, + 9.053260780008058, + 10.42023928423861, + 9.637145935456955, + 9.340642126143496, + 13.379852088930456, + 16.651494963610144, + 10.42023928423861, + 14.694892728788941, + 6.07807175428536, + 8.504751437796486, + 6.222209072198332, + 9.637145935456955, + 13.59055410055989, + 6.623507079583559, + 9.487735836358526, + 9.487735836358526, + 8.504751437796486, + 10.099642225480054, + 10.099642225480054, + 17.45061689198871, + 22.40703562619394, + 18.00455893510183, + 6.623507079583559, + 9.637145935456955, + 5.534147309488141, + 6.780579023486763, + 4.8458612978620605, + 
5.709820002693286, + 8.243087744472826, + 6.67545588945633, + 9.195828892530896, + 10.258688448076699, + 6.07807175428536, + 3.955076722920577, + 6.222209072198332, + 5.322137455434696, + 4.276457585717836, + 7.38905609893065, + 5.405948925141167, + 14.021964597512564, + 12.182493960703473, + 15.888967492629165, + 17.45061689198871, + 9.487735836358526 + ], + "mean": 9.476384851971982, + "sem": 0.5380338759166177, + "bpe": 3.125, + "bpe_nominal": 3.125, + "z": 17.61298921155774, + "n": 60, + "delta_pct_per_seg": [ + 2.37143166023579, + 6.449445891785954, + 3.174340749910276, + -1.5503562994591658, + -1.5503562994591547, + 0.0, + -0.7782061739756596, + 1.5747708586685727, + 0.7843097206448046, + 1.5747708586685727, + -1.5503562994591547, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 0.0, + 4.7991002016632756, + 0.0, + 1.5747708586685727, + 3.174340749910254, + 0.0, + 1.5747708586685727, + 4.7991002016632756, + 1.5747708586685727, + 0.7843097206448046, + 0.0, + 1.5747708586685727, + 1.5747708586685727, + 4.7991002016632756, + -0.7782061739756596, + 1.574770858668595, + 0.0, + 1.5747708586685727, + 1.5747708586685727, + 3.174340749910276, + 3.174340749910276, + 4.7991002016632756, + 0.0, + 0.7843097206447824, + 3.174340749910276, + 0.7843097206447824, + 0.7843097206448046, + 3.983547133622989, + 3.983547133623011, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.7843097206448046, + 0.0, + 0.0, + -3.076676552365598, + -0.7782061739756485, + 0.0, + 0.7843097206447824, + 1.5747708586685727, + 4.7991002016632756, + 3.174340749910276, + 0.0, + 1.574770858668595 + ], + "delta_pct_mean": 1.2884018903374892, + "delta_pct_sem": 0.24500157032621184 + }, + "e8_k4v4": { + "ppls": [ + 5.491080233448797, + 10.920317008742302, + 16.91371785382974, + 11.807678025521156, + 7.743664305075443, + 11.26696508157019, + 6.571962538556527, + 3.893758941798335, + 3.1532620864771905, + 7.274499402230307, + 5.754602676005731, + 10.584334175890335, + 7.161718742493711, + 6.887357605997627, + 
6.320194607432744, + 14.694892728788941, + 8.638681785041102, + 10.258688448076699, + 9.637145935456955, + 9.195828892530896, + 13.59055410055989, + 16.39333746248769, + 9.943061786014486, + 14.467069533669397, + 6.030771824634841, + 8.638681785041102, + 6.173787457040811, + 9.637145935456955, + 12.968197316969134, + 6.780579023486763, + 9.340642126143496, + 9.487735836358526, + 8.372897488127265, + 10.099642225480054, + 9.788908901255894, + 16.91371785382974, + 21.380942759123343, + 18.00455893510183, + 6.623507079583559, + 9.340642126143496, + 5.448348308054142, + 6.780579023486763, + 4.770733181967603, + 5.534147309488141, + 8.243087744472826, + 6.727812138894691, + 9.195828892530896, + 10.099642225480054, + 5.983839985957149, + 3.9242980716773355, + 6.222209072198332, + 5.491080233448797, + 4.3099982582618726, + 7.38905609893065, + 5.322137455434696, + 14.021964597512564, + 11.624616945432633, + 15.400115355370453, + 17.45061689198871, + 9.340642126143496 + ], + "mean": 9.357124775803413, + "sem": 0.5205539323744404, + "bpe": 4.125, + "bpe_nominal": 4.125, + "z": 17.97532242840253, + "n": 60, + "delta_pct_per_seg": [ + 0.7843097206448046, + -1.5503562994591547, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 3.174340749910276, + 0.7843097206448046, + 1.5747708586685727, + 0.0, + 0.7843097206448046, + -0.7782061739756374, + -1.5503562994591547, + 0.0, + -1.5503562994591547, + 1.5747708586685727, + 1.5747708586685727, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 0.0, + 1.574770858668595, + 0.7843097206447824, + 1.5747708586685727, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 1.5747708586685727, + 0.0, + 0.0, + 0.0, + 0.0, + 0.7843097206447824, + 0.0, + -0.7782061739756485, + 0.7843097206448046, + 2.37143166023579, + 0.7843097206447824, + 0.0, + 0.7843097206447824, + 1.5747708586685727, + -1.5503562994591547, + -0.7782061739756374, + -0.7782061739756485, + 0.0, + 0.0, + 0.0, + 0.0, + -0.7782061739756596, + 1.5747708586685727, + 0.0, + 0.0, 
+ 0.0, + 0.0 + ], + "delta_pct_mean": 0.31750455921573145, + "delta_pct_sem": 0.12407179806878488 + } + }, + "errors": [], + "meta": { + "model": "Qwen/Qwen3-8B", + "baseline_note": "NF4 weight quantization used to fit T4 x2 (14 GiB each). All configs (FP16 KV, HQMQ, E8) share this weight baseline. FP16 KV baseline means: FP16 KV cache + NF4 weights.", + "hqmq_spec": "arXiv:2605.27646; 24*S product code; no rotation; Med3x C=3; bpe=(log2(24*S)+b_r)/4 per paper headline (scale 16/d_h excluded); outlier correction: (1-p)*b_base + p*16 + 0.25", + "e8_spec": "NexusQuant E8 with Hadamard rotation, per-head amax scale; bpe=k_bits+16/d_h for K, same for V, averaged (honest per-head overhead)", + "d_h": 128, + "n_kv_heads": 8, + "n_segs": 60, + "prefix_len": 1024, + "cont_len": 512, + "hqmq_mean_p_outlier_r6": 0.03875011603037516, + "hqmq_mean_p_outlier_r4": 0.03875011603037516 + } +} \ No newline at end of file diff --git a/experiments/kaggle/results/nq_llama31_subbpe.json b/experiments/kaggle/results/nq_llama31_subbpe.json new file mode 100644 index 0000000..659c8d3 --- /dev/null +++ b/experiments/kaggle/results/nq_llama31_subbpe.json @@ -0,0 +1,1003 @@ +{ + "model_name": "Llama-3.1-8B-Instruct", + "model_id": "NousResearch/Meta-Llama-3.1-8B-Instruct", + "weights": "FP16 (NousResearch/Meta-Llama-3.1-8B-Instruct, ungated)", + "protocol": "AQUA-iso paired-PPL: wikitext-2-raw-v1 test split sliced into 2048-token windows (prefix=1024, cont=1024). Per window: prefill prefix ONCE (FP16 KV), score cont -> base PPL. Then for each quant config re-quantize SAME cached prefix KV and rescore SAME cont. Paired delta% = (quant_ppl - base_ppl)/base_ppl*100. rope_scaling loaded from model config via prepare_rope_scaling(), propagated to inverse_rope/forward_rope for bit-exact llama3-type piecewise frequency scaling.", + "frontier_note": "Sub-1.5bpe frontier on Llama-3.1-8B-Instruct. K1* configs expected to be catastrophic (PPL cliff at <4-bit keys). 
Catastrophic PPL is a valid frontier datapoint. NIAH viability requires >=4-bit keys on this architecture. Lead with bpe<->PPL frontier chart.", + "prefix_len": 1024, + "cont_len": 1024, + "n_segs_target": 161, + "n_segs_min": 60, + "run_at_utc": "2026-06-16T12:41:40Z", + "configs_meta": [ + { + "label": "FP16", + "kb": 0, + "vb": 0, + "pb": 0, + "bpe_nominal_raw": 16.0, + "bpe_honest_raw": 16.0 + }, + { + "label": "K4V2_pb0", + "kb": 4, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 3.0, + "bpe_honest_raw": 3.125 + }, + { + "label": "K3V2_pb0", + "kb": 3, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 2.5, + "bpe_honest_raw": 2.625 + }, + { + "label": "K2V2_pb0", + "kb": 2, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 2.0, + "bpe_honest_raw": 2.125 + }, + { + "label": "K2V1_pb0", + "kb": 2, + "vb": 1, + "pb": 0, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625 + }, + { + "label": "K1V2_pb0", + "kb": 1, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625 + } + ], + "ppl_results": { + "K4V2_pb0": { + "n": 141, + "mean_delta_pct": 0.2893, + "paired_sigma_pct": 0.3766, + "sem_pct": 0.0317, + "z": 9.12, + "significant_at_2sigma": true, + "n_negative_segments": 30, + "per_segment_delta_pct": [ + 0.0690204620037974, + 0.502145745491896, + -0.15604435025632818, + 0.1934349200294107, + 0.14363440607118363, + 0.39416732953661665, + 0.9992339963548958, + -0.3156181061442555, + 0.5044791605184088, + 0.46707286711384693, + -0.10803964389582446, + 0.6882612173787057, + 0.5218341309810156, + 0.5233980599327869, + 0.7919178250044047, + 0.1552514125517973, + 0.5086379455854839, + 1.04083785589711, + 0.379762557453796, + 0.25791834946345554, + 0.4123113929728444, + 0.29779144043916256, + 0.2710066161484872, + -0.14456667339197316, + 0.23749590733084472, + 0.5771488406129475, + 0.8731006467482957, + -0.011452411716404178, + 0.043098908456563736, + 0.17625953212106765, + 0.08647691381990268, + 0.723812116938066, + 0.03581968922464164, + 0.09599390869704985, + 
0.28787419058202607, + 0.5696592524160594, + 0.19907676314284023, + 0.11489582222903737, + 0.16174517816155756, + 0.10436769593325981, + 0.07829326147935774, + 0.4025750731276052, + 0.08751900759171152, + 0.5527593263987832, + -0.5719211894270436, + 0.8440501413949333, + 0.15553908314938555, + -0.1832873465363223, + 0.5421189751011956, + 0.36403260096693796, + 1.124420729270313, + 0.43615599491744345, + 0.035304896076566075, + -0.05072410056865493, + 0.19020344416733023, + -0.0856518701307344, + 0.9817150284906516, + -0.05902810139047562, + 1.1301119700059388, + 0.4582900291524719, + 0.43476169904459344, + 0.04645734522201979, + 0.7800943001009223, + 0.5995547350521585, + 0.05956590574018963, + 0.47955110740816564, + 0.07623023629749934, + 0.759215793011814, + 0.5812173440644967, + -0.004481794861780762, + 0.09917742266912145, + 0.5781147943594737, + -0.045768881957173504, + 0.3996432469277795, + 0.43903780139704746, + 0.037402994056924564, + 0.34194335172448637, + 0.4558134196608872, + 0.6420147249147402, + 0.6401388096619204, + 0.273277662853444, + 0.27931403702977, + 0.059589855355734946, + 0.4959546401009037, + -0.025652418265359606, + 0.2517032960409638, + 0.6485791980047837, + 1.3316689371198578, + 1.109348506781782, + -0.1729268637440107, + -0.03205028750661137, + -0.11494327221564345, + 0.5391680338651947, + 0.6055324898786245, + 0.32465245713757135, + 0.0031714848799530675, + 0.0774249292968195, + -0.46853454446403814, + 0.3421625089206236, + 0.25176028561064706, + -0.38629562383033067, + 0.16721740904428156, + -0.6130512998681037, + 0.5428529637420205, + -0.0063725853153150614, + 0.5233890371959752, + -0.31893456471410164, + -0.15169780587662945, + 0.6056890118967037, + 0.25691347639468687, + 0.17311252893759962, + 0.4821846319924279, + 1.002751451078743, + 0.10213341951964454, + -0.10179010396535476, + 0.2628048758041611, + 0.33143637653695135, + 0.1767211009013763, + 0.26148198052218646, + 0.5979504937328595, + 0.2508754512082772, + 0.8204374953665812, 
+ -0.28636466871460253, + 0.7010200124978727, + -0.06332485938771754, + 0.00738732190708134, + -0.19995623816964728, + -0.1885134720438986, + 0.0349996130191548, + 0.9064162755186191, + 0.6159868896782562, + 0.3551927147474453, + 0.2250627203771279, + 0.7750649732229279, + -0.03597734151446388, + 0.27082961466903815, + -0.8020266190802858, + -0.09916284921979561, + 0.1541157210752165, + 0.29322128183867063, + 0.8593483673734421 + ] + }, + "K3V2_pb0": { + "n": 141, + "mean_delta_pct": 0.6837, + "paired_sigma_pct": 0.4854, + "sem_pct": 0.0409, + "z": 16.73, + "significant_at_2sigma": true, + "n_negative_segments": 9, + "per_segment_delta_pct": [ + 0.47492156256500306, + 1.1337096287071728, + 0.3900373096741663, + 1.1644416932104042, + 0.24742017267650443, + 0.6715491898390793, + 1.7022380143364189, + 0.18469398818525432, + 1.2191227526327841, + 1.1057154146608104, + 0.8998802684884242, + 1.25853286496099, + 1.1879381215327036, + 1.342329172606364, + 1.5398644203704253, + 0.4151804979153559, + 0.6817604271146813, + 1.9885768693512458, + 1.5842135802291433, + 0.9638728570431121, + 0.9999320381937938, + 0.9248069157333887, + 0.45621315953644304, + 0.4126628914755564, + 0.2958289174057341, + 0.751079622874645, + 1.7139353985871137, + 0.31925866806004993, + 0.29714626425690854, + 0.5733091909724347, + 0.191015799813448, + 0.7946801643345235, + 0.7092847181642822, + 0.7813788087131979, + 0.3571654071888912, + 0.341114761719763, + 1.1149330144972465, + 0.17604673164643664, + 0.006364015109742489, + 0.6478414262329617, + 0.3672246398048487, + 0.8772445033781153, + 0.4539830521267762, + -0.06635968847386434, + -0.8102741354264258, + 1.361017609715564, + -0.015263607894159023, + -0.01091691803936379, + 0.6197498966134213, + 1.0211717386923773, + 1.0112524724108576, + 0.8435462748088415, + 0.9842408598121422, + -0.11729517861184505, + 0.46745461636033725, + 0.13082639815739172, + 0.2656607092574986, + 0.3721675455969704, + 0.6755522328867462, + 0.3986039110633154, + 
1.29771283107134, + 1.3381358915726427, + 0.95257736543454, + 0.7154738992351128, + 0.6523765496948344, + 1.0636978015696819, + 0.39307698778631284, + 0.8254147875388453, + 0.9971187475152261, + 0.4888860006799865, + 0.8956086958990466, + 1.1162611199633492, + 0.4805251427041974, + 0.5058954736367096, + 1.175756349479156, + 0.1464299154095564, + 0.7531445019520558, + 0.8665879225969603, + 1.1319697861889115, + 1.3120401802799189, + 0.7615159854272238, + 0.5353689373625894, + 0.7974757529366433, + 0.8771086860052428, + 0.2553326964591887, + 0.9749898574589421, + 0.8542426659126082, + 1.63979054686007, + 1.53021014419233, + 0.4600814012552567, + 0.45467131449557124, + 0.47335538172195213, + 1.6236989512787912, + 0.8124260793184677, + 0.6821008195276772, + 0.6034044166030855, + 0.27078161432168457, + -0.07532609825318845, + 1.109581440538378, + 1.0471471800439425, + 0.07039171628191036, + 1.1074911061566028, + 0.2944755790919408, + 0.8210890541825552, + 0.2801421167006466, + 1.0294476061613917, + 0.040491857466684086, + 0.29921684439218404, + 0.9495162164083857, + 0.5224657147441173, + 0.22798345107200324, + 0.9259422875466757, + 1.146306981397854, + 0.27011861187446023, + -0.2817075480778699, + 0.42842450763762696, + 0.5235514465824965, + 0.9756575206302934, + 0.4105118600843957, + 0.7205846295026669, + 0.4118066793259029, + 1.4531880607099692, + 0.33304537339146073, + 1.7881130200495263, + 0.6037114342969528, + 0.04794203769595962, + -0.15675232616283744, + 0.782241187530259, + -0.524729538733377, + 1.326712127676215, + 0.5929573434925154, + 0.41223929086797556, + 1.0069157439768204, + 1.0176883997000326, + 0.41498521592141463, + 0.4691229076470893, + 0.2007405979373317, + 0.14949218132727393, + 0.07570325845484246, + 0.6751275232548062, + 0.9105252708052571 + ] + }, + "K2V2_pb0": { + "n": 141, + "mean_delta_pct": 3.2469, + "paired_sigma_pct": 1.0631, + "sem_pct": 0.0895, + "z": 36.27, + "significant_at_2sigma": true, + "n_negative_segments": 0, + 
"per_segment_delta_pct": [ + 2.4930998729117024, + 2.864961719747034, + 4.587629659801057, + 5.005057265745677, + 2.05991950610227, + 4.356062835744233, + 4.472273607186868, + 3.9347982321480552, + 3.731902479379022, + 2.6479897329834383, + 3.639103672437682, + 3.7128327644478363, + 2.735929697467805, + 3.7085382952876014, + 5.074184110361723, + 2.6502624055480086, + 3.792654120328481, + 2.890383062543048, + 5.468094985174389, + 3.273125374608168, + 4.523751368274229, + 2.47475151386849, + 3.2956404661781904, + 1.8753315562988515, + 2.301343489674534, + 2.0696394203414776, + 4.104651989398717, + 3.095266343679979, + 1.5594005811563512, + 4.77043306885146, + 1.7041611130949763, + 2.311395564548406, + 2.4431769706943833, + 2.771650111958766, + 1.9890406856442306, + 2.7139361092394485, + 2.6215628602833596, + 1.7445424117274038, + 2.8013143468143435, + 2.2969697374611484, + 4.9150619689163335, + 1.8811046845860435, + 2.3868880727480275, + 3.518069616625322, + 2.0373226803604814, + 4.153352794336222, + 1.8228824072417649, + 2.812110734750405, + 4.776614774972975, + 4.617852493877335, + 2.5894918795170327, + 3.7155324134013736, + 5.3655179095731205, + 3.117338498933277, + 3.2360569342731345, + 2.8186875517817644, + 6.071721848607245, + 2.838146688635267, + 2.2668635635533487, + 4.331969658343504, + 4.714673139168563, + 3.6355502183066037, + 3.852869615790208, + 2.723300327244306, + 3.5491110828669665, + 2.8634075249194217, + 2.293091976395821, + 3.9881921155554916, + 4.136841442142059, + 2.942374933570519, + 3.9421963418814774, + 3.4060589022757983, + 2.504517129095883, + 3.9308957004179144, + 2.898387206986186, + 2.528410694861229, + 4.172796483444281, + 3.9755799016619244, + 2.9678677798910202, + 5.485009930887881, + 3.57094263859801, + 2.548167732468854, + 3.625516864201002, + 5.123300726007011, + 1.9117376081451287, + 2.455774966398605, + 2.237264842351943, + 4.668428057049452, + 3.935578756078361, + 1.9171041954883863, + 3.590045151408226, + 3.9847355821212456, + 
4.753795684042114, + 2.9718820306122176, + 1.62883752481796, + 3.7202833455917985, + 2.4662714138860853, + 4.055625270592102, + 3.71758357297454, + 3.3067329463696424, + 3.256197481102065, + 3.1828825367998643, + 2.8451374941899052, + 3.4798110635252884, + 2.7989737410340263, + 3.5653464307666027, + 3.2693341998449266, + 3.3096600267729053, + 3.5413134437563683, + 2.1690200683804255, + 4.150816251128246, + 2.7369225319893897, + 3.1460717690771602, + 2.174100620822151, + 1.5024614906082998, + 0.8289473057834192, + 3.768909899622714, + 2.1139633335120873, + 2.0888437812822342, + 0.6354025050769025, + 4.264351179562244, + 5.339261261507099, + 2.2032621305071722, + 4.4713546477120465, + 5.731919831990023, + 1.4837317932257121, + 2.1086627554704696, + 3.5785131838745694, + 2.176540109420761, + 3.7670595255464305, + 4.0990394020975, + 4.55221649523585, + 3.3959116510289147, + 4.1310604390974355, + 2.9926766904262836, + 3.9567826403539987, + 3.6201223225974415, + 1.7488803676871827, + 1.7001962831428985, + 2.3310342421556203, + 3.056664544088867 + ] + }, + "K2V1_pb0": { + "n": 141, + "mean_delta_pct": 5.4442, + "paired_sigma_pct": 1.6793, + "sem_pct": 0.1414, + "z": 38.5, + "significant_at_2sigma": true, + "n_negative_segments": 0, + "per_segment_delta_pct": [ + 3.8728414729142835, + 3.3684973972456644, + 7.6500314139360235, + 7.441818144151274, + 3.7958278800669856, + 7.282477540269829, + 8.052493202052151, + 7.665176691173888, + 7.345523794640964, + 2.8054359109734968, + 6.602146175055575, + 4.662406173042049, + 5.199656264408978, + 6.036608820387757, + 9.329593455119438, + 5.438680170924238, + 6.350066674670331, + 6.207015279489966, + 12.374639949312227, + 4.1495137259194035, + 5.342003246455789, + 3.8460712974500653, + 5.12330072600698, + 3.8774970492395657, + 4.303102635827865, + 3.1253016757956327, + 4.99073852526399, + 4.707401748207017, + 4.1131337373471215, + 8.142361576801562, + 1.8545004160921597, + 5.098406235678545, + 4.491739317766528, + 4.464522093678059, + 
4.96145056393158, + 4.3250931577579905, + 4.489519662204238, + 4.304279133087955, + 4.25790648064228, + 4.100135732772319, + 6.391172144114289, + 5.117227511645684, + 4.497044798223429, + 7.923495753715819, + 4.203479330231821, + 5.036789751601175, + 3.7885209251029335, + 5.077928365366802, + 7.8407137651604, + 6.361856298361755, + 5.529509820199343, + 5.5321146726169035, + 6.533075498955234, + 5.026667872824142, + 3.991172728987355, + 3.418326649859395, + 9.292437333354997, + 5.452854922740755, + 3.927795546586017, + 7.071146951551764, + 7.093993421957657, + 4.413795139007853, + 8.973717420773188, + 4.511205528387623, + 5.8486857604976885, + 4.9006205430137655, + 2.428294327793203, + 7.420211959711853, + 5.931459897865373, + 5.654618452086595, + 7.208363070042338, + 3.5668461600160315, + 4.332812474433422, + 5.666726162816313, + 5.79564385537772, + 5.040089550353478, + 6.298748515453979, + 7.848557689569497, + 4.436587035632188, + 6.964293688868919, + 6.000584519096857, + 6.141185697606545, + 5.896678451212115, + 5.613178224136889, + 4.686289067265988, + 3.6499570353523323, + 4.753081100470842, + 7.718414651200524, + 5.375448599426184, + 3.3663448989595355, + 4.883905999861845, + 6.82245515258847, + 8.376851800467623, + 4.404604804280351, + 3.117202750764716, + 5.533084009870767, + 2.7296040607224374, + 7.7340240357557635, + 6.982079153752566, + 5.168305651827, + 6.482443193992916, + 5.648492415854082, + 5.322416954596186, + 6.827751125101844, + 5.427958093568099, + 4.7711258281149895, + 4.228651434165702, + 5.972406741799056, + 6.100399023798634, + 5.501924432883369, + 6.739652250332298, + 6.708101304563862, + 5.465905076738507, + 3.215596062047521, + 3.1516484073963746, + 3.9962629928742714, + 6.147337126855373, + 2.93935970383005, + 3.731663504383957, + 3.4468466643819475, + 6.5488095870731, + 6.964421700364738, + 4.015845407603009, + 6.232954248861601, + 10.734097343721775, + 2.6258276712629693, + 5.325205769755692, + 4.717229680950918, + 4.105835592502604, + 
6.682981531700199, + 5.5067571949323915, + 7.312156294547588, + 6.449869481887982, + 4.689751693600377, + 4.461915463929984, + 5.748611320817686, + 5.871157353438256, + 3.733553594387055, + 3.470130222551657, + 4.027684396659152, + 5.733340214347752 + ] + }, + "K1V2_pb0": { + "n": 141, + "mean_delta_pct": 363.3605, + "paired_sigma_pct": 183.8369, + "sem_pct": 15.4819, + "z": 23.47, + "significant_at_2sigma": true, + "n_negative_segments": 0, + "per_segment_delta_pct": [ + 368.23230742791714, + 229.8794441719583, + 722.2724322160237, + 407.06686116455876, + 491.24995217385117, + 508.0504498258551, + 331.11413085047633, + 409.27407538270637, + 354.2874299645235, + 253.34894599288364, + 193.3584638160431, + 525.5423711428433, + 329.7833650582846, + 381.4972388589581, + 273.94180356315553, + 606.6095638886411, + 301.75512350595045, + 393.5624002767462, + 227.62156151053472, + 235.81144667588856, + 275.3886802785804, + 259.44504717978583, + 181.26485416090736, + 210.70506539260373, + 298.59956422388507, + 376.56986694474483, + 747.7003412370054, + 504.7690802661958, + 360.25798482114806, + 299.09720339432977, + 273.6727162456352, + 451.2242519979195, + 380.862755618325, + 1001.3380263882046, + 364.62381623378826, + 223.38194421117083, + 280.0454883816555, + 176.2400937747864, + 224.18448120148983, + 336.90817345869107, + 518.2552476905869, + 346.89305062778425, + 283.1611739664947, + 474.83558420687416, + 212.66994754378234, + 313.38300614672295, + 417.9297435389028, + 668.0152338884104, + 360.779440154502, + 195.22593332912194, + 209.59837608092985, + 194.9169340451881, + 218.46903236326918, + 178.26002424365612, + 176.6864021121178, + 244.2465581899776, + 335.5652174216234, + 199.11502333498285, + 425.9441985191093, + 289.9209740046449, + 278.6196176975802, + 324.6294105078407, + 214.23159187749508, + 241.16892771621522, + 239.39241151383087, + 233.70217693730876, + 286.7165458934264, + 194.4799214230118, + 161.36339974369696, + 625.5125919183471, + 
316.17598537281896, + 313.69473756400185, + 857.8660793836415, + 273.9332448124498, + 367.9148140553564, + 336.03227834708804, + 322.08620802795707, + 241.8524523976231, + 399.00744863687487, + 181.9957718831323, + 158.17545200712325, + 215.15152426840265, + 317.34803798006243, + 247.9014104816044, + 251.7415378378097, + 225.29600610754176, + 279.90374549676716, + 959.5515846699362, + 198.9752682327989, + 506.21660909689297, + 253.70493359198443, + 211.02125452124875, + 443.3362562963315, + 182.49117663626947, + 259.08525922899236, + 212.5168090240867, + 508.79222318347917, + 504.95593177738465, + 647.5573959858599, + 459.51045282078707, + 410.88565449774626, + 343.99438271867245, + 1038.560558315046, + 224.2735042514632, + 273.905578452618, + 164.6553095346665, + 247.2525095802729, + 151.13292144016373, + 457.42259859424547, + 756.8619663633884, + 575.1800346670805, + 354.5310759662209, + 235.92774314587444, + 480.89578284661127, + 158.1263140415554, + 621.5034890848168, + 521.5459655191803, + 219.44876299129172, + 222.99017615401183, + 364.2216071509068, + 341.36534565935136, + 496.5473620939898, + 355.54792259740935, + 749.6547178138242, + 187.9881625323747, + 465.5376670683951, + 912.6093081981186, + 818.860264547219, + 362.07427280738654, + 537.3310630898313, + 244.39425493839596, + 504.3921145978043, + 154.55342846722704, + 204.94102548781072, + 359.9452105220318, + 511.2387164164263, + 486.34434179079346, + 179.01337601145607, + 281.56147191370167, + 449.53122973956357, + 319.0644106035002 + ] + } + }, + "bpe_results": { + "FP16": { + "bpe_nominal_raw": 16.0, + "bpe_honest_raw": 16.0, + "bpe_zstd_l22": 16.0, + "bpe_shannon": 16.0 + }, + "K4V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 3.0, + "bpe_honest_raw": 3.125, + "bpe_zstd_l22": 2.2174, + "bpe_shannon": 2.6227, + "zstd_bytes": 18618811, + "raw_int8_bytes": 67174400 + }, + "K3V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 2.5, + "bpe_honest_raw": 2.625, + "bpe_zstd_l22": 1.6362, 
+ "bpe_shannon": 1.8314, + "zstd_bytes": 13739107, + "raw_int8_bytes": 67174400 + }, + "K2V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 2.0, + "bpe_honest_raw": 2.125, + "bpe_zstd_l22": 1.088, + "bpe_shannon": 1.1425, + "zstd_bytes": 9135350, + "raw_int8_bytes": 67174400 + }, + "K2V1_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.6506, + "bpe_shannon": 0.8022, + "zstd_bytes": 5463241, + "raw_int8_bytes": 67174400 + }, + "K1V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.7365, + "bpe_shannon": 0.7816, + "zstd_bytes": 6183833, + "raw_int8_bytes": 67174400 + } + }, + "frontier": [ + { + "label": "FP16", + "kb": 0, + "vb": 0, + "bpe_honest_raw": 16.0, + "bpe_zstd_l22": 16.0, + "bpe_shannon": 16.0, + "mean_ppl_delta_pct": 0.0, + "sem_pct": 0.0 + }, + { + "label": "K4V2_pb0", + "kb": 4, + "vb": 2, + "bpe_honest_raw": 3.125, + "bpe_zstd_l22": 2.2174, + "bpe_shannon": 2.6227, + "mean_ppl_delta_pct": 0.2893, + "sem_pct": 0.0317 + }, + { + "label": "K3V2_pb0", + "kb": 3, + "vb": 2, + "bpe_honest_raw": 2.625, + "bpe_zstd_l22": 1.6362, + "bpe_shannon": 1.8314, + "mean_ppl_delta_pct": 0.6837, + "sem_pct": 0.0409 + }, + { + "label": "K2V2_pb0", + "kb": 2, + "vb": 2, + "bpe_honest_raw": 2.125, + "bpe_zstd_l22": 1.088, + "bpe_shannon": 1.1425, + "mean_ppl_delta_pct": 3.2469, + "sem_pct": 0.0895 + }, + { + "label": "K2V1_pb0", + "kb": 2, + "vb": 1, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.6506, + "bpe_shannon": 0.8022, + "mean_ppl_delta_pct": 5.4442, + "sem_pct": 0.1414 + }, + { + "label": "K1V2_pb0", + "kb": 1, + "vb": 2, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.7365, + "bpe_shannon": 0.7816, + "mean_ppl_delta_pct": 363.3605, + "sem_pct": 15.4819 + } + ], + "base_fp16_ppl": 6.4725, + "n_segments": 141, + "model_config": { + "n_kv_heads": 8, + "head_dim": 128, + "rope_theta": 500000.0, + "n_layers": 32, + "rope_scaling_type": "llama3" 
+ }, + "rope_scaling_loaded": { + "factor": 8.0, + "low_freq_factor": 1.0, + "high_freq_factor": 4.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3", + "rope_theta": 500000.0 + }, + "gpu": { + "name": "Tesla T4", + "sm": "75", + "n_gpus": 2, + "total_gb": 14.6 + }, + "transformers": "5.12.1", + "errors": {}, + "device_map": { + "model.embed_tokens": "0", + "model.layers.0": "0", + "model.layers.1": "0", + "model.layers.2": "0", + "model.layers.3": "0", + "model.layers.4": "0", + "model.layers.5": "0", + "model.layers.6": "0", + "model.layers.7": "0", + "model.layers.8": "0", + "model.layers.9": "0", + "model.layers.10": "0", + "model.layers.11": "0", + "model.layers.12": "0", + "model.layers.13": "0", + "model.layers.14": "1", + "model.layers.15": "1", + "model.layers.16": "1", + "model.layers.17": "1", + "model.layers.18": "1", + "model.layers.19": "1", + "model.layers.20": "1", + "model.layers.21": "1", + "model.layers.22": "1", + "model.layers.23": "1", + "model.layers.24": "1", + "model.layers.25": "1", + "model.layers.26": "1", + "model.layers.27": "1", + "model.layers.28": "1", + "model.layers.29": "1", + "model.layers.30": "1", + "model.layers.31": "1", + "model.norm": "1", + "model.rotary_emb": "1", + "lm_head": "1" + }, + "run_complete_utc": "2026-06-16T14:08:33Z" +} \ No newline at end of file diff --git a/experiments/kaggle/results/nq_mistral_niah_frontier.json b/experiments/kaggle/results/nq_mistral_niah_frontier.json new file mode 100644 index 0000000..76b161e --- /dev/null +++ b/experiments/kaggle/results/nq_mistral_niah_frontier.json @@ -0,0 +1,621 @@ +{ + "model_name": "Mistral-7B-Instruct-v0.3", + "model_id": "unsloth/mistral-7b-instruct-v0.3", + "weights": "FP16 (unsloth/mistral-7b-instruct-v0.3, ungated)", + "harness": "chat-template NIAH, 5 depths per context, prefill+generate", + "contexts": [ + 4096, + 8192 + ], + "n_depths": 5, + "bpe_raw": { + "FP16": 16.0, + "K4V2_pb0": 3.0, + "K3V2_pb0": 2.5, + "K2V2_pb0": 2.0, + 
"K2V1_pb0": 1.5, + "K1V2_pb0": 1.5 + }, + "frontier_note": "K1* configs expected to cliff. Catastrophic NIAH is a valid frontier datapoint; label and report honestly.", + "run_at_utc": "2026-06-16T15:29:34Z", + "niah_results": { + "FP16_4k": { + "config": "FP16", + "ctx": 4096, + "hits": 4, + "n": 5, + "bpe_raw": 16.0, + "elapsed_s": 43, + "cells": [ + { + "depth": 0.1667, + "target_key": "2279189", + "target_value": "307", + "answer": "The value of 2279189 is 307.", + "recall": true + }, + { + "depth": 0.3333, + "target_key": "2309015", + "target_value": "159", + "answer": "In the game Valkyria Chronicles III, the value 2309015 is the identification number of one of the main characters, Imca.", + "recall": false + }, + { + "depth": 0.5, + "target_key": "1352092", + "target_value": "796", + "answer": "The value 1352092 is 796 in base 10.", + "recall": true + }, + { + "depth": 0.6667, + "target_key": "1355090", + "target_value": "424", + "answer": "The value 1355090 is 424 in base 10. This is because when we convert 1355090 from base 16 to base 10, we get:\n\n13 * 16^4 + 5 *", + "recall": true + }, + { + "depth": 0.8333, + "target_key": "7142782", + "target_value": "205", + "answer": "The value of 7142782 is 205.", + "recall": true + } + ] + }, + "K4V2_pb0_4k": { + "config": "K4V2_pb0", + "ctx": 4096, + "hits": 5, + "n": 5, + "bpe_raw": 3.0, + "elapsed_s": 82, + "cells": [ + { + "depth": 0.1667, + "target_key": "5959444", + "target_value": "161", + "answer": "The value of 5959444 is 161.", + "recall": true + }, + { + "depth": 0.3333, + "target_key": "8504436", + "target_value": "553", + "answer": "The value 8504436 is 553 in base 10.", + "recall": true + }, + { + "depth": 0.5, + "target_key": "5321643", + "target_value": "478", + "answer": "The value of 5321643 is 478.awa in the game Valkyria Chronicles III.", + "recall": true + }, + { + "depth": 0.6667, + "target_key": "4940090", + "target_value": "220", + "answer": "The value of 4940090 is 220.", + "recall": true 
+ }, + { + "depth": 0.8333, + "target_key": "2498212", + "target_value": "568", + "answer": "The value of 2498212 is 568.", + "recall": true + } + ] + }, + "K3V2_pb0_4k": { + "config": "K3V2_pb0", + "ctx": 4096, + "hits": 5, + "n": 5, + "bpe_raw": 2.5, + "elapsed_s": 82, + "cells": [ + { + "depth": 0.1667, + "target_key": "3186998", + "target_value": "866", + "answer": "The value 3186998 is 866 in base 10.", + "recall": true + }, + { + "depth": 0.3333, + "target_key": "2924634", + "target_value": "362", + "answer": "The value of 2924634 is 362.", + "recall": true + }, + { + "depth": 0.5, + "target_key": "5640920", + "target_value": "144", + "answer": "The value 5640920 is 144.awa in the Valkyria Chronicles III game.", + "recall": true + }, + { + "depth": 0.6667, + "target_key": "5668961", + "target_value": "606", + "answer": "The value of 5668961 is 606.", + "recall": true + }, + { + "depth": 0.8333, + "target_key": "6411103", + "target_value": "269", + "answer": "The value of 6411103 is 269.", + "recall": true + } + ] + }, + "K2V2_pb0_4k": { + "config": "K2V2_pb0", + "ctx": 4096, + "hits": 5, + "n": 5, + "bpe_raw": 2.0, + "elapsed_s": 82, + "cells": [ + { + "depth": 0.1667, + "target_key": "6431679", + "target_value": "620", + "answer": "The value of 6431679 is 620.", + "recall": true + }, + { + "depth": 0.3333, + "target_key": "2875851", + "target_value": "604", + "answer": "The value 2875851 is 604 in base 10.", + "recall": true + }, + { + "depth": 0.5, + "target_key": "1031700", + "target_value": "911", + "answer": "The value 1031700 is 911 in base 8.", + "recall": true + }, + { + "depth": 0.6667, + "target_key": "1195771", + "target_value": "550", + "answer": "The value of 1195771 is 550.", + "recall": true + }, + { + "depth": 0.8333, + "target_key": "2086875", + "target_value": "279", + "answer": "The value of 2086875 is 279.", + "recall": true + } + ] + }, + "K2V1_pb0_4k": { + "config": "K2V1_pb0", + "ctx": 4096, + "hits": 4, + "n": 5, + "bpe_raw": 1.5, + 
"elapsed_s": 94, + "cells": [ + { + "depth": 0.1667, + "target_key": "2360475", + "target_value": "886", + "answer": "The value of 2360475 is 886. This is because 2360475 is a base 10 number, and when you divide it by 27 (the number of digits), you get 886.", + "recall": true + }, + { + "depth": 0.3333, + "target_key": "6244407", + "target_value": "572", + "answer": "The value 6244407 is a nonet (a set of nine numbers) in base 10. When written in decimal notation, it is the number 624,440,070.\n\nIn the context of the Valkyria", + "recall": false + }, + { + "depth": 0.5, + "target_key": "7412930", + "target_value": "957", + "answer": "The value 7412930 is 957 in base 10. This is because 7412930 in base 16 is 1111010101101011011010110", + "recall": true + }, + { + "depth": 0.6667, + "target_key": "3679707", + "target_value": "584", + "answer": "The value of 3679707 is 584.", + "recall": true + }, + { + "depth": 0.8333, + "target_key": "4471342", + "target_value": "804", + "answer": "The value of 4471342 is 804.", + "recall": true + } + ] + }, + "K1V2_pb0_4k": { + "config": "K1V2_pb0", + "ctx": 4096, + "hits": 0, + "n": 5, + "bpe_raw": 1.5, + "elapsed_s": 104, + "cells": [ + { + "depth": 0.1667, + "target_key": "7502115", + "target_value": "511", + "answer": "Valkyria Chronicles III was released in Japan on January 27, 2011320113 , 2011132011320113201132011320113201", + "recall": false + }, + { + "depth": 0.3333, + "target_key": "3450957", + "target_value": "503", + "answer": "The game 's story was adapted into a manga by Misaki Kobayashi , who had worked on the original Valkyria Chronicles . 
The manga was released in 2013 , and was l", + "recall": false + }, + { + "depth": 0.5, + "target_key": "8925079", + "target_value": "570", + "answer": "Valkyria Chronicles III was released in Japan on January 27201111011010101010101010101010101010101010101010", + "recall": false + }, + { + "depth": 0.6667, + "target_key": "3762754", + "target_value": "355", + "answer": "= = = =\n\n = = = =\n\n = = = = =\n\n = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =", + "recall": false + }, + { + "depth": 0.8333, + "target_key": "7932261", + "target_value": "604", + "answer": "The game was adapted into a manga by Misaki Kawai , released in Japan on February 16 , 2011 , 2011 , 2011 , 2011 , 2011 , 2011 , 201", + "recall": false + } + ] + }, + "FP16_8k": { + "config": "FP16", + "ctx": 8192, + "hits": 0, + "n": 5, + "bpe_raw": 16.0, + "elapsed_s": 2, + "cells": [ + { + "depth": 0.1667, + "target_key": "2279189", + "target_value": "307", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.3333, + "target_key": "2309015", + "target_value": "159", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.5, + "target_key": "1352092", + "target_value": "796", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.6667, + "target_key": "1355090", + "target_value": "424", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. 
Inclu", + "recall": null + }, + { + "depth": 0.8333, + "target_key": "7142782", + "target_value": "205", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + } + ] + }, + "K4V2_pb0_8k": { + "config": "K4V2_pb0", + "ctx": 8192, + "hits": 0, + "n": 5, + "bpe_raw": 3.0, + "elapsed_s": 2, + "cells": [ + { + "depth": 0.1667, + "target_key": "5959444", + "target_value": "161", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.3333, + "target_key": "8504436", + "target_value": "553", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.5, + "target_key": "5321643", + "target_value": "478", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.6667, + "target_key": "4940090", + "target_value": "220", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.8333, + "target_key": "2498212", + "target_value": "568", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + } + ] + }, + "K3V2_pb0_8k": { + "config": "K3V2_pb0", + "ctx": 8192, + "hits": 0, + "n": 5, + "bpe_raw": 2.5, + "elapsed_s": 2, + "cells": [ + { + "depth": 0.1667, + "target_key": "3186998", + "target_value": "866", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. 
GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.3333, + "target_key": "2924634", + "target_value": "362", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.5, + "target_key": "5640920", + "target_value": "144", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.6667, + "target_key": "5668961", + "target_value": "606", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.8333, + "target_key": "6411103", + "target_value": "269", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + } + ] + }, + "K2V2_pb0_8k": { + "config": "K2V2_pb0", + "ctx": 8192, + "hits": 0, + "n": 5, + "bpe_raw": 2.0, + "elapsed_s": 2, + "cells": [ + { + "depth": 0.1667, + "target_key": "6431679", + "target_value": "620", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.3333, + "target_key": "2875851", + "target_value": "604", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.5, + "target_key": "1031700", + "target_value": "911", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. 
Inclu", + "recall": null + }, + { + "depth": 0.6667, + "target_key": "1195771", + "target_value": "550", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.8333, + "target_key": "2086875", + "target_value": "279", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + } + ] + }, + "K2V1_pb0_8k": { + "config": "K2V1_pb0", + "ctx": 8192, + "hits": 0, + "n": 5, + "bpe_raw": 1.5, + "elapsed_s": 2, + "cells": [ + { + "depth": 0.1667, + "target_key": "2360475", + "target_value": "886", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.3333, + "target_key": "6244407", + "target_value": "572", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.5, + "target_key": "7412930", + "target_value": "957", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.6667, + "target_key": "3679707", + "target_value": "584", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.8333, + "target_key": "4471342", + "target_value": "804", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. 
Inclu", + "recall": null + } + ] + }, + "K1V2_pb0_8k": { + "config": "K1V2_pb0", + "ctx": 8192, + "hits": 0, + "n": 5, + "bpe_raw": 1.5, + "elapsed_s": 2, + "cells": [ + { + "depth": 0.1667, + "target_key": "7502115", + "target_value": "511", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.3333, + "target_key": "3450957", + "target_value": "503", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.5, + "target_key": "8925079", + "target_value": "570", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.6667, + "target_key": "3762754", + "target_value": "355", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. Inclu", + "recall": null + }, + { + "depth": 0.8333, + "target_key": "7932261", + "target_value": "604", + "answer": "OutOfMemoryError: CUDA out of memory. Tried to allocate 3.98 GiB. GPU 0 has a total capacity of 14.56 GiB of which 3.18 GiB is free. 
Inclu", + "recall": null + } + ] + } + }, + "fp16_gate": { + "hits_4k": 4, + "required": 4, + "pass": true + }, + "errors": {}, + "gpu": { + "name": "Tesla T4", + "sm": "75", + "n_gpus": 2, + "total_gb": 14.6 + }, + "transformers": "5.12.1", + "device_map": { + "model.embed_tokens": "0", + "model.layers.0": "0", + "model.layers.1": "0", + "model.layers.2": "0", + "model.layers.3": "0", + "model.layers.4": "0", + "model.layers.5": "0", + "model.layers.6": "0", + "model.layers.7": "0", + "model.layers.8": "0", + "model.layers.9": "0", + "model.layers.10": "0", + "model.layers.11": "0", + "model.layers.12": "0", + "model.layers.13": "0", + "model.layers.14": "0", + "model.layers.15": "0", + "model.layers.16": "1", + "model.layers.17": "1", + "model.layers.18": "1", + "model.layers.19": "1", + "model.layers.20": "1", + "model.layers.21": "1", + "model.layers.22": "1", + "model.layers.23": "1", + "model.layers.24": "1", + "model.layers.25": "1", + "model.layers.26": "1", + "model.layers.27": "1", + "model.layers.28": "1", + "model.layers.29": "1", + "model.layers.30": "1", + "model.layers.31": "1", + "model.norm": "1", + "model.rotary_emb": "1", + "lm_head": "1" + }, + "model_config": { + "n_kv_heads": 8, + "head_dim": 128, + "rope_theta": null, + "n_layers": 32 + }, + "run_complete_utc": "2026-06-16T15:40:05Z" +} \ No newline at end of file diff --git a/experiments/kaggle/results/nq_mistral_subbpe_frontier.json b/experiments/kaggle/results/nq_mistral_subbpe_frontier.json new file mode 100644 index 0000000..ee17974 --- /dev/null +++ b/experiments/kaggle/results/nq_mistral_subbpe_frontier.json @@ -0,0 +1,1094 @@ +{ + "model_name": "Mistral-7B-Instruct-v0.3", + "model_id": "unsloth/mistral-7b-instruct-v0.3", + "weights": "FP16 (unsloth/mistral-7b-instruct-v0.3, ungated)", + "protocol": "AQUA-iso paired-PPL: wikitext-2-raw-v1 test split sliced into 2048-token windows (prefix=1024, cont=1024). Per window: prefill prefix ONCE (FP16 KV), score cont -> base PPL. 
Then for each quant config re-quantize SAME cached prefix KV and rescore SAME cont. Paired delta% = (quant_ppl - base_ppl)/base_ppl*100.", + "frontier_note": "This kernel maps the sub-1.5bpe quality frontier on Mistral-Inst-v0.3. K1* configs are expected to be catastrophic; catastrophic PPL is a valid frontier datapoint, not a failure. Lead with bpe<->PPL frontier.", + "prefix_len": 1024, + "cont_len": 1024, + "n_segs_target": 161, + "n_segs_min": 60, + "run_at_utc": "2026-06-16T12:32:54Z", + "configs_meta": [ + { + "label": "FP16", + "kb": 0, + "vb": 0, + "pb": 0, + "bpe_nominal_raw": 16.0, + "bpe_honest_raw": 16.0 + }, + { + "label": "K3V2_pb0", + "kb": 3, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 2.5, + "bpe_honest_raw": 2.625 + }, + { + "label": "K2V2_pb0", + "kb": 2, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 2.0, + "bpe_honest_raw": 2.125 + }, + { + "label": "K2V1_pb0", + "kb": 2, + "vb": 1, + "pb": 0, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625 + }, + { + "label": "K1V2_pb0", + "kb": 1, + "vb": 2, + "pb": 0, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625 + }, + { + "label": "K1V1_pb0", + "kb": 1, + "vb": 1, + "pb": 0, + "bpe_nominal_raw": 1.0, + "bpe_honest_raw": 1.125 + } + ], + "ppl_results": { + "K3V2_pb0": { + "n": 161, + "mean_delta_pct": 0.4311, + "paired_sigma_pct": 0.435, + "sem_pct": 0.0343, + "z": 12.57, + "significant_at_2sigma": true, + "n_negative_segments": 25, + "per_segment_delta_pct": [ + 1.8229600919655953, + 0.8415127310572957, + 0.3574056159184195, + -0.7396568846237053, + -0.08853057924749629, + 0.6301335152453226, + -0.07467135931375163, + -0.5612023784653362, + 0.826543007924459, + 1.0559754534089596, + 0.7941494033038491, + 0.9198337753436475, + -0.46963636182717183, + 0.5352636596146103, + 0.1668637728212931, + 0.7077630936306989, + 0.42990284938807716, + -0.48141015657952546, + 0.06658638224110645, + -0.039733769755316596, + 0.6603842818732776, + 0.030276823237417504, + 0.5589118102805093, + 0.09036086442458004, + 
0.35823734308615457, + 0.5530270794761161, + 0.25799933967173766, + 0.3926204294689298, + -0.1219659543027439, + 0.4849234505231436, + 0.38236943974453674, + 0.2950097104357601, + 0.8842740566087557, + 0.5539115722375247, + 0.3153733385913907, + 0.4972475543522323, + 0.2879882108775827, + 0.5133562550080317, + 0.3389292383009992, + 0.04954948160558214, + 0.28884486750703703, + 0.014122856862934196, + 0.9495131960802338, + 0.5762987488067542, + -0.08212885104474472, + 0.43468357096926646, + 0.33897727127114197, + 0.5761287313077234, + 0.6611823795984374, + 0.9533702287379651, + 0.31483459679035825, + 0.8766680353435156, + 0.9848632633564043, + 0.6774709788743158, + 0.2806161653870948, + 0.7917037167111293, + 0.15953059795788163, + 0.4517139235710481, + -0.07944876224369551, + 0.648329258390661, + 0.17047661521373444, + 0.26523772879294366, + 0.16682930834091117, + 0.46598473942632096, + 1.154826138200131, + -0.1280678158587348, + 0.7933200947886283, + 0.9695822505638393, + 0.18858175019661663, + 0.036894178807391535, + -0.2610178756907739, + 0.36996633087581127, + 0.002767563763558531, + 0.6088405743437563, + 0.2589922249502889, + 0.21553045543986765, + -0.007288050125632724, + 0.37287326585654545, + -0.21837030497130955, + 0.5404795526105683, + 0.4049452288137747, + 0.776231827062028, + 0.5523020418928636, + 0.37780443694052973, + 0.3782549214056432, + 0.013898431467082528, + -0.16206646605306313, + 0.7331184673648528, + 0.8347757789364765, + 0.39089934342568167, + 0.758724408778581, + 0.22938687145387757, + 0.16462810152834614, + 0.10308283115197517, + 0.4309996007229958, + -0.13603374006520433, + 1.0397404909113765, + 0.37046783121126886, + 1.0783156390831692, + 0.6760763451150543, + -0.1546403325543632, + 0.09421202362435444, + 0.11382349016081918, + 0.9221225368995926, + -0.2758507833347983, + 1.2564000675286142, + 0.8359523749456491, + 0.19648063647129235, + 0.025021560093920515, + 0.44327501843680384, + 0.49256006766590854, + 0.11884078513939941, + 
1.7873821222757131, + 0.7541000880531249, + 0.5192446790401267, + -0.0042633942940103475, + 0.2656787084657853, + -0.04178389457277952, + 1.477987254490875, + 0.2632458447769801, + 0.21686473804730483, + 0.6730401466387314, + 0.6679499002795068, + 0.6120735117520255, + 0.46103420781998844, + 0.465040903407843, + 0.8451755531008003, + -0.0030965886585745915, + 0.6297180294965383, + 0.16536233123558158, + 0.19209495403180024, + 0.9645223220424666, + 0.5725178080519266, + 0.25514372443461747, + 1.3004617549428124, + 0.7599182053100112, + 0.47046357261266764, + 0.614918221367925, + 1.4597022331693068, + 1.2495717824552852, + -0.13592916528131405, + 1.585222638756233, + 0.1511222319002615, + 0.21323373193032086, + 0.6663114378663204, + 0.25866826130133713, + 1.0198630167678429, + 0.371684055006735, + 0.48989620538142825, + 0.6479950027683272, + -0.2566132321055745, + 0.7348589829350377, + -0.0337252113580521, + 0.32909797375140576, + 0.4493126102352295, + 0.7594238030104041, + 0.5963914244305516, + 0.32487757984746274, + -0.08148461986488963, + 0.06046102651876684, + 0.7095046776720296 + ] + }, + "K2V2_pb0": { + "n": 161, + "mean_delta_pct": 1.2833, + "paired_sigma_pct": 0.7745, + "sem_pct": 0.061, + "z": 21.02, + "significant_at_2sigma": true, + "n_negative_segments": 2, + "per_segment_delta_pct": [ + 3.097880513708848, + 0.5510625706634521, + 1.8437223163868313, + 0.09632333630240424, + 0.5273410734264695, + 0.13683622079482596, + 0.28054715816046427, + 0.7013574571215584, + 0.6478625053513999, + 2.4201245045869593, + 1.5670641575266946, + 0.7404514404017913, + 0.5627599365331022, + 0.6569450048224398, + 3.026122100834446, + 1.9423858422730098, + 1.3204338504595432, + 1.7210810808060326, + 0.5413910080533999, + 1.8306968733905458, + 1.372075218162479, + 0.967790855829002, + 1.3029137246244302, + 1.6317444305918918, + 0.9153258519440769, + 0.8151045240823118, + 0.9807240497258581, + 1.129866886054364, + 0.7329647609415081, + 2.9885322129419793, + 1.544704055247352, + 
1.8987248073314489, + 2.0545667723501255, + 1.719215485877925, + 1.5125064412938194, + 2.377557585013685, + 1.3087908132083987, + 0.5434846795674754, + 1.7246723712844105, + 0.8813040181564349, + 1.5152794175666782, + 0.9536752939917004, + 0.40693992790654393, + 1.5470844624879347, + -0.49294140800857766, + 0.8348361168593723, + 0.601128902902759, + 1.7443445444625145, + 0.46965195715579605, + 1.4721792856259162, + 1.293451691896843, + 2.103344123473342, + 1.5754151361953024, + 0.8086950761093024, + 0.7886127673813783, + 2.0871302683164594, + 0.9607554831998848, + 0.7808239954730419, + 0.5123277709786521, + 2.083474259261826, + 0.7402977227892266, + 1.1218278565754494, + 1.3264802095026256, + 1.6831672249797778, + 2.6332861432304235, + 1.636925983468549, + 2.4250244816264663, + 1.697655586960603, + 0.7545733627587874, + 1.5790073622217655, + 0.7813456404444956, + 0.35803916920044515, + 0.46032185631812245, + 1.6176272557892515, + 0.646191246090975, + 0.2418265931393095, + 0.3522892942791063, + 2.052397361524718, + 1.888841311344815, + 2.651653674528494, + 2.3923715859959858, + 3.9889917080597144, + 1.699219552908654, + 0.581063869423678, + 0.4941836752508107, + 0.8498644008412904, + 0.19363276887623354, + 1.3663885574036656, + 2.188762885353496, + 0.3775401536619476, + 2.6682582205897796, + 0.2831394622440487, + 1.3617151193096202, + 2.7269669569434942, + 1.5011647546068427, + 0.17516855982169624, + 0.41292125868096197, + 0.4608208024097259, + 1.3659275727502393, + 1.3962266082607273, + 0.7704881420848515, + 1.081267283575466, + 1.5539981377329963, + 2.972532089005146, + 1.8444018179642223, + 1.8926183886425514, + 0.153890981544197, + 1.0935042209227428, + 1.696350271927779, + 0.7155311524421146, + 1.093023304912543, + 0.6656970213366152, + 2.9213132904733996, + 1.689839160459076, + 1.2508198640568464, + 0.9703707159922597, + 0.9967923987818194, + 1.3641170151549449, + 2.379802828238421, + 0.6289593208165767, + 1.0780374147442555, + 1.922257604980565, + 
1.3492243589752977, + 1.0252551809724237, + 1.5910340383680732, + 1.399293718674417, + 1.8563349701719694, + 0.5122796550244405, + 1.4457212573021616, + 0.7615491471697613, + 0.997692882105678, + 0.5789934893144111, + 1.3100275003318933, + 1.2344203295993625, + 1.3894950660664305, + 0.7365121238378312, + 1.3865738553569282, + 1.243728420567322, + 2.926620640094308, + 0.8031546252418884, + 1.0030234243294744, + 2.851076359890826, + 1.7892976946718004, + 0.8112346768682156, + 1.4036624634214703, + 0.49777374514706396, + 2.3275531915687355, + 0.600120589875862, + 0.7253159022379225, + 0.920869448615369, + 0.23608037747382657, + 2.922144710977807, + 0.6199786919735355, + 0.6340837263100476, + 1.861524925869555, + 1.0742058596253343, + 1.4365250962036102, + 1.3506677379269765, + 0.30406937139347695, + -0.14701049546532566, + 2.304300239861057 + ] + }, + "K2V1_pb0": { + "n": 161, + "mean_delta_pct": 2.2317, + "paired_sigma_pct": 1.0872, + "sem_pct": 0.0857, + "z": 26.05, + "significant_at_2sigma": true, + "n_negative_segments": 3, + "per_segment_delta_pct": [ + 4.992251042399835, + 1.8453647068871044, + 2.8921562346991667, + 1.8767062261261687, + 1.7247515028412088, + 1.1762194963965595, + 2.557044299307692, + 1.5420335690610305, + 0.21575533328626673, + 2.7356837956632436, + 2.489643973320935, + 2.5346012328254353, + 1.4976329791997416, + 2.2720224189421434, + 3.7593417103232305, + 4.398057730908155, + 2.66355854827075, + 3.112207959458952, + 2.4576356775936437, + 2.3277889318016456, + 3.4670035694396693, + 2.9668696328626813, + 1.7062424824733597, + 1.981461215380191, + 1.3013497914954113, + 0.5233920447748128, + 2.1513562630078424, + 1.6522988698221943, + 1.9202663454522872, + 4.033252673103128, + 2.0680056282855985, + 2.1886314170038976, + 2.4569643462642174, + 2.7351551087764068, + 3.1367461555639853, + 4.08906083212968, + 1.5836968975140984, + -0.09371981834897784, + 3.1792088952392965, + 3.3535298025768476, + 2.9246636481706862, + 1.1951997486697676, + 
2.040741957063557, + 1.1168646725175557, + -0.7925629438139553, + 2.0167514331729945, + 2.514497153126181, + 2.90065926605342, + 1.7717269431189289, + 1.8436552806024824, + 2.613506566393418, + 4.22092111486041, + 2.258492451646236, + 1.6803957470993691, + 2.317484222926526, + 3.5787394100744296, + 3.1382396782550077, + 2.8251694196504373, + 1.218783573006503, + 3.585969611735073, + 1.8449289680506475, + 1.5946602526874014, + 3.45137017344657, + 1.9702303550491154, + 3.6962671471359823, + 3.4221418602039737, + 2.295610821036142, + 2.606485457181546, + 2.291512756711747, + 2.36250071872603, + 1.3189333058293304, + 2.8354544937000066, + 0.936892034019379, + 2.5669496507139486, + 1.8812784321661393, + 1.3462194069782958, + 0.9888153132332466, + 1.7195548207456408, + 2.7978142228248637, + 3.925982761265342, + 3.227041296956112, + 4.872333539602966, + 2.5385617779862977, + 1.283287507585283, + 0.32863570288775257, + 1.5636364436425327, + -0.10111760506390197, + 1.2885906965342, + 2.324219209286184, + 0.8026720745307431, + 3.466845691441876, + 1.5760776522533169, + 2.382899690615993, + 4.479506788106499, + 2.6771636145646323, + 0.5351553740459907, + 2.788461664246969, + 1.2118397256637707, + 1.6010254380801483, + 2.0372768872536153, + 1.6430535714590777, + 3.355490310418505, + 3.8502347485522797, + 3.0081960815544395, + 3.9127749473059246, + 3.1278749531277015, + 1.377580209649153, + 1.8902923753257364, + 1.7994639269974635, + 1.8939567087047282, + 2.012572976173515, + 1.8445237021347658, + 3.0524835759753546, + 1.6212179342271107, + 2.202100158196944, + 1.0095872675556936, + 1.912277303296042, + 1.457871787380308, + 1.6516906006670964, + 0.660742672144475, + 2.1725507504284547, + 2.3818980245828083, + 0.10970200789439037, + 2.0374539540474323, + 3.5348917528606294, + 3.293112449846531, + 2.7014538615983987, + 1.971014430915624, + 1.7081504418221487, + 2.26776925124175, + 2.1652663688164915, + 1.700594889142448, + 1.731094401680971, + 2.4937990397960794, + 
0.9667365759769544, + 1.5322180851213023, + 2.0740706403512235, + 2.5897220843953694, + 3.769069790877732, + 0.9478127656849429, + 1.854786872525686, + 4.177920572984798, + 4.6711588435378015, + 0.30189065676133164, + 2.804937623540505, + 0.5194672309421965, + 3.8233834225358265, + 0.6198943988855958, + 1.8502676590800478, + 0.6789168398851687, + 2.2137143595026414, + 4.366363644585696, + 2.115736873649386, + 3.0198988049706808, + 2.693738489359457, + 2.869996856731538, + 0.9747783813569495, + 3.6433549543365955, + 0.5893819306819523, + 0.11365275678532054, + 2.791340230234339 + ] + }, + "K1V2_pb0": { + "n": 161, + "mean_delta_pct": 31.6683, + "paired_sigma_pct": 7.9055, + "sem_pct": 0.623, + "z": 50.83, + "significant_at_2sigma": true, + "n_negative_segments": 0, + "per_segment_delta_pct": [ + 42.13245702783802, + 24.434809377538887, + 21.86201572150557, + 35.368435464090894, + 34.49194071007711, + 27.803275355226674, + 42.63676131098447, + 38.49091284667458, + 44.135790128662215, + 25.720539890713273, + 19.110921999848916, + 24.697016156173866, + 28.820610037051583, + 25.326281598709254, + 34.74649520906078, + 32.7315942326982, + 33.68691374937629, + 33.31009224172866, + 31.48283365041243, + 31.307115479115918, + 31.054968708905236, + 34.53027775392523, + 24.117297934128707, + 41.50435096858827, + 31.423665970778703, + 39.993148754639904, + 29.53149982265387, + 50.02790699066708, + 35.10478708165518, + 36.811224082835665, + 42.029178232625945, + 44.45712602165742, + 23.317597056874842, + 32.26345215388081, + 36.480898632521935, + 30.609526596799082, + 32.69802576448475, + 21.658257440771358, + 28.73311453530792, + 26.737061280292764, + 22.67042694892315, + 21.343858883778182, + 29.779849581989897, + 22.813203692486987, + 29.506288415946702, + 21.329758800333167, + 23.253275235647056, + 31.912162568518927, + 29.20954994703125, + 44.15684493347343, + 22.827996170694654, + 28.387928671335825, + 46.64622842708075, + 30.199087282601138, + 26.97732932907446, + 
32.66490636937759, + 13.75727835938714, + 25.829624311052704, + 23.70542158263566, + 29.233296152623325, + 38.48370949577222, + 29.534181681264048, + 30.02623821699391, + 28.61654117102309, + 39.744494420633124, + 37.71619546389175, + 37.396374413966264, + 42.21523242364775, + 32.183690826248245, + 28.912237881659937, + 23.921109819016273, + 32.61128302839073, + 15.59773070368337, + 26.62571264429175, + 21.154077291786933, + 22.725082898952632, + 23.972560013219848, + 38.201452046553385, + 52.973092194250505, + 42.19623101873445, + 43.195085790265566, + 31.388914820863214, + 29.579696481782246, + 23.229178872617343, + 30.06979914502542, + 17.677243462576758, + 24.210820250253214, + 24.059911796672154, + 39.865198868331134, + 23.951731125643, + 32.045798572135055, + 22.45697235095634, + 28.341104528908645, + 36.43466606007831, + 15.399643807970266, + 26.440860905034974, + 26.127672052464685, + 30.138998227371435, + 32.826969588827936, + 29.30564806313125, + 19.126115033815253, + 38.1048599656072, + 39.796549732806966, + 27.243880984565923, + 31.95127627217295, + 49.145160245067984, + 32.53956582512649, + 20.011708447982915, + 36.76298098734886, + 29.652661268719836, + 24.957341584929022, + 37.59355821268484, + 27.503396696247925, + 29.020015245680913, + 40.233224561607265, + 41.62071959334136, + 38.373936374661106, + 43.684125718072366, + 24.43546834837781, + 23.70678361941944, + 20.94011135686543, + 33.134859743630074, + 35.30324837108442, + 29.645372638767817, + 38.0753855222264, + 37.63411768689174, + 42.38427194260348, + 29.99572060493202, + 40.08353604830954, + 36.27659593526212, + 30.935971570649897, + 22.174013674687586, + 29.591541014019576, + 35.01172667322148, + 33.19456269283031, + 23.91219331430556, + 17.4773470244085, + 27.710487013347738, + 42.50474093864584, + 32.55944625224469, + 24.847000010733765, + 58.77897489431277, + 28.55713248503285, + 32.06276003322892, + 37.78229361624816, + 25.105190578972003, + 28.40885373797619, + 32.95809531862335, + 
36.10546561927939, + 35.735958900866535, + 26.532836584213698, + 37.38822090628994, + 34.15493026042598, + 23.62180349226825, + 33.67425896616718, + 38.43589761190304, + 40.70345577017372, + 36.32678433288366, + 33.867971412904204, + 54.003994130710204, + 32.40076754712751 + ] + }, + "K1V1_pb0": { + "n": 161, + "mean_delta_pct": 42.4878, + "paired_sigma_pct": 12.2291, + "sem_pct": 0.9638, + "z": 44.08, + "significant_at_2sigma": true, + "n_negative_segments": 0, + "per_segment_delta_pct": [ + 64.71645847709745, + 35.092260802866996, + 28.794592900696603, + 55.3012769449466, + 45.59227350898055, + 43.1746126722634, + 58.840553817564874, + 47.71050881179737, + 56.546169507743265, + 36.294353672423554, + 21.33150488552376, + 30.308509732630466, + 36.43913185787504, + 34.03205368135209, + 42.066760635663215, + 44.701025637923415, + 45.192502288431406, + 45.08857352886494, + 34.64701823870669, + 33.78393220170052, + 44.063036295824915, + 40.36994817670738, + 28.381222000657452, + 47.22588974611413, + 42.629617566038725, + 53.027941622134534, + 35.804109254807535, + 67.18457449064839, + 39.54473332121513, + 52.62800902309726, + 57.463133147710465, + 54.25528959720868, + 32.26210275013714, + 45.08227063074733, + 41.271296075315604, + 45.00973322362599, + 100.37576814726309, + 24.182754300860346, + 41.57421149353446, + 34.98309410490219, + 28.764121651179398, + 28.467429014521226, + 39.14524307506185, + 33.15789509751179, + 49.5549779187274, + 28.03592624819808, + 30.57127550483385, + 47.31154552070065, + 46.892347402592904, + 57.3551101098979, + 34.81770236013659, + 34.56389504173419, + 61.30896684949128, + 43.710914743651095, + 37.89003172415896, + 43.8474352956123, + 17.58776446834597, + 34.66211398786834, + 29.160277544123083, + 42.30211555633973, + 47.36230376290042, + 36.70980561486298, + 45.24154829620653, + 38.195628196296234, + 48.89619327541698, + 53.13365842875594, + 45.12028751156236, + 58.13621084938439, + 45.436294584199814, + 39.97799145813469, + 
30.119052566737537, + 43.12283462015691, + 22.41974267034398, + 36.5646298928409, + 30.85703818466287, + 33.87834133387195, + 33.94549890145245, + 47.18064756922397, + 62.07776861580107, + 45.49952019219856, + 61.99823156817534, + 46.64051157781457, + 33.123898187080854, + 39.98968492270561, + 37.34147757773834, + 22.224987167782512, + 33.59215997202267, + 30.696533858766113, + 66.00771240206076, + 29.350180837595012, + 39.73131639551773, + 26.57616445835076, + 43.450600975054456, + 42.6053938022453, + 25.063821784250926, + 32.27325849843588, + 36.38153673252033, + 33.59893900476753, + 41.09426511375315, + 33.21601208795952, + 30.357993954026064, + 48.413873953745295, + 59.51690867428807, + 34.593972946312, + 42.60171174077583, + 70.41028912705731, + 37.587963746893664, + 29.60508891919973, + 45.64643305966039, + 39.06249666479069, + 31.798018001842873, + 47.94619653405477, + 36.56279533440746, + 37.96548407432782, + 56.42282044017151, + 48.1729724424888, + 52.92419656403044, + 70.36406694790499, + 33.60469905466055, + 27.096573174666204, + 25.516931758415996, + 43.24784358162087, + 41.8324314962728, + 39.03243514540072, + 48.57554896236685, + 52.56301407137644, + 52.58526337099239, + 38.503920018600546, + 50.71993869745151, + 59.67561983301352, + 47.272637347862535, + 29.84648186019928, + 36.52804961130047, + 50.07078033500559, + 45.13359164802745, + 31.27728898389649, + 20.89411176092622, + 34.14874311897102, + 54.663863650396, + 45.24599599388795, + 35.471979771343925, + 93.18474362935407, + 33.73968958797282, + 58.72863191418807, + 53.99019936838353, + 32.700165729246145, + 37.66598981947318, + 45.26758240742124, + 44.08356316841237, + 44.482632636064345, + 35.441648925063205, + 47.717803177606825, + 48.00936411950257, + 31.640168045388258, + 45.70240441808845, + 50.045000987685114, + 54.378176522702475, + 43.604183879356945, + 36.258665211409195, + 61.828888575624994, + 43.689710114923855 + ] + } + }, + "bpe_results": { + "FP16": { + "bpe_nominal_raw": 16.0, + 
"bpe_honest_raw": 16.0, + "bpe_zstd_l22": 16.0, + "bpe_shannon": 16.0 + }, + "K3V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 2.5, + "bpe_honest_raw": 2.625, + "bpe_zstd_l22": 1.6346, + "bpe_shannon": 1.8265, + "zstd_bytes": 13725816, + "raw_int8_bytes": 67174400 + }, + "K2V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 2.0, + "bpe_honest_raw": 2.125, + "bpe_zstd_l22": 1.0947, + "bpe_shannon": 1.1396, + "zstd_bytes": 9192271, + "raw_int8_bytes": 67174400 + }, + "K2V1_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.6567, + "bpe_shannon": 0.7995, + "zstd_bytes": 5514007, + "raw_int8_bytes": 67174400 + }, + "K1V2_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 1.5, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.7387, + "bpe_shannon": 0.7796, + "zstd_bytes": 6202432, + "raw_int8_bytes": 67174400 + }, + "K1V1_pb0": { + "n_codewords": 67174400, + "bpe_nominal_raw": 1.0, + "bpe_honest_raw": 1.125, + "bpe_zstd_l22": 0.297, + "bpe_shannon": 0.3013, + "zstd_bytes": 2494135, + "raw_int8_bytes": 67174400 + } + }, + "frontier": [ + { + "label": "FP16", + "kb": 0, + "vb": 0, + "bpe_honest_raw": 16.0, + "bpe_zstd_l22": 16.0, + "bpe_shannon": 16.0, + "mean_ppl_delta_pct": 0.0, + "sem_pct": 0.0 + }, + { + "label": "K3V2_pb0", + "kb": 3, + "vb": 2, + "bpe_honest_raw": 2.625, + "bpe_zstd_l22": 1.6346, + "bpe_shannon": 1.8265, + "mean_ppl_delta_pct": 0.4311, + "sem_pct": 0.0343 + }, + { + "label": "K2V2_pb0", + "kb": 2, + "vb": 2, + "bpe_honest_raw": 2.125, + "bpe_zstd_l22": 1.0947, + "bpe_shannon": 1.1396, + "mean_ppl_delta_pct": 1.2833, + "sem_pct": 0.061 + }, + { + "label": "K2V1_pb0", + "kb": 2, + "vb": 1, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.6567, + "bpe_shannon": 0.7995, + "mean_ppl_delta_pct": 2.2317, + "sem_pct": 0.0857 + }, + { + "label": "K1V2_pb0", + "kb": 1, + "vb": 2, + "bpe_honest_raw": 1.625, + "bpe_zstd_l22": 0.7387, + "bpe_shannon": 0.7796, + "mean_ppl_delta_pct": 
31.6683, + "sem_pct": 0.623 + }, + { + "label": "K1V1_pb0", + "kb": 1, + "vb": 1, + "bpe_honest_raw": 1.125, + "bpe_zstd_l22": 0.297, + "bpe_shannon": 0.3013, + "mean_ppl_delta_pct": 42.4878, + "sem_pct": 0.9638 + } + ], + "base_fp16_ppl": 5.0513, + "n_segments": 161, + "model_config": { + "n_kv_heads": 8, + "head_dim": 128, + "rope_theta": 1000000.0, + "n_layers": 32 + }, + "gpu": { + "name": "Tesla T4", + "sm": "75", + "n_gpus": 2, + "total_gb": 14.6 + }, + "transformers": "5.12.1", + "errors": {}, + "device_map": { + "model.embed_tokens": "0", + "model.layers.0": "0", + "model.layers.1": "0", + "model.layers.2": "0", + "model.layers.3": "0", + "model.layers.4": "0", + "model.layers.5": "0", + "model.layers.6": "0", + "model.layers.7": "0", + "model.layers.8": "0", + "model.layers.9": "0", + "model.layers.10": "0", + "model.layers.11": "0", + "model.layers.12": "0", + "model.layers.13": "0", + "model.layers.14": "0", + "model.layers.15": "0", + "model.layers.16": "1", + "model.layers.17": "1", + "model.layers.18": "1", + "model.layers.19": "1", + "model.layers.20": "1", + "model.layers.21": "1", + "model.layers.22": "1", + "model.layers.23": "1", + "model.layers.24": "1", + "model.layers.25": "1", + "model.layers.26": "1", + "model.layers.27": "1", + "model.layers.28": "1", + "model.layers.29": "1", + "model.layers.30": "1", + "model.layers.31": "1", + "model.norm": "1", + "model.rotary_emb": "1", + "lm_head": "1" + }, + "run_complete_utc": "2026-06-16T14:09:14Z" +} \ No newline at end of file diff --git a/experiments/kaggle/results/nq_qwen3_subbpe_entropy.json b/experiments/kaggle/results/nq_qwen3_subbpe_entropy.json new file mode 100644 index 0000000..d1d6195 --- /dev/null +++ b/experiments/kaggle/results/nq_qwen3_subbpe_entropy.json @@ -0,0 +1,1031 @@ +{ + "model_name": "Qwen3-8B", + "model_id": "Qwen/Qwen3-8B", + "weights": "NF4 (bitsandbytes load_in_4bit, nf4, double-quant, compute fp16)", + "baseline_note": "NF4-WEIGHTS baseline. 
KV-quant deltas are paired on identical tokens so NF4 weight floor cancels in the per-segment delta. Absolute base PPL is NF4-weight PPL, not strict FP16-weights.", + "protocol": "AQUA-iso: non-overlapping (prefix=1024 + cont=512) windows on wikitext-2-raw test", + "prefix_len": 1024, + "cont_len": 512, + "seg_len": 1536, + "n_segs_target": 161, + "n_segs_min": 60, + "rope_theta": 1000000.0, + "bpe_note": "bpe_raw=8.0 (int8 floor), bpe_zstd=zstd-L22 over real E8 codeword stream, bpe_shannon=order-0 Shannon (rANS lower bound), bpe_rans=live rANS bytes. Measured on the codewords from the FIRST segment's prefix KV (calibration-free).", + "cross_arch_note": "Qwen3-8B K3V2 pb=0 iso-reference = +0.378% (nq-mm-qwen3-iso-v3, NF4-weights, AQUA-iso n=6 seq_len=8192). This kernel extends to n>=60 paired segments + adds K3V2 pb=1, K2V2 pb=0, K2V2 pb=1. Confirms Qwen3 is NOT the Qwen2.5 catastrophe (+539x). K2V2 boundary protection expected: pb=1 ~+3.48% << pb=0 ~+8.37%.", + "sub_bpe_frontier_configs": [ + { + "label": "FP16_baseline", + "bpw": 16.0, + "bpe_raw": 16.0, + "note": "fp16: 16 bpe, included as frontier anchor" + }, + { + "label": "K3V2_pb0", + "bpw": 2.625, + "note": "sub-1.5bpe frontier target" + }, + { + "label": "K3V2_pb1", + "bpw": 2.625, + "note": "K3V2 with 1-layer boundary protection" + }, + { + "label": "K2V2_pb0", + "bpw": 2.125, + "note": "sub-1.25bpe target" + }, + { + "label": "K2V2_pb1", + "bpw": 2.125, + "note": "K2V2 with 1-layer boundary protection" + } + ], + "transformers": "5.12.1", + "gpu": { + "name": "Tesla T4", + "sm": "75", + "n_gpus": 2, + "total_memory_gb": 14.6 + }, + "run_at_utc": "2026-06-16T12:35:13Z", + "errors": {}, + "base_nf4w_ppl": 9.16228209578557, + "n_segments": 161, + "ppl_results": { + "K3V2_pb0": { + "n": 161, + "mean_delta_pct": 1.1658640950519377, + "paired_sigma_pct": 1.7387700840770108, + "sem_pct": 0.13703427973183283, + "z": 8.50782809479101, + "significant_at_2sigma": true, + "n_negative_segments": 42, + 
"per_segment_delta_pct": [ + 2.8357511690727066, + 1.3174789060099366, + 1.112745093670428, + -0.35266392564596044, + -0.2654247715132073, + 0.8405184815227554, + -0.508299541164353, + 1.1855120400736483, + 3.4788817671673757, + -1.253062131645703, + 0.46352875134428456, + 0.03331808364038324, + -0.16033645204992053, + -0.30198960966072474, + 3.6393619189070914, + 3.341890159801166, + 3.9219370415924795, + 0.8612133156559371, + -0.3946933985490677, + -0.0036309883833101665, + 0.8239343605689182, + 1.8495514021983013, + 1.299352299964945, + 1.6218795709315885, + -0.2979824241505431, + -0.1914978541339822, + 1.888104046847156, + 1.5349663459068674, + 3.07363373912027, + 0.5725984870983464, + -0.008856274806761492, + 0.1715829644919943, + 1.8939031108941546, + 1.0324034847410741, + 1.4776493107310158, + 2.3302210295900587, + 2.069930450045895, + 0.9504376098493413, + 3.40640466666375, + 0.23299890843277535, + 1.1154443764622213, + 2.078342928531392, + 2.116482096083378, + 0.7218461704189715, + 1.2791845154100119, + 0.6032233942923226, + 0.405059611481961, + -0.14460003387945228, + 1.5176556756934347, + -0.9019277235038258, + 0.9791753899499163, + -0.9460622276246331, + -0.9264381434470028, + 2.9853497265966626, + -0.8466001841883459, + 3.0714177144857286, + 1.3688875269320147, + -0.9689471511916492, + 0.12477582951246934, + -1.483577369767597, + 3.3723335206041383, + 5.105762984144336, + 0.5120789524227836, + 5.204509699612876, + 0.6223371478827303, + 1.1733362705632855, + 1.1929451158666209, + 2.2970054349854334, + 2.232450417209761, + 4.0456287119969865, + 3.2245400413678618, + 0.7490567684766469, + -0.08806801340382284, + 3.0982409695078372, + 0.4916237956865787, + 0.9478211054331597, + 0.5290020186808428, + 0.025719293419835497, + 0.3702150503289205, + -1.5642192775048656, + 1.0507961361662925, + 2.762896004068712, + -1.755620164805923, + 5.804071971278445, + 5.285213701202877, + 2.8515789899950543, + 3.899143611012533, + -1.4171770439825986, + 1.6121211043391177, 
+ 0.9537414957146656, + -0.9100176608522991, + -1.0582890684618016, + -1.0877406823462485, + -0.7250506817801212, + 2.237603549923082, + 1.9986107472724224, + 2.358850328208557, + -0.11036371576227734, + -0.6896492690629921, + 1.8244090938817237, + 0.8524012300651589, + 0.5800405992453939, + -0.4261444025162834, + 0.35382814008670355, + 0.12047660339054966, + 1.471395195520292, + -0.4139351222753589, + 2.6082340253583376, + -1.9188236904922933, + 1.7213504083043734, + 0.3376176815583863, + 1.4348963068490503, + 1.7873880998348928, + 1.3603034878317959, + -0.18748026667073503, + -1.9128781131102437, + 6.058061737593763, + 2.5320068174039325, + 0.9327021731328311, + -0.6690074458108002, + -1.4932611802904914, + 0.2708079041372887, + 0.48753086340662943, + -0.277044791517532, + -1.5332335915385673, + 1.0689506969150082, + 2.5487616540306335, + 0.7526789853724182, + -1.636729980073967, + 2.0824884627802818, + 2.49224341378034, + 2.07299056126425, + 0.8474082574084953, + 2.416940327922228, + 6.886259601104846, + 2.605268166613569, + 0.4010181734957818, + 2.521132336059571, + 2.83100391190274, + 1.073960674443165, + 1.9085817552020405, + 2.413025860919614, + 1.324982188639602, + 2.5169934784205696, + 2.383953393650591, + 1.064737679129469, + 1.6361669765476181, + 1.765719221145739, + -5.255826366839, + -0.056325194423246086, + 2.284421032740837, + 1.6643228066348843, + 3.6073301412825667, + -0.2437832684804708, + 4.780513400510841, + 1.2240370021989218, + 1.4103226974510348, + -0.034757719800339246, + 0.490339485205234, + 0.6414004390022094, + 3.036964661336575 + ] + }, + "K3V2_pb1": { + "n": 161, + "mean_delta_pct": 0.9445572921815545, + "paired_sigma_pct": 1.5768961608926055, + "sem_pct": 0.12427682739579496, + "z": 7.60042971786963, + "significant_at_2sigma": true, + "n_negative_segments": 46, + "per_segment_delta_pct": [ + 2.230747009845046, + 1.1184939884550247, + 0.9945063551154996, + 0.12037495493741915, + -0.23280882139804837, + 1.205915025452985, + 
-0.02562312136208674, + 0.6730353190966823, + 2.9753742322119296, + -1.251057031224295, + -0.04824912912703991, + 0.10652781270641924, + -0.8139314304103602, + -1.0225749820505654, + 4.224402262489402, + 2.2375058577880433, + 3.257988970532035, + 1.8336892522321524, + 0.7875600161949425, + 0.656391577926599, + 1.5774764092569347, + 1.8542594328708464, + 1.2973438102521986, + 0.3853456486694934, + -0.5193565653076547, + -1.4301993186748194, + 1.7935826608362837, + 1.9856184300504496, + 1.4725344823686346, + -0.16548796518623857, + -0.5703657543616417, + 0.8975660357167159, + 2.1804451844020853, + 0.3685966189793506, + 0.8728756050224117, + 2.343899016466077, + 1.2731603653322783, + -0.07825203131648281, + 3.2178145700713134, + 1.433727153816143, + 1.7811817587909407, + 1.632324838684882, + 2.192076921283284, + 2.9333918080961734, + 0.990477378791047, + 1.1377720798956803, + 0.8613337869590829, + -0.2224052178880701, + 1.189838869815359, + -0.98562396574326, + 0.5091016519595667, + -0.9045849886399132, + -1.3887422395274032, + 2.8772329713713165, + -0.17499133238471853, + 0.6323250927935029, + 0.9094615330170099, + -0.47276734002894594, + 0.6546002177231433, + -1.1533357393629462, + 1.6563876340806512, + 4.0271755439074886, + 0.34169851522670475, + 5.347694926410577, + 0.3564052749185805, + 1.5192441325881823, + 0.18101163269448062, + 1.6040019005160209, + 1.5411698017393287, + 3.6613678426476985, + 2.871365656078666, + -0.1589829432780434, + -0.4362889199246018, + 3.090698724728766, + 0.25318588064059877, + 1.5429830045898376, + 1.1097137702627753, + -0.8611306490823056, + 0.6780046269676161, + -1.1239568017869004, + 1.0232928798019167, + 2.209716236140303, + -1.0619820785424974, + 5.535588063670281, + 4.496003114654534, + 0.9794527982257263, + 2.1953359979183946, + 0.46511271402091003, + 0.9290252632184981, + 0.701630983138971, + 0.3256997211142598, + -0.3931765049143969, + -0.7167381171634172, + -1.6926675492915653, + 2.630076170730228, + 1.5852416183264915, + 
1.436713669923418, + 1.9853321669655346, + -0.618416844559099, + 1.9587801921001597, + 0.37677895611904766, + -0.624351863475068, + -0.24290154426217944, + -0.5155066501411204, + -0.22842942979730757, + 1.8136096599150624, + -0.7832843289139851, + 2.321164494756861, + -0.8019178048414846, + -0.11128836983049742, + 0.4567131968761394, + 0.522560231939945, + 4.856251791878161, + 0.8516182385168058, + -0.918521147785926, + -1.1419418100466041, + 6.566866326465908, + 2.152896798409856, + 0.9778969094850434, + -0.22518200170171226, + -2.152325346344119, + 0.16678521388093442, + 1.000875843184946, + 0.32809638173989025, + -1.2469169597737801, + -2.5781184650887723, + 2.236431250463528, + -0.10133148330937532, + -1.1393027178623805, + 1.9204016696407669, + 2.9477590852036726, + 0.6801752062221916, + 1.0034816586300397, + 2.167997153214327, + 3.843623783666882, + 2.324024366529906, + 0.013438192625282246, + 1.4427354322516615, + 0.8216462791511074, + 0.8032230949837996, + 1.527514210473874, + 1.6293629105396665, + 1.253868561257235, + 1.78287159672502, + 1.8962155264804939, + 0.2988969731147808, + 0.4710287954429885, + 0.5223080921121898, + -4.390624471083666, + -0.17946845222399085, + 2.134443840039712, + 1.0685040362770892, + 3.3995376678892266, + -0.7972849431483772, + 2.299241466485658, + 1.2957346273498525, + 1.0542843591436695, + 0.9347516564690391, + 0.3694118255824025, + -0.29346893765513477, + 2.6175733636944023 + ] + }, + "K2V2_pb0": { + "n": 161, + "mean_delta_pct": 10.516246636600394, + "paired_sigma_pct": 5.454642883227846, + "sem_pct": 0.4298860818589917, + "z": 24.462868374626424, + "significant_at_2sigma": true, + "n_negative_segments": 1, + "per_segment_delta_pct": [ + 4.7211140730573655, + 8.710499023389273, + 9.094469770785519, + 4.045268315762152, + 13.47567614419486, + 4.53796109047184, + 10.554040751227657, + 5.977059628917554, + 16.794192194435286, + 3.5858491490181934, + 5.873348200696593, + 6.961878383654888, + 5.590700838234662, + 
10.742185043075873, + 6.897788616295629, + 11.85980876957418, + 17.57984054765425, + 6.755760804551969, + 13.222617375503082, + 13.785009047124317, + 11.902772066336647, + 10.084866902979698, + 8.797024272609221, + 9.891473196104856, + 13.237588991652132, + 6.557027556632781, + 1.6258002137955831, + 3.0016188663974566, + 9.717039699863456, + 4.832733792761292, + 6.892375058955358, + 8.959413868193643, + 8.852371042193601, + 20.31363181394084, + 16.90215382781083, + 18.116603357484845, + 26.120559646333767, + 4.629538121780897, + 15.128044617437132, + 10.725109874209696, + 8.66493241210635, + 9.079368438207718, + 11.973821298727096, + 17.475188836933352, + 20.088325681758345, + 3.562956255634043, + 2.625528404850767, + 6.1957492009863255, + 6.47497350367787, + -3.3459133404039076, + 4.032468845214907, + 3.834501520510585, + 4.768261682772925, + 15.07074392106224, + 8.25761160035471, + 4.4777196512002435, + 17.73712653901264, + 17.819676125684726, + 8.542153515522779, + 12.192787483048361, + 25.108294767695384, + 19.798417899798277, + 12.73453728971491, + 16.060556302837167, + 4.822360250123552, + 10.669227282790612, + 12.36570788430367, + 8.431933016090062, + 5.986512541155248, + 8.019614948102637, + 7.521390119019833, + 8.811826580857272, + 9.02903707679464, + 5.261134301591265, + 17.65491584995792, + 5.689391886458959, + 2.932992233414353, + 17.327950024707707, + 8.516616347939912, + 13.693362609321918, + 12.009242421464988, + 14.650044468879496, + 14.909237602181912, + 11.897024853131732, + 8.69006308642958, + 5.381548634834032, + 8.764288304072048, + 4.14173747750074, + 7.204574153462004, + 10.028630619650873, + 9.517502903744164, + 1.4437956386836002, + 5.415589497605656, + 3.57689180241855, + 18.35452267952444, + 16.003338783845123, + 3.9983280586831995, + 15.76553436486396, + 10.272844959511307, + 8.290764186632414, + 6.071306799749158, + 8.694178528430559, + 18.254428641983957, + 11.43527459479286, + 14.731573859636013, + 15.85623570471799, + 
19.042555467862506, + 14.709026415460091, + 6.555710274130269, + 2.5711792034341587, + 8.558172404421764, + 11.611593291701302, + 15.345608117282335, + 4.939471204011235, + 8.14981789963251, + 4.154413546465945, + 18.805615730745757, + 11.208993092833635, + 10.205797230531223, + 20.064194900348312, + 4.8367595296559776, + 11.623185351341355, + 9.595559186046078, + 7.89831911123328, + 21.262071867775713, + 12.155151524266094, + 14.120038228000622, + 13.829935424017082, + 0.8429876652640199, + 11.32505482650629, + 11.65746196048406, + 14.865751132481263, + 9.27152872859025, + 12.904948185960116, + 35.62979963039754, + 9.172902617489834, + 18.86788554258007, + 8.008926053128393, + 14.608969538570461, + 9.011548981251053, + 12.27154361111328, + 11.236286547041644, + 7.351520108579834, + 7.37181979514189, + 10.175710116123847, + 17.924860083564123, + 11.131300892579455, + 6.918832594013891, + 6.064421187985778, + 13.562474147227435, + 13.259529298356593, + 13.563627109606779, + 9.019843431859387, + 6.833660687499152, + 11.098001896009139, + 14.1421222464654, + 14.303833726226728, + 6.247659672451627, + 10.357489684748996, + 10.726776273583026, + 9.768383547518383 + ] + }, + "K2V2_pb1": { + "n": 161, + "mean_delta_pct": 6.623357020409349, + "paired_sigma_pct": 3.965647245399263, + "sem_pct": 0.31253678615725833, + "z": 21.192247804956605, + "significant_at_2sigma": true, + "n_negative_segments": 6, + "per_segment_delta_pct": [ + 1.5639311803405849, + 3.2220433670590074, + 6.527618488392175, + 3.8647426340827375, + 9.76797055121334, + 0.8176603244945281, + 4.977619831006142, + 4.22656526933245, + 14.884548880266944, + 1.861589552021992, + 3.535004221154566, + 3.2295027461524835, + 2.725245457992173, + 3.9122804066853214, + 7.797610467096028, + 9.111645353948395, + 11.111060177585761, + 7.426775154714653, + 4.296007627428122, + 16.3247583433487, + 7.951712977347904, + 5.3389249404449215, + 6.954251501628831, + 6.08132245707645, + 10.530670620562814, + 4.919249155231151, + 
1.0586538602239113, + 0.8794603339756292, + 8.97199951308952, + 2.82640425598423, + 6.47592097245308, + 7.77402490402317, + 6.876965796496183, + 11.564857310163074, + 11.236545631140917, + 13.943479001728857, + 12.910342577957254, + 2.4024942409670027, + 12.929656614350387, + 5.244420275890184, + 5.748715203718763, + 6.227388542075015, + 6.096255841300995, + 12.465592928989155, + 6.655800104157344, + 2.636469125776481, + 3.4380901617654973, + -1.5462405357685696, + 4.495566271055497, + -4.902817350552214, + 4.027877574246188, + 4.724422533535015, + 4.3085654262509925, + 9.052767034660194, + 7.718267895030261, + -2.7506020457199267, + 9.56084900690677, + 6.006351524024087, + 7.566779136011066, + 2.216711769418099, + 16.330677392791383, + 12.319588516684965, + 5.972854034394183, + 13.0740499872613, + 2.5814096040727676, + 5.599756660701434, + 9.521518860536156, + 6.652411519219399, + 4.054800594923759, + 6.41076876155413, + 8.454839967492413, + 5.76382283790697, + 7.765213827729127, + 3.5196029064798986, + 9.339052835201048, + 5.427438397553917, + 2.638001532746901, + 8.975683074002694, + 6.294682010358396, + 9.872429425781297, + 8.137688866257376, + 10.898443833475712, + 6.542773770713394, + 8.1704494227419, + 4.082843234587858, + 5.618211248530477, + 4.744181659625151, + 2.0320219406321156, + 0.46526871169599293, + 6.448276155379714, + 5.209247149681009, + 0.4857725140753314, + 4.187958458192536, + 4.085578293559819, + 19.225513490397855, + 12.62380592254392, + 2.5390489042493045, + 12.762925723273305, + 9.191864267957719, + 2.7133690092383773, + 4.3378229877296, + 7.876876187358124, + 6.108662856161578, + 6.15246652900755, + 10.293387472377809, + 11.042667213630109, + 8.508450907532067, + 8.671500095604507, + 6.515754081472593, + 1.016451415994202, + 7.5010300542290045, + 9.791408910560715, + 10.979452801989238, + 3.9116660367551033, + 7.39571499405565, + 2.8615732071337803, + 11.858445973859263, + 6.871904332123628, + 12.34610114945254, + 10.798441942419192, + 
4.259090477079247, + 7.510789049115356, + 12.107993772457768, + 3.751340477899184, + 3.4203747352589082, + 10.125754175768092, + 8.132470829538583, + 9.428326451129571, + -0.45284141123039184, + 8.645037090247033, + 6.661571137585751, + 10.0890088592184, + 5.661616835766597, + 10.23622173318376, + 15.893646309453514, + 7.113058984744406, + 7.639613700736132, + 7.009619432065361, + 7.404771676216721, + 5.4538986121117246, + 8.97445954526016, + -0.5594212967690156, + 0.7460122815837149, + 4.056590324886271, + 4.398731939665698, + 4.481475924196601, + -1.4811886285860154, + 3.926281578749345, + 3.124775584845114, + 13.65542690000538, + 9.131639157118483, + 9.223182978783248, + 7.0888429461728, + 3.6173297259236055, + 8.95785215221935, + 9.000455977285567, + 8.74497227349111, + 5.888909983218558, + 1.03784007152493, + 8.749317107061565, + 6.129659235343397 + ] + } + }, + "entropy_bpe": { + "K3V2_pb0": { + "n_codewords": 75497472, + "bpe_raw": 8.0, + "bpe_zstd": 1.694186316596137, + "bpe_shannon": 1.931600825155257, + "bpe_rans": 1.9316425323486328, + "rans_err": null, + "label": "K3V2_pb0", + "kb": 3, + "vb": 2, + "pb": 0, + "bpw_raw": 2.625, + "from_seg_idx": 0 + }, + "K3V2_pb1": { + "n_codewords": 71303168, + "bpe_raw": 8.0, + "bpe_zstd": 1.7603089949663948, + "bpe_shannon": 1.88746658690758, + "bpe_rans": 1.8874991921817554, + "rans_err": null, + "label": "K3V2_pb1", + "kb": 3, + "vb": 2, + "pb": 1, + "bpw_raw": 2.625, + "from_seg_idx": 0 + }, + "K2V2_pb0": { + "n_codewords": 75497472, + "bpe_raw": 8.0, + "bpe_zstd": 1.138491736518012, + "bpe_shannon": 1.2067205626565842, + "bpe_rans": 1.2067345513237848, + "rans_err": null, + "label": "K2V2_pb0", + "kb": 2, + "vb": 2, + "pb": 0, + "bpw_raw": 2.125, + "from_seg_idx": 0 + }, + "K2V2_pb1": { + "n_codewords": 71303168, + "bpe_raw": 8.0, + "bpe_zstd": 1.1634540557861328, + "bpe_shannon": 1.1781310558700682, + "bpe_rans": 1.1781437817741842, + "rans_err": null, + "label": "K2V2_pb1", + "kb": 2, + "vb": 2, + "pb": 1, + 
"bpw_raw": 2.125, + "from_seg_idx": 0 + }, + "FP16": { + "label": "FP16", + "kb": 16, + "vb": 16, + "pb": 0, + "bpw_raw": 16.0, + "bpe_raw": 16.0, + "bpe_zstd": null, + "bpe_shannon": null, + "bpe_rans": null, + "note": "fp16 baseline: 16 bits/element; no compression." + } + }, + "model_config": { + "n_layers": 36, + "n_kv_heads": 8, + "head_dim": 128, + "rope_theta": 1000000.0, + "rope_scaling": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "hidden_size": 4096, + "vocab_size": 151936 + }, + "device_map": { + "model.embed_tokens": "0", + "model.layers.0": "0", + "model.layers.1": "0", + "model.layers.2": "0", + "model.layers.3": "1", + "model.layers.4": "1", + "model.layers.5": "1", + "model.layers.6": "1", + "model.layers.7": "1", + "model.layers.8": "1", + "model.layers.9": "1", + "model.layers.10": "1", + "model.layers.11": "1", + "model.layers.12": "1", + "model.layers.13": "1", + "model.layers.14": "1", + "model.layers.15": "1", + "model.layers.16": "1", + "model.layers.17": "1", + "model.layers.18": "1", + "model.layers.19": "1", + "model.layers.20": "1", + "model.layers.21": "1", + "model.layers.22": "1", + "model.layers.23": "1", + "model.layers.24": "1", + "model.layers.25": "1", + "model.layers.26": "1", + "model.layers.27": "1", + "model.layers.28": "1", + "model.layers.29": "1", + "model.layers.30": "1", + "model.layers.31": "1", + "model.layers.32": "1", + "model.layers.33": "1", + "model.layers.34": "1", + "model.layers.35": "1", + "model.norm": "1", + "model.rotary_emb": "1", + "lm_head": "1" + }, + "per_segment_base_ppl": [ + 5.61937046139659, + 11.130413832568783, + 16.50265673408572, + 11.684100899077528, + 7.820836676349614, + 11.53331862146282, + 6.7263590083749385, + 3.659914163848637, + 3.147860141461009, + 6.901928856351774, + 5.559844166732602, + 10.559469668587361, + 7.0398723891043575, + 6.81063566431677, + 6.387315679144375, + 14.786040735721198, + 8.384022767091736, + 10.448703972920777, + 9.189355162211045, + 
9.1177842240123, + 12.794701979351633, + 16.067953178474816, + 10.342646264889801, + 14.087970020805173, + 6.05608102654371, + 8.623059630283267, + 6.152613637160116, + 9.462698836206739, + 12.66218314059296, + 6.884151275970726, + 9.499285472493945, + 9.552289600228566, + 8.467849442937506, + 10.100631462295189, + 9.89295312849974, + 17.068181741133127, + 22.090891947960007, + 18.230663016104558, + 6.762505356090557, + 9.164809440374055, + 5.592730097537938, + 6.867251161223241, + 4.765922551254759, + 5.334110134471534, + 8.482756746085713, + 6.618668326665751, + 9.084856450899935, + 10.4632872713475, + 5.903950050266444, + 3.7978317578972725, + 6.153838443276409, + 5.379919681393386, + 4.286367927518294, + 7.543654934017332, + 5.357412255032889, + 13.908770700606748, + 11.822035541292582, + 14.914537972488505, + 17.701000349516505, + 9.301744054188509, + 10.032515835208219, + 8.713440397637678, + 10.900079216716223, + 11.405435430297757, + 14.856659649165726, + 8.20600613274653, + 10.156494197411549, + 11.760320965627463, + 8.687890948427729, + 6.952667746153729, + 7.32891556200751, + 6.786871545853976, + 4.841305373897557, + 6.717236011500902, + 5.5295921628740095, + 4.837907277451897, + 4.487679916248823, + 8.60259289434125, + 8.08889991703968, + 7.978525977388292, + 8.907469956411784, + 13.097531364230019, + 13.08389843552478, + 10.905225672623436, + 13.778634086243237, + 8.193298091885818, + 5.122930271709729, + 10.7128960660276, + 10.222523185716609, + 9.80593456288517, + 7.430814973655051, + 6.8732263525648705, + 8.761261697879988, + 13.756864789448759, + 10.970637051405914, + 7.278559530879648, + 12.468015489416505, + 13.949002316409139, + 17.29706108713394, + 8.300702704130224, + 6.610394854075483, + 8.591269935351221, + 7.541011314314098, + 7.71192235560067, + 6.42184869581212, + 13.375737552411644, + 6.607048745429719, + 13.928163264767447, + 6.080432012204673, + 6.729003964282357, + 7.021347423121954, + 6.813491577268784, + 6.385761423998478, + 
6.306575048381739, + 9.12385770878872, + 6.739319695034043, + 7.372797695094878, + 7.1156673783120175, + 13.543752609405118, + 8.292526714796825, + 8.116509999466254, + 23.883598636487278, + 16.0998742334318, + 21.956441634948337, + 18.37019390912922, + 17.840649275455785, + 16.097666768783665, + 17.733011566155525, + 7.689822966050992, + 25.51444274064346, + 10.51440765813733, + 11.123886841205726, + 11.049748326014202, + 10.181128479856143, + 6.666307975656422, + 11.521818899727709, + 18.213906015980992, + 10.213960206567384, + 12.535337711978844, + 9.608661911270849, + 11.359021390316633, + 7.936748170753453, + 13.91939045595241, + 10.3294734515776, + 10.592284047746391, + 10.679233519659581, + 8.52631329210859, + 8.98700280711406, + 8.844738027221954, + 8.166422151895429, + 5.587980225932658, + 10.145778319059634, + 10.001073634248776, + 10.99128416153328, + 10.02162671863774, + 10.79769730482414, + 7.108670506898742, + 6.848296554307748, + 6.636950113581545, + 14.87635031315605, + 13.093120490994098 + ] +} \ No newline at end of file diff --git a/experiments/kaggle/results/nq_yi_subbpe_niah.json b/experiments/kaggle/results/nq_yi_subbpe_niah.json new file mode 100644 index 0000000..4ade37c --- /dev/null +++ b/experiments/kaggle/results/nq_yi_subbpe_niah.json @@ -0,0 +1,1736 @@ +{ + "model": "01-ai/Yi-6B-Chat", + "context": 2048, + "n_needles": 40, + "rope_theta": 5000000.0, + "rope_partial_rotary_factor": null, + "run_at_utc": "2026-06-15T14:13:22Z", + "niah_results": { + "FP16": { + "config": "FP16", + "ctx": 2048, + "hits": 39, + "n": 40, + "elapsed_s": 156, + "recall_array": [ + true, + true, + false, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true, + true + ], + "cells": [ + { + "trial": 0, + "target_key": "6092819", + 
"target_value": "673", + "answer": "The value of 6092819 is 673.", + "recall": true + }, + { + "trial": 1, + "target_key": "4906615", + "target_value": "125", + "answer": "The value of 4906615 is 125.", + "recall": true + }, + { + "trial": 2, + "target_key": "6643104", + "target_value": "399", + "answer": "The value of 6643104 is 147.", + "recall": false + }, + { + "trial": 3, + "target_key": "3592499", + "target_value": "878", + "answer": "The value of 3592499 is 878.", + "recall": true + }, + { + "trial": 4, + "target_key": "3357002", + "target_value": "243", + "answer": "The value of 3357002 is 243.", + "recall": true + }, + { + "trial": 5, + "target_key": "2764676", + "target_value": "334", + "answer": "The value of 2764676 is 334.", + "recall": true + }, + { + "trial": 6, + "target_key": "8517866", + "target_value": "410", + "answer": "The value of 8517866 is 410.", + "recall": true + }, + { + "trial": 7, + "target_key": "7860553", + "target_value": "993", + "answer": "The value of 7860553 is 993.", + "recall": true + }, + { + "trial": 8, + "target_key": "2533138", + "target_value": "680", + "answer": "The value of 2533138 is 680.", + "recall": true + }, + { + "trial": 9, + "target_key": "3074099", + "target_value": "130", + "answer": "The value of 3074099 is 130.", + "recall": true + }, + { + "trial": 10, + "target_key": "5571439", + "target_value": "183", + "answer": "The value of 5571439 is 183.", + "recall": true + }, + { + "trial": 11, + "target_key": "2589069", + "target_value": "756", + "answer": "The value of 2589069 is 756.", + "recall": true + }, + { + "trial": 12, + "target_key": "8312771", + "target_value": "240", + "answer": "The value of 8312771 is 240.", + "recall": true + }, + { + "trial": 13, + "target_key": "1388833", + "target_value": "460", + "answer": "The value of 1388833 is 460.", + "recall": true + }, + { + "trial": 14, + "target_key": "1341484", + "target_value": "242", + "answer": "The value of 1341484 is 242.", + "recall": true + }, 
+ { + "trial": 15, + "target_key": "8319109", + "target_value": "701", + "answer": "The value of 8319109 is 701.", + "recall": true + }, + { + "trial": 16, + "target_key": "7155076", + "target_value": "344", + "answer": "The value of 7155076 is 344.", + "recall": true + }, + { + "trial": 17, + "target_key": "3628555", + "target_value": "418", + "answer": "The value of 3628555 is 418.", + "recall": true + }, + { + "trial": 18, + "target_key": "5313223", + "target_value": "167", + "answer": "The value of 5313223 is 167.", + "recall": true + }, + { + "trial": 19, + "target_key": "7258862", + "target_value": "466", + "answer": "The value of 7258862 is 466.", + "recall": true + }, + { + "trial": 20, + "target_key": "2072378", + "target_value": "159", + "answer": "The value of 2072378 is 159.", + "recall": true + }, + { + "trial": 21, + "target_key": "4786069", + "target_value": "330", + "answer": "The value of 4786069 is 330.", + "recall": true + }, + { + "trial": 22, + "target_key": "6974831", + "target_value": "481", + "answer": "The value of 6974831 is 481.", + "recall": true + }, + { + "trial": 23, + "target_key": "7868943", + "target_value": "825", + "answer": "The value of 7868943 is 825.", + "recall": true + }, + { + "trial": 24, + "target_key": "3042933", + "target_value": "552", + "answer": "The value of 3042933 is 552.", + "recall": true + }, + { + "trial": 25, + "target_key": "5183336", + "target_value": "714", + "answer": "The value of 5183336 is 714.", + "recall": true + }, + { + "trial": 26, + "target_key": "1565124", + "target_value": "790", + "answer": "The value of 1565124 is 790.", + "recall": true + }, + { + "trial": 27, + "target_key": "9468295", + "target_value": "876", + "answer": "The value of 9468295 is 876.", + "recall": true + }, + { + "trial": 28, + "target_key": "9734448", + "target_value": "416", + "answer": "The value of 9734448 is 416.", + "recall": true + }, + { + "trial": 29, + "target_key": "4633979", + "target_value": "470", + 
"answer": "The value of 4633979 is 470.", + "recall": true + }, + { + "trial": 30, + "target_key": "3636358", + "target_value": "108", + "answer": "The value of 3636358 is 108.", + "recall": true + }, + { + "trial": 31, + "target_key": "8409286", + "target_value": "907", + "answer": "The value of 8409286 is 907.", + "recall": true + }, + { + "trial": 32, + "target_key": "8702934", + "target_value": "319", + "answer": "The value of 8702934 is 319.", + "recall": true + }, + { + "trial": 33, + "target_key": "8912673", + "target_value": "480", + "answer": "The value of 8912673 is 480.", + "recall": true + }, + { + "trial": 34, + "target_key": "4725928", + "target_value": "168", + "answer": "The value of 4725928 is 168.", + "recall": true + }, + { + "trial": 35, + "target_key": "2270015", + "target_value": "638", + "answer": "The value of 2270015 is 638.", + "recall": true + }, + { + "trial": 36, + "target_key": "8165448", + "target_value": "598", + "answer": "The value of 8165448 is 598.", + "recall": true + }, + { + "trial": 37, + "target_key": "2729748", + "target_value": "344", + "answer": "The value of 2729748 is 344.", + "recall": true + }, + { + "trial": 38, + "target_key": "3096866", + "target_value": "663", + "answer": "The value of 3096866 is 663.", + "recall": true + }, + { + "trial": 39, + "target_key": "9375417", + "target_value": "378", + "answer": "The value of 9375417 is 378.", + "recall": true + } + ] + }, + "K2V2_pb0": { + "config": "K2V2_pb0", + "ctx": 2048, + "hits": 13, + "n": 40, + "elapsed_s": 262, + "recall_array": [ + true, + false, + false, + true, + true, + true, + false, + false, + false, + false, + false, + false, + false, + false, + true, + false, + true, + false, + true, + false, + false, + false, + false, + false, + true, + true, + false, + false, + true, + true, + true, + false, + false, + false, + false, + false, + false, + false, + false, + true + ], + "cells": [ + { + "trial": 0, + "target_key": "8851326", + "target_value": "690", + 
"answer": "The value of 8851326 is 690.", + "recall": true + }, + { + "trial": 1, + "target_key": "1332433", + "target_value": "855", + "answer": "The value of 1332433 is 218.", + "recall": false + }, + { + "trial": 2, + "target_key": "8658529", + "target_value": "790", + "answer": "The value of 8658529 is 670.", + "recall": false + }, + { + "trial": 3, + "target_key": "7409905", + "target_value": "535", + "answer": "The value of 7409905 is 535.", + "recall": true + }, + { + "trial": 4, + "target_key": "8157561", + "target_value": "738", + "answer": "The value of 8157561 is 738.", + "recall": true + }, + { + "trial": 5, + "target_key": "1686172", + "target_value": "821", + "answer": "The value of 1686172 is 821.", + "recall": true + }, + { + "trial": 6, + "target_key": "3295178", + "target_value": "724", + "answer": "The value of 3295178 is 487.", + "recall": false + }, + { + "trial": 7, + "target_key": "3509677", + "target_value": "838", + "answer": "The value of 3509677 is 154.", + "recall": false + }, + { + "trial": 8, + "target_key": "9973758", + "target_value": "247", + "answer": "The value of 9973758 is 546.", + "recall": false + }, + { + "trial": 9, + "target_key": "2494450", + "target_value": "981", + "answer": "The value of 2494450 is 982.", + "recall": false + }, + { + "trial": 10, + "target_key": "8799686", + "target_value": "696", + "answer": "The value of 8799686 is 699.", + "recall": false + }, + { + "trial": 11, + "target_key": "4915241", + "target_value": "864", + "answer": "The value of 4915241 is 999.", + "recall": false + }, + { + "trial": 12, + "target_key": "7217072", + "target_value": "841", + "answer": "The value of 7217072 is 440.", + "recall": false + }, + { + "trial": 13, + "target_key": "2358854", + "target_value": "540", + "answer": "The value of 2358854 is 938.", + "recall": false + }, + { + "trial": 14, + "target_key": "3982306", + "target_value": "181", + "answer": "The value of 3982306 is 181.", + "recall": true + }, + { + "trial": 
15, + "target_key": "6545284", + "target_value": "563", + "answer": "The value of 6545284 is 941.", + "recall": false + }, + { + "trial": 16, + "target_key": "8158461", + "target_value": "819", + "answer": "The value of 8158461 is 819.", + "recall": true + }, + { + "trial": 17, + "target_key": "4992066", + "target_value": "965", + "answer": "The value of 4992066 is 999.", + "recall": false + }, + { + "trial": 18, + "target_key": "2968156", + "target_value": "707", + "answer": "The value of 2968156 is 707.", + "recall": true + }, + { + "trial": 19, + "target_key": "2242891", + "target_value": "878", + "answer": "The value of 2242891 is 999.", + "recall": false + }, + { + "trial": 20, + "target_key": "5400001", + "target_value": "361", + "answer": "The value of 5400001 is 271.", + "recall": false + }, + { + "trial": 21, + "target_key": "3221988", + "target_value": "155", + "answer": "The value of 3221988 is 152.", + "recall": false + }, + { + "trial": 22, + "target_key": "3612286", + "target_value": "211", + "answer": "The value of 3612286 is 428.", + "recall": false + }, + { + "trial": 23, + "target_key": "8677825", + "target_value": "370", + "answer": "The value of 8677825 is 288.", + "recall": false + }, + { + "trial": 24, + "target_key": "8319261", + "target_value": "456", + "answer": "The value of 8319261 is 456.", + "recall": true + }, + { + "trial": 25, + "target_key": "4793097", + "target_value": "497", + "answer": "The value of 4793097 is 497.", + "recall": true + }, + { + "trial": 26, + "target_key": "5982904", + "target_value": "918", + "answer": "The value of 5982904 is 998.", + "recall": false + }, + { + "trial": 27, + "target_key": "6106531", + "target_value": "476", + "answer": "The value of 6106531 is 925.", + "recall": false + }, + { + "trial": 28, + "target_key": "1709415", + "target_value": "944", + "answer": "The value of 1709415 is 944.", + "recall": true + }, + { + "trial": 29, + "target_key": "8665745", + "target_value": "263", + "answer": "The 
value of 8665745 is 263.", + "recall": true + }, + { + "trial": 30, + "target_key": "4104802", + "target_value": "264", + "answer": "The value of 4104802 is 264.", + "recall": true + }, + { + "trial": 31, + "target_key": "5531663", + "target_value": "767", + "answer": "The value of 5531663 is 897.", + "recall": false + }, + { + "trial": 32, + "target_key": "3335629", + "target_value": "311", + "answer": "The value of 3335629 is 208.", + "recall": false + }, + { + "trial": 33, + "target_key": "1724214", + "target_value": "454", + "answer": "The value of 1724214 is 453.", + "recall": false + }, + { + "trial": 34, + "target_key": "5232266", + "target_value": "589", + "answer": "The value of 5232266 is 801.", + "recall": false + }, + { + "trial": 35, + "target_key": "5780191", + "target_value": "755", + "answer": "The value of 5780191 is 908.", + "recall": false + }, + { + "trial": 36, + "target_key": "7122137", + "target_value": "102", + "answer": "The value of 7122123 is 889.", + "recall": false + }, + { + "trial": 37, + "target_key": "9867077", + "target_value": "995", + "answer": "The value of 9867077 is 885.", + "recall": false + }, + { + "trial": 38, + "target_key": "5130783", + "target_value": "251", + "answer": "The value of 5130783 is 469.", + "recall": false + }, + { + "trial": 39, + "target_key": "4369831", + "target_value": "740", + "answer": "The value of 4369831 is 740.", + "recall": true + } + ] + }, + "K2V1_pb0": { + "config": "K2V1_pb0", + "ctx": 2048, + "hits": 6, + "n": 40, + "elapsed_s": 262, + "recall_array": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + true, + false, + false, + true, + false, + true, + false, + false, + true, + false + ], + "cells": [ + { + "trial": 0, + "target_key": "1733709", + "target_value": "437", + 
"answer": "The value of 1733709 is 999.", + "recall": false + }, + { + "trial": 1, + "target_key": "3604132", + "target_value": "591", + "answer": "The value of 3604132 is 592.", + "recall": false + }, + { + "trial": 2, + "target_key": "2545014", + "target_value": "607", + "answer": "The value of 2545014 is 254.", + "recall": false + }, + { + "trial": 3, + "target_key": "2540314", + "target_value": "995", + "answer": "The value of 2540314 is 704.", + "recall": false + }, + { + "trial": 4, + "target_key": "8563776", + "target_value": "336", + "answer": "The value of 8563776 is 333.", + "recall": false + }, + { + "trial": 5, + "target_key": "6344144", + "target_value": "734", + "answer": "The value of 6344144 is 748.", + "recall": false + }, + { + "trial": 6, + "target_key": "7562685", + "target_value": "986", + "answer": "The value of 7562685 is 102.", + "recall": false + }, + { + "trial": 7, + "target_key": "6508324", + "target_value": "559", + "answer": "The value of 6508324 is 592.", + "recall": false + }, + { + "trial": 8, + "target_key": "7819844", + "target_value": "193", + "answer": "The value of 7819844 is 299.", + "recall": false + }, + { + "trial": 9, + "target_key": "3725973", + "target_value": "614", + "answer": "The value of 3725973 is 999.", + "recall": false + }, + { + "trial": 10, + "target_key": "7896171", + "target_value": "411", + "answer": "The value of 7896171 is 171.", + "recall": false + }, + { + "trial": 11, + "target_key": "3455250", + "target_value": "382", + "answer": "The value of 3455250 is 412.", + "recall": false + }, + { + "trial": 12, + "target_key": "9284790", + "target_value": "980", + "answer": "The value of 9284790 is 980.", + "recall": true + }, + { + "trial": 13, + "target_key": "5095279", + "target_value": "430", + "answer": "The value of 5095279 is 299.", + "recall": false + }, + { + "trial": 14, + "target_key": "7846768", + "target_value": "266", + "answer": "The value of 7846768 is 896.", + "recall": false + }, + { + 
"trial": 15, + "target_key": "7629442", + "target_value": "134", + "answer": "The value of 7629442 is 988.", + "recall": false + }, + { + "trial": 16, + "target_key": "5907384", + "target_value": "983", + "answer": "The value of 5907384 is 999.", + "recall": false + }, + { + "trial": 17, + "target_key": "4295319", + "target_value": "774", + "answer": "The value of 4295319 is 799.", + "recall": false + }, + { + "trial": 18, + "target_key": "8521123", + "target_value": "342", + "answer": "The value of 8521123 is 414.", + "recall": false + }, + { + "trial": 19, + "target_key": "9434428", + "target_value": "330", + "answer": "The value of 9434428 is 292.", + "recall": false + }, + { + "trial": 20, + "target_key": "5838840", + "target_value": "299", + "answer": "The value of 5838840 is 383.", + "recall": false + }, + { + "trial": 21, + "target_key": "8948396", + "target_value": "331", + "answer": "The value of 8948396 is 104.", + "recall": false + }, + { + "trial": 22, + "target_key": "6496514", + "target_value": "803", + "answer": "The value of 6496514 is 108.", + "recall": false + }, + { + "trial": 23, + "target_key": "8083037", + "target_value": "172", + "answer": "The value of 8083037 is 109.", + "recall": false + }, + { + "trial": 24, + "target_key": "4240779", + "target_value": "602", + "answer": "The value of 4240779 is 424.", + "recall": false + }, + { + "trial": 25, + "target_key": "8381917", + "target_value": "877", + "answer": "The value of 8381917 is 289.", + "recall": false + }, + { + "trial": 26, + "target_key": "4268297", + "target_value": "953", + "answer": "The value of 4268297 is 399.", + "recall": false + }, + { + "trial": 27, + "target_key": "6077735", + "target_value": "542", + "answer": "The value of 6077735 is 107.", + "recall": false + }, + { + "trial": 28, + "target_key": "3527603", + "target_value": "385", + "answer": "The value of 3527603 is 384.", + "recall": false + }, + { + "trial": 29, + "target_key": "2747569", + "target_value": "599", + 
"answer": "The value of 2747569 is 599.", + "recall": true + }, + { + "trial": 30, + "target_key": "3786173", + "target_value": "423", + "answer": "The value of 3786173 is 423.", + "recall": true + }, + { + "trial": 31, + "target_key": "6991092", + "target_value": "823", + "answer": "The value of 6991092 is 898.", + "recall": false + }, + { + "trial": 32, + "target_key": "8984996", + "target_value": "689", + "answer": "The value of 8984996 is 999.", + "recall": false + }, + { + "trial": 33, + "target_key": "2424824", + "target_value": "421", + "answer": "The value of 2424824 is 421.", + "recall": true + }, + { + "trial": 34, + "target_key": "3465899", + "target_value": "768", + "answer": "The value of 3465899 is 100.", + "recall": false + }, + { + "trial": 35, + "target_key": "8696228", + "target_value": "195", + "answer": "The value of 8696228 is 195.", + "recall": true + }, + { + "trial": 36, + "target_key": "5153044", + "target_value": "481", + "answer": "The value of 5153044 is 928.", + "recall": false + }, + { + "trial": 37, + "target_key": "3892560", + "target_value": "208", + "answer": "The value of 3892560 is 400.", + "recall": false + }, + { + "trial": 38, + "target_key": "6326344", + "target_value": "882", + "answer": "The value of 6326344 is 882.", + "recall": true + }, + { + "trial": 39, + "target_key": "2806665", + "target_value": "814", + "answer": "The value of 2806665 is 999.", + "recall": false + } + ] + }, + "K1V2_pb0": { + "config": "K1V2_pb0", + "ctx": 2048, + "hits": 0, + "n": 40, + "elapsed_s": 383, + "recall_array": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "cells": [ + { + "trial": 0, + "target_key": "2245573", + 
"target_value": "607", + "answer": "The game 's V The game , the player , is a soldier who is a The value of 8880980397 , a tactical V is a battle with the Imperial unit \" Valky's 397197, a", + "recall": false + }, + { + "trial": 1, + "target_key": "6327370", + "target_value": "490", + "answer": "The game ' Valky 8 9 , is a tactical role - playing 80 is a The game ' 92 0 .3030 is 992 0 992 0 0 0 9 .", + "recall": false + }, + { + "trial": 2, + "target_key": "5955547", + "target_value": "569", + "answer": "The game 's main character , a 4 , 76 , 7 , 7 , 7 , 7 , 7 , a game , a game , a game , is a game , a game , a game , a", + "recall": false + }, + { + "trial": 3, + "target_key": "4330336", + "target_value": "325", + "answer": "The game 's story , a former 10 , 112 , 112 , 113 , 114 , 115 , 116 , 117 , 119 ,", + "recall": false + }, + { + "trial": 4, + "target_key": "6269693", + "target_value": "421", + "answer": "' V , a former , the , the , the , the , the , the , the , the , the , a The , the , a The , the , a The, the , a The, the , a ,", + "recall": false + }, + { + "trial": 5, + "target_key": "2082945", + "target_value": "759", + "answer": "The game 's a , The value of 1005453 is a The value of 108894, The value of 10229945 is 1024.\n\nThe game 's 102429", + "recall": false + }, + { + "trial": 6, + "target_key": "9601126", + "target_value": "782", + "answer": "The game 's , which is set to be a V , meaning \" Always Ready , \" , which is a Valkyria , is a unit Shock , a Shock , who is a Shock , a fan and a The value 288", + "recall": false + }, + { + "trial": 7, + "target_key": "9652220", + "target_value": "729", + "answer": "The game 's The value of 98 , a The value of 9 , a The value of 9 , a The value of 9. , a The value of 9 , a The value of 9 , a The value of 9 , a The value", + "recall": false + }, + { + "trial": 8, + "target_key": "1858110", + "target_value": "450", + "answer": "The game 's The value 894. 
\n\nThe game ' Valky ' Valky , which is a The value of 82177. \n\nThe game ' Valky ' , a The value of 82241", + "recall": false + }, + { + "trial": 9, + "target_key": "7349758", + "target_value": "671", + "answer": "The player , which is a game . \n\nThe game ' The Valky ' \n\nThe game ' The Valky ' The value 78711\nThe game ' The Valky ' The Valky ' The game ' The game ' The ga", + "recall": false + }, + { + "trial": 10, + "target_key": "3057517", + "target_value": "989", + "answer": "The game 's The value of 107874 , which is 813 819 , is a game , which is a tactical role , a game that is a bit of the original from Media.\n\n\n\nThe game 's stor", + "recall": false + }, + { + "trial": 11, + "target_key": "6299516", + "target_value": "402", + "answer": "The game 's The value of 44 is set in the war , the game , which is a battle between the Gallia military a\n\nThe game 's story begins with the main character , N", + "recall": false + }, + { + "trial": 12, + "target_key": "6005601", + "target_value": "917", + "answer": "The value of 10 is 4 , which is a The value of 10 is a value of 10 , a The value of 10 is a value of 10 is a value of 10 10 is a value of 10 10", + "recall": false + }, + { + "trial": 13, + "target_key": "7791108", + "target_value": "545", + "answer": "The game 's The value of 10 is a The game 's The value of 10 2 \n\n\n\nThe game 's story follows a group of the Valky 12332 a penal The value of 8240 , a", + "recall": false + }, + { + "trial": 14, + "target_key": "6098996", + "target_value": "972", + "answer": "The game begins with the story of the game , which begins with the game , the game , which begins with the game , 7 , which begins with the game , the game begi", + "recall": false + }, + { + "trial": 15, + "target_key": "4618566", + "target_value": "216", + "answer": "The game 's story , which is a subtitle , \" V The value \n\n\n\nThe game 's story takes place in the Gallia E The value of 168 , where the player 's are 18 , and 18", + 
"recall": false + }, + { + "trial": 16, + "target_key": "4250224", + "target_value": "469", + "answer": "The game 's story takes place , a 3 , is a young man who is a member of Squad , a young , a young The value 3 The value ' 10 ' , a The value ' 10 ' 10 ' 10 ' 1", + "recall": false + }, + { + "trial": 17, + "target_key": "4349919", + "target_value": "801", + "answer": "The game 's story follows the events of the Gallia Liberation Front , which are a The value of 841288839 is a The value 8486334 is 4480798437 is 4", + "recall": false + }, + { + "trial": 18, + "target_key": "9232207", + "target_value": "618", + "answer": "The game 's story is set in the Valkyria Chronicles , during The game , which are known as the main story , which is a group of the game 's main story , the ga", + "recall": false + }, + { + "trial": 19, + "target_key": "6943603", + "target_value": "527", + "answer": "The game 's story is divided into three : The game 's The value of 705 2702 \n\n\n\n\nThe game 's story is divided into three : The game 's The value of 78027020", + "recall": false + }, + { + "trial": 20, + "target_key": "2493247", + "target_value": "494", + "answer": "The game 's story , the player , is a The value of V , which is a Shock , a former member of the Gallia , who is a member of the Valkyria Chronicles , a militar", + "recall": false + }, + { + "trial": 21, + "target_key": "7906541", + "target_value": "346", + "answer": "The game ' Valky ' .\n\nThe game ' Valky ' Valky ' .\n\nThe game ' Valky ' . 
\n\nThe game ' Valky ' , which is a The value 442872284", + "recall": false + }, + { + "trial": 22, + "target_key": "2169548", + "target_value": "549", + "answer": "The game 's Valky .\n\n\n\nThe game 's Valky , The game 's The game 's Valky , is a member of a The game 's Valky , a The game 's Valky , a The game 's", + "recall": false + }, + { + "trial": 23, + "target_key": "9445284", + "target_value": "724", + "answer": "The game takes place during the V , a , which is a military unit , is a The value of , which is a tactical unit composed of the The value , which is a The value", + "recall": false + }, + { + "trial": 24, + "target_key": "4205485", + "target_value": "250", + "answer": ", The value , a , a character , and a , and a The value of 999 , a The game 'The , which is a The value 99, a The game ' The game ' 99,'s The value 9, a game", + "recall": false + }, + { + "trial": 25, + "target_key": "6418130", + "target_value": "203", + "answer": "The game 's The value \n\n\nThe game takes place \n\n\nThe game takes place in the world of Valkyrie \n\n\nThe game takes place in the world of Valky \n\nThe game takes pl", + "recall": false + }, + { + "trial": 26, + "target_key": "5571663", + "target_value": "668", + "answer": "The game 's story takes place in the Europan Empire , which ' s \n\n\n\n\nThe game 's story takes from 1618.\n\n\n\n\n\nThe game 's story takes place in the world of the g", + "recall": false + }, + { + "trial": 27, + "target_key": "2846921", + "target_value": "816", + "answer": "The V 's , a \n\n\n\nThe game 's 15447\n\n\nThe game ' Valky ' s 154\n\n\nThe game ' Valky ' s 154\n\nThe game ' Valky ' s 1", + "recall": false + }, + { + "trial": 28, + "target_key": "1230616", + "target_value": "413", + "answer": "The game 's The value 10 is 2 , 17 , 2 The value of 10 is 93 , 201 , a veteran of the Gallia , a The The value of 17112 is 2", + "recall": false + }, + { + "trial": 29, + "target_key": "7901761", + "target_value": "662", + "answer": "The 
game 's The value , The value \n\n\nThe game ' s story follows the Gallia ' s invasion of the Gallia , a war that has been waged by a and a group of a The game", + "recall": false + }, + { + "trial": 30, + "target_key": "6092048", + "target_value": "831", + "answer": "The game begins with a series of missions , which are a Shock ' s The game , which is a tactical role , a battle with a The game ' s The game ' s story , during", + "recall": false + }, + { + "trial": 31, + "target_key": "8584306", + "target_value": "291", + "answer": "The game , a tactical game 8 , is a tactical game . The game ' The value of 10 is a tactical game . \n\n\nThe game 's The value of 1984 is 199. \n\nThe game 's The", + "recall": false + }, + { + "trial": 32, + "target_key": "2917107", + "target_value": "543", + "answer": "The , a character . The game , a tactical game . \n\nThe game 's 21417 's 214 , a game , which is a game of the game , a game , which is a game of the game , a", + "recall": false + }, + { + "trial": 33, + "target_key": "8001302", + "target_value": "490", + "answer": "The game 's story follows the story of the game 's main characters , with the game 's story taking place during the game 's development .\n\nThe game 's story fol", + "recall": false + }, + { + "trial": 34, + "target_key": "8973181", + "target_value": "431", + "answer": "Kurt , who is a song and a The game , is sung by and , which The game ' 4 's a The game ' , and are a The game , is a The game: 484801889 is 284.", + "recall": false + }, + { + "trial": 35, + "target_key": "2283697", + "target_value": "132", + "answer": "The story takes place , the story of the game . The game , which The value , a story , is a story . . 289689 . 
\n\nThe game 's story follows The value 28969892", + "recall": false + }, + { + "trial": 36, + "target_key": "6011995", + "target_value": "928", + "answer": "The game 's main character , the game 's The game , which is The game ' , a game , which is a game that is a game , is a game that is a game that is a game that", + "recall": false + }, + { + "trial": 37, + "target_key": "2510321", + "target_value": "954", + "answer": ", a member of Squad , a The The The game , a and The game ' s 143481150, a former member of Squad 4 , is a The value 3436883, a The value of 10434", + "recall": false + }, + { + "trial": 38, + "target_key": "2064198", + "target_value": "748", + "answer": "The game 's story follows the story of the game , which is a battle , and a main character , which is carried over by a battle , and a character , a character ,", + "recall": false + }, + { + "trial": 39, + "target_key": "1641091", + "target_value": "812", + "answer": "The game 's story is set in the V. The value of 2972984 is 14, a The value of 928724 is 10234 and 1023420814 is 102", + "recall": false + } + ] + }, + "K1V1_pb0": { + "config": "K1V1_pb0", + "ctx": 2048, + "hits": 1, + "n": 40, + "elapsed_s": 383, + "recall_array": [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + false, + true, + false, + false, + false, + false, + false, + false, + false, + false, + false + ], + "cells": [ + { + "trial": 0, + "target_key": "3580140", + "target_value": "644", + "answer": "The game 's story begins with , a and The The game 's story is Al The value of 88802442. 
\nThe game 's story is a story of a character who is a The game 's story", + "recall": false + }, + { + "trial": 1, + "target_key": "8558235", + "target_value": "395", + "answer": "", + "recall": false + }, + { + "trial": 2, + "target_key": "7110620", + "target_value": "512", + "answer": "The game 's story begins with the Valky The value of 48887 and No.463174, a The value of the Valky 's474171748817541784875", + "recall": false + }, + { + "trial": 3, + "target_key": "7145815", + "target_value": "837", + "answer": "Valky, a , and , a Valky , and , is a The 8863, , a Valky 8899999, a 9969, which 9933, a squad, a The 9", + "recall": false + }, + { + "trial": 4, + "target_key": "9463531", + "target_value": "455", + "answer": "The , The value of , and , and , and , and , and , and , and , , and , , and , and , and , a The value , a, and , and , and , and ,", + "recall": false + }, + { + "trial": 5, + "target_key": "7779944", + "target_value": "754", + "answer": "V The value of , the game , which is a The , a The value of the game , the game , the game , the game, the game, the game, the game , the game , the game , the ", + "recall": false + }, + { + "trial": 6, + "target_key": "2280068", + "target_value": "786", + "answer": ", and the player , who is a V \" The Valkyria \" 384761 , which is a squad 39471 , and 4441 , which is a squad 31 , and the main character 38", + "recall": false + }, + { + "trial": 7, + "target_key": "4437613", + "target_value": "499", + "answer": "The game 's The value of 3044440 is a The value of 4444444444444444444444444444444444444444", + "recall": false + }, + { + "trial": 8, + "target_key": "1145255", + "target_value": "453", + "answer": "The V\n\n\n\nValkyra 's\n\n\n\nThe game 's", + "recall": false + }, + { + "trial": 9, + "target_key": "8649244", + "target_value": "536", + "answer": "The game 's The value of 8!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", + "recall": false + }, + { + "trial": 10, + "target_key": "1656050", + 
"target_value": "528", + "answer": "Valky The value The value of 1424 The value of the game 142 The value of the game 144 142 , a The value of the game 142147, a The value of the game, a", + "recall": false + }, + { + "trial": 11, + "target_key": "8102980", + "target_value": "992", + "answer": "The game 's The value of 983.\n\n\n\n\nThe game 's gameplay is a group of the game 's main The game 's main battlefield, a group of the game 's are a Shock the game ", + "recall": false + }, + { + "trial": 12, + "target_key": "8257620", + "target_value": "820", + "answer": "The value of 820704650 is a The V. , a veteran , which is a The value of 824996 and 820012324The game 's 82408The game ' The value of", + "recall": false + }, + { + "trial": 13, + "target_key": "7166157", + "target_value": "618", + "answer": "The , The V The V The Valky ' The Valkyria ' Valky 7191, which is a The value of 716163838197197194819419481948", + "recall": false + }, + { + "trial": 14, + "target_key": "6598944", + "target_value": "190", + "answer": "The game 's The value of 484.\n\n\n\nThe game 's gameplay.\n\n\nThe game 's The value of the game.\n\n\n\n\n\nThe game 's story \n\n\n\n\n\nThe game 's gameplay", + "recall": false + }, + { + "trial": 15, + "target_key": "7621273", + "target_value": "732", + "answer": "S , a Valky's 7679353 is a The value of Valky 349934993 is a Valky 33573076 is a Valky 331338937", + "recall": false + }, + { + "trial": 16, + "target_key": "1903146", + "target_value": "299", + "answer": "The story takes place in the game .", + "recall": false + }, + { + "trial": 17, + "target_key": "2900091", + "target_value": "937", + "answer": ", \" V \" , and , and . 
The value of 2910, 294, 291, and 2924, and 292, and 292, 292, and 2929, and", + "recall": false + }, + { + "trial": 18, + "target_key": "6673137", + "target_value": "500", + "answer": "", + "recall": false + }, + { + "trial": 19, + "target_key": "1451632", + "target_value": "926", + "answer": "The , a V \" , which is a The Valkyria ,\" , and a The 2402252 24246 2424242424242424242424242", + "recall": false + }, + { + "trial": 20, + "target_key": "7221626", + "target_value": "475", + "answer": "The , The 498818 , which is a The 498192 , and No. , and the The V The 416008181081804100818181818", + "recall": false + }, + { + "trial": 21, + "target_key": "4739111", + "target_value": "904", + "answer": "Valkyra 06718 ,7 , and the The value of 5650498111 , which is 4924.\n\n\n\n\nThe game begins during the Valky's Valky", + "recall": false + }, + { + "trial": 22, + "target_key": "6183995", + "target_value": "571", + "answer": "Valky 495832940801\n\n\n\nThe game 'The Almighty'\n\n\n\nThe game'The game 'The game'\n\n\n\n\nThe game'sThe game's", + "recall": false + }, + { + "trial": 23, + "target_key": "1249093", + "target_value": "780", + "answer": "Valk The , a has , who , who is a The The value , which is a known to the 2 , which is the The 201 , which is a The 201 , which is a 20488883,", + "recall": false + }, + { + "trial": 24, + "target_key": "6556013", + "target_value": "399", + "answer": "The game 's 10363645.\n\n\n\nThe game's 1511382\n\nThe game's 154562,\n\n\n\nThe game's 14545.", + "recall": false + }, + { + "trial": 25, + "target_key": "9645441", + "target_value": "574", + "answer": "The game 's 1000.,880,440,4400,444,440,4401,4444,440,4400,4440,440,40,", + "recall": false + }, + { + "trial": 26, + "target_key": "5531616", + "target_value": "688", + "answer": "The V: The V: The V The game 80 , and the Valkyria Abroad : Valky 's 460. The Valky 's 4200 46000 . 
The game takes place : Valky , a", + "recall": false + }, + { + "trial": 27, + "target_key": "3217307", + "target_value": "742", + "answer": "Valky 's V. , who is a veteran , and a former Valkyra , and the nation 's 84849158999999484694848494849494", + "recall": false + }, + { + "trial": 28, + "target_key": "7443334", + "target_value": "326", + "answer": "", + "recall": false + }, + { + "trial": 29, + "target_key": "2465435", + "target_value": "568", + "answer": "The game 's's447's\n\nThe game 's The value of 3443\n\nThe game 's value of 2744\nThe game 's' 7772\nThe game 's' 79980", + "recall": false + }, + { + "trial": 30, + "target_key": "6818550", + "target_value": "450", + "answer": "The game 's , and 450.\n\n\n\n\n\n\nThe game 's 32188987 , and are a \n\n\n\n\nThe game 's 328418832, and the", + "recall": true + }, + { + "trial": 31, + "target_key": "1082671", + "target_value": "967", + "answer": "The game 'The Nameless' , the game 'The Nameless' , is a penal squad of the Gallia The game 14770The game 'The Nameless', or No.\n\n\n\n\n\n\nThe game '", + "recall": false + }, + { + "trial": 32, + "target_key": "9339104", + "target_value": "997", + "answer": "V - \" The Nameless \" , a former , and Alicia , a V B . , and No. 
, are the Valkyra:", + "recall": false + }, + { + "trial": 33, + "target_key": "6952158", + "target_value": "571", + "answer": "and , and the V The , a and the V The , and the V the V the , and the V the V the V the V the V the V the V the V the V the V the V the V the V the V the V the ", + "recall": false + }, + { + "trial": 34, + "target_key": "5055904", + "target_value": "266", + "answer": "The game 's The value of 2322\n\n\n\nThe game \n\n\nThe game 's story begins \n\n\n\nThe game \n\n\n\nThe game '\n\n\n\n\n\n\nThe game \n\nthe game", + "recall": false + }, + { + "trial": 35, + "target_key": "3878902", + "target_value": "205", + "answer": "", + "recall": false + }, + { + "trial": 36, + "target_key": "3532461", + "target_value": "753", + "answer": "Valky ' s Valky The value of 214 , The value of the 2194939318187 The Valky ' s The value of 18481018498", + "recall": false + }, + { + "trial": 37, + "target_key": "6336849", + "target_value": "229", + "answer": "The game 's The value of 8000 is a The value of 86062020200000 is 8010000000000000000000000", + "recall": false + }, + { + "trial": 38, + "target_key": "7075858", + "target_value": "902", + "answer": "The , and , a Valk , which is a, which is, which is, which is, which is, which is, which is, which is, which is, which is , which is, which is, which is, which ", + "recall": false + }, + { + "trial": 39, + "target_key": "5976713", + "target_value": "125", + "answer": "The game 's , a The value of 734 is 648 , the The value of 744 and 64848, and the The value of 498.", + "recall": false + } + ] + } + }, + "errors": {}, + "gpu": { + "name": "Tesla T4", + "sm": "75", + "n_gpus": 2, + "total_gb": 14.6 + }, + "transformers": "5.12.0", + "device_map": { + "model.embed_tokens": "0", + "model.layers.0": "0", + "model.layers.1": "0", + "model.layers.2": "0", + "model.layers.3": "0", + "model.layers.4": "0", + "model.layers.5": "0", + "model.layers.6": "0", + "model.layers.7": "0", + "model.layers.8": "0", + 
"model.layers.9": "0", + "model.layers.10": "0", + "model.layers.11": "0", + "model.layers.12": "0", + "model.layers.13": "0", + "model.layers.14": "0", + "model.layers.15": "1", + "model.layers.16": "1", + "model.layers.17": "1", + "model.layers.18": "1", + "model.layers.19": "1", + "model.layers.20": "1", + "model.layers.21": "1", + "model.layers.22": "1", + "model.layers.23": "1", + "model.layers.24": "1", + "model.layers.25": "1", + "model.layers.26": "1", + "model.layers.27": "1", + "model.layers.28": "1", + "model.layers.29": "1", + "model.layers.30": "1", + "model.layers.31": "1", + "model.norm": "1", + "model.rotary_emb": "1", + "lm_head": "1" + }, + "model_config": { + "n_kv_heads": 4, + "head_dim": 128, + "rope_theta": null, + "n_layers": 32 + }, + "paired_mcnemar": { + "K2V2_pb0": { + "b_fp16hit_quantmiss": 26, + "c_fp16miss_quantrescue": 0 + }, + "K2V1_pb0": { + "b_fp16hit_quantmiss": 33, + "c_fp16miss_quantrescue": 0 + }, + "K1V2_pb0": { + "b_fp16hit_quantmiss": 39, + "c_fp16miss_quantrescue": 0 + }, + "K1V1_pb0": { + "b_fp16hit_quantmiss": 38, + "c_fp16miss_quantrescue": 0 + } + }, + "run_complete_utc": "2026-06-15T14:38:51Z" +} \ No newline at end of file