diff --git a/experiments/kaggle/results/nq_kvarn_h2h.json b/experiments/kaggle/results/nq_kvarn_h2h.json new file mode 100644 index 0000000..d0307dc --- /dev/null +++ b/experiments/kaggle/results/nq_kvarn_h2h.json @@ -0,0 +1,669 @@ +{ + "kernel": "nq_kvarn_h2h", + "model_name": "Mistral-7B-Instruct-v0.3", + "model_id": "unsloth/mistral-7b-instruct-v0.3", + "weights": "FP16 (unsloth/mistral-7b-instruct-v0.3, ungated)", + "protocol_ppl": "AQUA-iso paired-PPL: wikitext-2-raw-v1 test, n>=60 chunks, prefix=1024 cont=1024. Paired delta% per segment.", + "protocol_niah": "Chat-template NIAH at 4K+8K, 5 depths, single needle UUID passcode, 1 trial per depth+context.", + "bpe_frame": { + "kvarn_k4v2_g128_bpe": 3.375, + "kvarn_bpe_formula": "tile_bytes*8/(2*D*group) verified from source", + "nq_k4v2_honest_raw_bpe": 3.125, + "frame_note": "NQ K4V2 is 0.250 bpe lighter than KVarN k4v2_g128 (NQ has smaller per-head scale overhead). Comparable 3.x bpe class. NEVER 'E8 beats KVarN'. KVarN paper reports reasoning accuracy; we measure wikitext PPL + NIAH as controlled iso-bpe contrast.", + "kvarn_sink_tokens": 128, + "kvarn_sinkhorn_iters": 8 + }, + "faithfulness_gate": { + "sinkhorn_max_abs_diff": 0.0, + "hadamard_max_err": 5.96e-08, + "bpe_formula_verified": true, + "gate_result": "PASS" + }, + "ppl_results": { + "KVarN_k4v2": { + "n": 161, + "mean_delta_pct": 0.4212, + "paired_sigma_pct": 0.3895, + "sem_pct": 0.0307, + "z": 13.72, + "significant_at_2sigma": true, + "n_negative_segments": 22, + "per_segment_delta_pct": [ + 1.3035214222084637, + 0.2701816119184795, + 0.18351899651041204, + 0.6361522294781627, + 0.5246326787235938, + 1.108195946350467, + -0.06101802863440124, + -0.023447914992085078, + -0.16840184161939864, + 0.78293772965424, + 0.4134169650642589, + 0.22982469465429486, + 0.15974336334787917, + -0.20875242059877172, + 1.2304889556707606, + -0.03430544629030514, + 0.8760462986427211, + 1.1039246296095366, + 0.053599636358408885, + 0.1530789269086871, + 0.6209902145366609, + 0.4594081301410445, + 0.18520955010673182, + 0.22152737070409392, + 0.6557885667849117, + 0.3646631932550331, + 0.6685824020953519, + 0.11084917668748198, + 0.555686598191192, + 0.3806965823811807, + 0.16462210783860315, + 0.667311378667828, + 0.7335584909716182, + -0.03861523024472548, + 0.8387174296089841, + 0.8569222216845157, + -0.12718632608590066, + -0.06437734257351121, + 0.34901366427796304, + 0.1887466162223434, + 0.5885467831202331, + -0.004412983775425283, + 0.6019746896834467, + 0.65491672970476, + 0.5089281347694316, + 0.8774195571505872, + -0.06753472562594112, + -0.008589431753541526, + 0.12210890175139039, + 0.44277916465458356, + 0.12140194906081163, + 0.22118553688977868, + 0.29239909549981413, + 0.8125346635329977, + 0.36101782401735366, + 0.1300534760602924, + 0.9403499365467263, + 0.4278115430500462, + 1.0329691359459467, + 0.4014095418041397, + -0.18700240330387027, + -0.5644452158716103, + 0.4935357324118275, + 0.8257616988801515, + 0.5088574668599166, + 0.5711035651086321, + 0.7301830165958316, + 1.6828021522233814, + 0.10914490194328788, + 0.3499924389432027, + 0.41154831498837097, + -0.024491844582385614, + 0.38301216075857897, + 0.5166793528253543, + 0.15664182809443633, + 0.9078956153100768, + 0.26730465027684036, + 0.5129277187168395, + 0.5819124968407741, + 2.1137158652441705, + 0.3281794376304628, + 0.5349628666564237, + 0.18301243547330773, + 0.2590582176738069, + 0.40911491605407546, + 0.2762298004501807, + 0.4699886264754865, + -0.06327402895329942, + 0.20478488094529795, + -0.3186184322339442, + 0.2737456788704218, + 0.6028656329149796, + -0.11468028559300956, + 0.28267440275947886, + 0.4799118601205754, + 0.394575835280358, + 0.5662832524048514, + 0.17433235558078689, + 0.5624891487904026, + 0.1444314029246685, + 0.8533193219704001, + 0.9335490042011783, + 0.339433585634065, + 0.39026558269966166, + 0.8049310349513255, + 0.06814921631851438, + 0.8982833131175113, + 0.805570429469266, + 0.1546086515459846, + 0.5632563826219192, + 0.5949557777359594, + 0.5531083080787503, + 1.2162034223437497, + 0.20135817247723076, + 0.18315631053702253, + 0.07973950042454782, + 0.48762626559293054, + 0.2960674789352018, + 0.5253139060583512, + 0.36157332890024035, + 0.24690728992030944, + 0.46528137034148553, + 0.2535989664796257, + 0.08392263216172721, + 0.2702986121053203, + 0.5209499203793424, + 0.33917840958323087, + 0.40825873415779734, + 0.6866224243803799, + 0.590173446302509, + 0.8353429568372306, + 0.49574717401394197, + 0.2747717216283985, + 0.9331956823159984, + -0.03368632951537459, + 0.035867577094045464, + 0.5939204393764954, + 0.3940742144894089, + 0.2834845077714353, + 0.8873588842390282, + -0.1703582294892659, + 1.217048323592985, + 1.0017179594049783, + -0.32013347312197166, + 0.451587695408076, + 0.4779427673247603, + 1.0652701608916986, + 0.1366504681329635, + 0.056635114808546085, + -0.4764346029347629, + 0.577540035716532, + 0.4842455033089633, + 0.449518477777123, + 0.06461042007142667, + 0.2759717849583845, + 0.5948052918474581, + -0.12212134423750406, + 0.8651001349806089, + 0.29473064145743993, + 0.5400132997204928, + 0.6932078830071269 + ] + }, + "NQ_K4V2_pb0": { + "n": 161, + "mean_delta_pct": 0.2669, + "paired_sigma_pct": 0.3224, + "sem_pct": 0.0254, + "z": 10.5, + "significant_at_2sigma": true, + "n_negative_segments": 39, + "per_segment_delta_pct": [ + 1.1404347797060603, + 0.3935455629552716, + 0.5569080749283642, + -0.16179464226516305, + -0.03592799259263422, + 0.4333734322990078, + -0.16547465534877362, + -0.12338537260954233, + 0.8357381731115199, + 0.04132694447538817, + 0.35972966506696585, + 0.5419956414702094, + -0.13370319033610248, + 0.12126115823118874, + -0.010204916418314183, + 0.3909113578856782, + 0.3358461701366108, + 0.04856765059851014, + 0.43891759926572854, + -0.006327709373841259, + 0.32400110494224904, + 0.1922898025600211, + 0.3785312195445065, + 0.36428183458822777, + -0.14851021973899078, + 0.4471668043333643, + 0.48701594615813576, + 0.47075214857736736, + -0.014605483923383622, + 0.45202949467253656, + 0.1726689592955638, + 0.25717444246227483, + 0.5561860179211794, + 0.24738418081852365, + -0.18803865251328128, + 0.1284267641622705, + 0.5409668643937006, + 0.2747957227542502, + -0.1399178687536008, + 0.0970390963819239, + -0.02537124958909273, + 0.20762706926083502, + 0.2125171409822611, + 0.37882403633696565, + -0.06928529606339782, + 0.22369534388239895, + -0.020980129783547904, + 0.47005175209849465, + 0.4109444660478682, + 0.3613841566218818, + 0.3609517641817976, + 0.1980305100067466, + 0.4179445299428824, + 0.5476631284766624, + 0.042452377421433436, + 0.34217751970501414, + -0.18178217163830043, + -0.21668205492870912, + -0.022415939496961208, + 0.3033731378810311, + 0.2614279851671137, + 0.09004942412251617, + 0.2561440803904649, + 0.31049475444364477, + 0.7462476697797465, + -0.15147972684385774, + 0.4562702653628343, + 0.2958349189466901, + 0.10861775077975175, + -0.07061427397944486, + -0.1849925867060339, + 0.03544556603422446, + -0.012556297453571601, + 0.44143886910618063, + -0.1473660098300717, + 0.19276943132083132, + -0.023591493347268983, + 0.3683192051886618, + 0.4139892836595561, + 0.24990064268768003, + -0.14126233964017412, + 0.8913916387356351, + 0.6736184630984389, + -0.056414673192268915, + 0.42040217883872694, + 0.2562955594488792, + 0.19104577616253213, + 0.5011444383876886, + 0.6571859310858937, + 0.1627940492015948, + 0.7277901148849, + -0.25969590620903016, + 0.06354611116225706, + -0.005860998388418458, + 0.44109929033832757, + 0.1931381474916585, + 0.3581112323863205, + -0.08235754464097292, + 0.5999460173035385, + 0.34223155854736176, + 0.14681643891855378, + 0.0221007587237124, + 0.5940859724101889, + 0.6853752725822516, + -0.3009730714206552, + 0.533440868085841, + 0.6081362051156733, + 0.7167153971274841, + 0.018650372735729864, + 0.6161795522150977, + 0.8389482313809171, + -0.3788056931328362, + 1.3926560139993591, + 0.47801191038753066, + 0.2399461407924824, + 0.42946114743910363, + 0.4542415256512172, + 0.4360117565157192, + 0.9706486439711096, + -0.143134111742374, + 0.10091147710694942, + 0.2581133260016802, + 0.3428169812047963, + 0.40499028934975556, + 0.5512129911145772, + 0.055937605200839076, + 0.7417866836491804, + 0.20570528542409275, + 0.06995758352792661, + 0.10980983521078984, + 0.5590652511199381, + 0.301155426277459, + 0.37526374241325244, + -0.4093043393790486, + 0.9194684237517753, + 0.1373155840947547, + 0.20935700604469265, + 0.24958870596299948, + 0.7846715675350298, + 1.3876112850366185, + -0.09642194213064645, + 0.5313714682576703, + -0.21795085736223946, + 0.27953005715423196, + 0.4501571206464308, + -0.12359454833881413, + 0.4419467368395083, + 0.30433646225625566, + 0.17898102030196456, + -0.14845345767661777, + -0.10439337925208038, + 0.8244057348140055, + 0.13272727264758155, + 0.40427833524542534, + 0.09621252871413445, + 0.44480466764856574, + 0.2870340450325618, + -0.24840324514793619, + 0.10526321750325643, + -0.12008930292335354, + 0.42356896915502124 + ] + } + }, + "niah_results": { + "FP16": { + "4096": { + "hits": 4, + "n": 5, + "rate": 0.8, + "trials": [ + { + "ctx_len": 4096, + "depth": 0.1, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42. This pass", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.3, + "actual_input_len": 4045, + "answer": "The secret passcode is not provided in the text you've given. The text only contains repet", + "hit": false + }, + { + "ctx_len": 4096, + "depth": 0.5, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42. This pass", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.7, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42. This pass", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.9, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + } + ] + }, + "8192": { + "hits": 0, + "n": 5, + "rate": 0.0, + "trials": [ + { + "ctx_len": 8192, + "depth": 0.1, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.3, + "error": "yTorch, and 87.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.5, + "error": "yTorch, and 87.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.7, + "error": "yTorch, and 87.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.9, + "error": "yTorch, and 87.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + } + ] + } + }, + "KVarN_k4v2": { + "4096": { + "hits": 5, + "n": 5, + "rate": 1.0, + "trials": [ + { + "ctx_len": 4096, + "depth": 0.1, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42. This pass", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.3, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.5, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42. This pass", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.7, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.9, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + } + ] + }, + "8192": { + "hits": 0, + "n": 5, + "rate": 0.0, + "trials": [ + { + "ctx_len": 8192, + "depth": 0.1, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.3, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.5, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.7, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.9, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + } + ] + } + }, + "NQ_K4V2_pb0": { + "4096": { + "hits": 4, + "n": 5, + "rate": 0.8, + "trials": [ + { + "ctx_len": 4096, + "depth": 0.1, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42. This pass", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.3, + "actual_input_len": 4045, + "answer": "The secret passcode is not provided in the text you've given. The text only contains repet", + "hit": false + }, + { + "ctx_len": 4096, + "depth": 0.5, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.7, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + }, + { + "ctx_len": 4096, + "depth": 0.9, + "actual_input_len": 4045, + "answer": "The secret passcode is: 7X9K-MINT-42.", + "hit": true + } + ] + }, + "8192": { + "hits": 0, + "n": 5, + "rate": 0.0, + "trials": [ + { + "ctx_len": 8192, + "depth": 0.1, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.3, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.5, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.7, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + }, + { + "ctx_len": 8192, + "depth": 0.9, + "error": "yTorch, and 47.69 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)\n", + "hit": false + } + ] + } + } + }, + "base_fp16_ppl": 5.0513, + "n_segments": 161, + "model_config": { + "n_kv_heads": 8, + "head_dim": 128, + "rope_theta": 1000000.0, + "n_layers": 32 + }, + "gpu": { + "name": "Tesla T4", + "sm": "75", + "n_gpus": 2, + "total_gb": 14.6 + }, + "transformers": "5.12.1", + "run_at_utc": "2026-06-16T15:28:38Z", + "errors": {}, + "device_map": { + "model.embed_tokens": "0", + "model.layers.0": "0", + "model.layers.1": "0", + "model.layers.2": "0", + "model.layers.3": "0", + "model.layers.4": "0", + "model.layers.5": "0", + "model.layers.6": "0", + "model.layers.7": "0", + "model.layers.8": "0", + "model.layers.9": "0", + "model.layers.10": "0", + "model.layers.11": "0", + "model.layers.12": "0", + "model.layers.13": "0", + "model.layers.14": "0", + "model.layers.15": "0", + "model.layers.16": "1", + "model.layers.17": "1", + "model.layers.18": "1", + "model.layers.19": "1", + "model.layers.20": "1", + "model.layers.21": "1", + "model.layers.22": "1", + "model.layers.23": "1", + "model.layers.24": "1", + "model.layers.25": "1", + "model.layers.26": "1", + "model.layers.27": "1", + "model.layers.28": "1", + "model.layers.29": "1", + "model.layers.30": "1", + "model.layers.31": "1", + "model.norm": "1", + "model.rotary_emb": "1", + "lm_head": "1" + }, + "run_complete_utc": "2026-06-16T17:47:25Z" +} \ No newline at end of file diff --git a/paper/nexusquant.pdf b/paper/nexusquant.pdf index a9409cd..5fe8429 100644 Binary files a/paper/nexusquant.pdf and b/paper/nexusquant.pdf differ