From a2e01921f9c5443ea89ba21414f47ff158813a28 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Mon, 23 Jun 2025 14:35:16 -0400
Subject: [PATCH 01/42] start on binary format

---
 src/BinaryApplicator.cpp | 347 +++++++++++++++++++++++++++++++++++++++
 src/BinaryApplicator.hpp |  52 ++++++
 src/CMakeLists.txt       |   2 +
 src/FormatConverter.cpp  |  15 ++
 src/FormatConverter.hpp  |   3 +-
 src/cg-conv.cpp          |   6 +
 src/cg3.h                |   1 +
 src/options_conv.hpp     |   4 +
 8 files changed, 429 insertions(+), 1 deletion(-)
 create mode 100644 src/BinaryApplicator.cpp
 create mode 100644 src/BinaryApplicator.hpp

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
new file mode 100644
index 00000000..65d2f278
--- /dev/null
+++ b/src/BinaryApplicator.cpp
@@ -0,0 +1,347 @@
+/*
+* Copyright (C) 2007-2025, GrammarSoft ApS
+* Developed by Tino Didriksen <mail@tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick@mail.dk>, Tino Didriksen <mail@tinodidriksen.com>
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "BinaryApplicator.hpp"
+#include "Grammar.hpp"
+
+namespace CG3 {
+
+BinaryApplicator::BinaryApplicator(std::ostream& ux_err)
+  : GrammarApplicator(ux_err)
+{
+}
+
+void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& output) {
+  ux_stdin = &input;
+  ux_stdout = &output;
+
+  if (!input.good()) {
+    u_fprintf(ux_stderr, "Error: Input is null - nothing to parse!\n");
+    CG3Quit(1);
+  }
+  if (input.eof()) {
+    u_fprintf(ux_stderr, "Error: Input is empty - nothing to parse!\n");
+    CG3Quit(1);
+  }
+  if (!output) {
+    u_fprintf(ux_stderr, "Error: Output is null - cannot write to nothing!\n");
+    CG3Quit(1);
+  }
+
+  if (!grammar) {
+    u_fprintf(ux_stderr, "Error: No grammar provided - cannot continue! Hint: call setGrammar() first.\n");
+    CG3Quit(1);
+  }
+
+  index();
+
+  uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
+  bool flushAfter = false;
+
+  gWindow->window_span = num_windows;
+
+  while (!input.eof()) {
+    flushAfter = readWindow();
+    gWindow->shuffleWindowsDown();
+    runGrammarOnWindow();
+    ++numWindows;
+    if (numWindows % resetAfter == 0) {
+      resetIndexes();
+    }
+    if (flushAfter) {
+      while (!gWindow->next.empty()) {
+	gWindow->shuffleWindowsDown();
+	runGrammarOnWindow();
+      }
+      gWindow->shuffleWindowsDown();
+      while (!gWindow->previous.empty()) {
+	SingleWindow* tmp = gWindow->previous.front();
+	printSingleWindow(tmp, output);
+	free_swindow(tmp);
+	gWindow->previous.erase(gWindow->previous.begin());
+      }
+    }
+  }
+}
+
+#define READ_U16_INTO(dest) \
+  do { \
+    (dest) = reinterpret_cast<uint16_t*>(&buf[pos])[0]; \
+    pos += 2; \
+  } while (false)
+
+#define READ_U32_INTO(dest) \
+  do { \
+    (dest) = reinterpret_cast<uint32_t*>(&buf[pos])[0]; \
+    pos += 4; \
+  } while (false)
+
+#define READ_STR_INTO(dest)			\
+  do { \
+    uint16_t tl = reinterpret_cast<uint16_t*>(&buf[pos])[0]; \
+    pos += 2; \
+    (dest).clear(); \
+    (dest).resize(tl, 0); \
+    int32_t olen = 0; \
+    UErrorCode status = U_ZERO_ERROR; \
+    u_strFromUTF8(&(dest)[0], tl, &olen, &buf[pos], tl, &status); \
+    (dest).resize(olen); \
+    pos += tl; \
+  } while (false)
+
+bool BinaryApplicator::readWindow() {
+  SingleWindow* cSWindow = gWindow->allocAppendSingleWindow();
+  
+  uint32_t cs = 0;
+  readRaw(*ux_stdin, cs);
+
+  if (ux_stdin->eof()) {
+    return true;
+  }
+
+  std::string buf(cs, 0);
+  ux_stdin->read(&buf[0], cs);
+  uint32_t pos = 0;
+
+  // TODO: flags
+  uint16_t flags;
+  READ_U16_INTO(flags);
+  if (flags & BFW_FLUSH) {
+    cSWindow->flush_after = true;
+  }
+
+  TagVector window_tags;
+  uint16_t tag_count;
+  READ_U16_INTO(tag_count);
+  for (uint16_t i = 0; i < tag_count; i++) {
+    UString tg;
+    READ_STR_INTO(tg);
+    u_fprintf(ux_stderr, "pos = %u, tg = %S, i = %u / %u\n", pos, tg.data(), i, tag_count);
+    window_tags.push_back(addTag(tg));
+  }
+
+  uint16_t var_count;
+  READ_U16_INTO(var_count);
+  // TODO
+
+  READ_STR_INTO(cSWindow->text);
+  READ_STR_INTO(cSWindow->text_post);
+
+  uint16_t cohort_count;
+  READ_U16_INTO(cohort_count);
+  uint16_t tag;
+  for (uint16_t cn = 0; cn < cohort_count; cn++) {
+    Cohort* cCohort = alloc_cohort(cSWindow);
+    cCohort->global_number = gWindow->cohort_counter++;
+
+    READ_U16_INTO(flags);
+    /*if (flags & BFC_DELETED) {
+      cCohort->type |= CT_DELETED;
+      }*/
+
+    READ_U16_INTO(tag);
+    cCohort->wordform = window_tags[tag];
+
+    READ_U16_INTO(tag_count);
+    if (tag_count) {
+      cCohort->wread = alloc_reading(cCohort);
+      for (uint16_t tn = 0; tn < tag_count; tn++) {
+	READ_U16_INTO(tag);
+	addTagToReading(*cCohort->wread, window_tags[tag]);
+      }
+    }
+
+    READ_U32_INTO(cCohort->dep_self);
+    READ_U32_INTO(cCohort->dep_parent);
+
+    READ_STR_INTO(cCohort->text);
+    READ_STR_INTO(cCohort->wblank);
+
+    uint16_t reading_count;
+    READ_U16_INTO(reading_count);
+    Reading* prev = nullptr;
+    for (uint16_t rn = 0; rn < reading_count; rn++) {
+      Reading* cReading = alloc_reading(cCohort);
+      addTagToReading(*cReading, cCohort->wordform);
+
+      READ_U16_INTO(flags);
+      if (flags & BFR_DELETED) {
+	cReading->deleted = 1;
+      }
+
+      READ_U16_INTO(tag_count);
+      for (uint16_t tn = 0; tn < tag_count; tn++) {
+	READ_U16_INTO(tag);
+	addTagToReading(*cReading, window_tags[tag]);
+      }
+      
+      if (prev && flags & BFR_SUBREADING) {
+	prev->next = cReading;
+      }
+      else {
+	cCohort->appendReading(cReading);
+      }
+      prev = cReading;
+    }
+  }
+
+  return cSWindow->flush_after;
+}
+
+#define WRITE_U16_INTO(n, buffer) \
+  do { \
+    std::string tmp(2, 0);	       \
+    uint16_t tmp_n = (n); \
+    tmp.assign(reinterpret_cast<char*>(&tmp_n), 2);	\
+    (buffer) += tmp; \
+  } while (false)
+
+#define WRITE_U32_INTO(n, buffer) \
+  do { \
+    std::string tmp(4, 0);	       \
+    uint32_t tmp_n = (n); \
+    tmp.assign(reinterpret_cast<char*>(&tmp_n), 4);	\
+    (buffer) += tmp; \
+  } while (false)
+
+#define WRITE_TAG_INTO(tag_, buffer) \
+  do { \
+    if (tag_index.find((tag_)) == tag_index.end()) { \
+      tag_index[(tag_)] = tags_to_write.size(); \
+      tags_to_write.push_back((tag_)); \
+      u_fprintf(ux_stderr, "adding tag %S\n", (tag_)->tag.data());	\
+    } \
+    WRITE_U16_INTO(tag_index[(tag_)], buffer); \
+  } while (false)
+
+#define WRITE_STR_INTO(s, buffer) \
+  do { \
+    std::string tmp((s).size() * 4, 0);		\
+    int32_t olen = 0; \
+    UErrorCode status = U_ZERO_ERROR; \
+    u_strToUTF8(&tmp[0], SI32((s).size() * 4 - 1), &olen, (s).data(), SI32((s).size()), &status); \
+    tmp.resize(olen); \
+    WRITE_U16_INTO(UI16(olen), (buffer)); \
+    (buffer) += tmp; \
+  } while (false)
+
+void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) {
+  TagVector tags_to_write;
+  std::map<Tag*, uint32_t> tag_index;
+
+  std::string cohort_buffer;
+  uint16_t cohort_count = 0;
+  for (auto& cohort : window->all_cohorts) {
+    if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
+      continue;
+    }
+    cohort_count++;
+
+    uint16_t flags = 0;
+    WRITE_U16_INTO(flags, cohort_buffer);
+
+    WRITE_TAG_INTO(cohort->wordform, cohort_buffer);
+    if (cohort->wread) {
+      std::string tag_buffer;
+      uint16_t tag_count = 0;
+      for (auto tter : cohort->wread->tags_list) {
+	if (tter == cohort->wordform->hash) {
+	  continue;
+	}
+	WRITE_TAG_INTO(grammar->single_tags[tter], tag_buffer);
+	tag_count++;
+      }
+      WRITE_U16_INTO(tag_count, cohort_buffer);
+      cohort_buffer += tag_buffer;
+    }
+    else {
+      WRITE_U16_INTO(0, cohort_buffer);
+    }
+
+    WRITE_U32_INTO(cohort->dep_self, cohort_buffer);
+    WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
+
+    WRITE_STR_INTO(cohort->text, cohort_buffer);
+    WRITE_STR_INTO(cohort->wblank, cohort_buffer);
+    
+    std::string reading_buffer;
+    uint16_t reading_count = 0;
+    std::sort(cohort->readings.begin(), cohort->readings.end(), Reading::cmp_number);
+    for (auto top_reading : cohort->readings) {
+      if (top_reading->noprint) {
+	continue;
+      }
+      auto reading = top_reading;
+      while (reading) {
+	reading_count++;
+	uint16_t flags = 0;
+	if (reading != top_reading) {
+	  flags |= BFR_SUBREADING;
+	}
+	std::string tag_buffer;
+	uint16_t tag_count = 0;
+	if (reading->baseform) {
+	  WRITE_TAG_INTO(grammar->single_tags[reading->baseform], tag_buffer);
+	  tag_count++;
+	}
+	for (auto& tter : reading->tags_list) {
+	  auto tag = grammar->single_tags[tter];
+	  if (tag->type & T_BASEFORM) {
+	    continue;
+	  }
+	  WRITE_TAG_INTO(tag, tag_buffer);
+	  tag_count++;
+	}
+	WRITE_U16_INTO(tag_count, reading_buffer);
+	reading_buffer += tag_buffer;
+	reading = reading->next;
+      }
+    }
+    WRITE_U16_INTO(reading_count, cohort_buffer);
+    cohort_buffer += reading_buffer;
+  }
+  
+  std::string header_buffer;
+
+  uint16_t flags = 0;
+  if (window->flush_after) {
+    flags |= BFW_FLUSH;
+  }
+  WRITE_U16_INTO(flags, header_buffer);
+
+  WRITE_U16_INTO(tags_to_write.size(), header_buffer);
+  for (auto& tag : tags_to_write) {
+    WRITE_STR_INTO(tag->tag, header_buffer);
+  }
+
+  // TODO: variables
+  WRITE_U16_INTO(0, header_buffer);
+
+  WRITE_STR_INTO(window->text, header_buffer);
+  WRITE_STR_INTO(window->text_post, header_buffer);
+
+  WRITE_U16_INTO(cohort_count, header_buffer);
+
+  uint32_t total_size = header_buffer.size() + cohort_buffer.size();
+  writeRaw(output, total_size);
+  output.write(header_buffer.data(), header_buffer.size());
+  output.write(cohort_buffer.data(), cohort_buffer.size());
+  output.flush();
+}
+}
diff --git a/src/BinaryApplicator.hpp b/src/BinaryApplicator.hpp
new file mode 100644
index 00000000..1fcef4a9
--- /dev/null
+++ b/src/BinaryApplicator.hpp
@@ -0,0 +1,52 @@
+/*
+* Copyright (C) 2007-2025, GrammarSoft ApS
+* Developed by Tino Didriksen <mail@tinodidriksen.com>
+* Design by Eckhard Bick <eckhard.bick@mail.dk>, Tino Didriksen <mail@tinodidriksen.com>
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+#ifndef GRAMMARAPPLICATORBINARY_H
+#define GRAMMARAPPLICATORBINARY_H
+
+#include "GrammarApplicator.hpp"
+
+namespace CG3 {
+
+enum BinaryFormatFlags {
+  // Window
+  BFW_FLUSH      = (1 << 1),
+  // Cohort
+  BFC_DELETED    = (1 << 1),
+  // Reading
+  BFR_SUBREADING = (1 << 1),
+  BFR_DELETED    = (1 << 2),
+};
+
+class BinaryApplicator : public virtual GrammarApplicator {
+public:
+  BinaryApplicator(std::ostream& ux_err);
+
+  void runGrammarOnText(std::istream& input, std::ostream& output);
+
+protected:
+  void printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling = false) override;
+
+private:
+  bool readWindow();
+};
+}
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index c3466559..cd4b2256 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -80,6 +80,8 @@ set(LIBCG3_SOURCES
 	AST.hpp
 	ApertiumApplicator.cpp
 	ApertiumApplicator.hpp
+	BinaryApplicator.cpp
+	BinaryApplicator.hpp
 	BinaryGrammar.cpp
 	BinaryGrammar.hpp
 	BinaryGrammar_read.cpp
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index f88f07b6..62fae5cc 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -96,6 +96,7 @@ cg3_sformat detectFormat(std::string_view buf8) {
 FormatConverter::FormatConverter(std::ostream& ux_err)
   : GrammarApplicator(ux_err)
   , ApertiumApplicator(ux_err)
+  , BinaryApplicator(ux_err)
   , FSTApplicator(ux_err)
   , JsonlApplicator(ux_err)
   , MatxinApplicator(ux_err)
@@ -151,6 +152,10 @@ void FormatConverter::runGrammarOnText(std::istream& input, std::ostream& output
 		JsonlApplicator::runGrammarOnText(input, output);
 		break;
 	}
+	case CG3SF_BINARY: {
+		BinaryApplicator::runGrammarOnText(input, output);
+		break;
+	}
 	default:
 		CG3Quit();
 	}
@@ -182,6 +187,8 @@ void FormatConverter::printCohort(Cohort* cohort, std::ostream& output, bool pro
 		JsonlApplicator::printCohort(cohort, output, profiling);
 		break;
 	}
+	case CG3SF_BINARY:
+		break;
 	default:
 		CG3Quit();
 	}
@@ -213,6 +220,10 @@ void FormatConverter::printSingleWindow(SingleWindow* window, std::ostream& outp
 		JsonlApplicator::printSingleWindow(window, output, profiling);
 		break;
 	}
+	case CG3SF_BINARY: {
+		BinaryApplicator::printSingleWindow(window, output, profiling);
+		break;
+	}
 	default:
 		CG3Quit();
 }
@@ -224,6 +235,8 @@ void FormatConverter::printStreamCommand(UStringView cmd, std::ostream& output)
 		JsonlApplicator::printStreamCommand(cmd, output);
 		break;
 	}
+	case CG3SF_BINARY:
+		break;
 	case CG3SF_CG:
 	case CG3SF_APERTIUM:
 	case CG3SF_FST:
@@ -242,6 +255,8 @@ void FormatConverter::printPlainTextLine(UStringView line, std::ostream& output)
 		JsonlApplicator::printPlainTextLine(line, output);
 		break;
 	}
+	case CG3SF_BINARY:
+		break;
 	case CG3SF_CG:
 	case CG3SF_APERTIUM:
 	case CG3SF_FST:
diff --git a/src/FormatConverter.hpp b/src/FormatConverter.hpp
index 61c93730..16f6cff6 100644
--- a/src/FormatConverter.hpp
+++ b/src/FormatConverter.hpp
@@ -22,6 +22,7 @@
 #define c6d28b7452ec699b_FORMATCONVERTER_H
 
 #include "ApertiumApplicator.hpp"
+#include "BinaryApplicator.hpp"
 #include "FSTApplicator.hpp"
 #include "JsonlApplicator.hpp"
 #include "MatxinApplicator.hpp"
@@ -34,7 +35,7 @@ namespace CG3 {
 
 cg3_sformat detectFormat(std::string_view str);
 
-class FormatConverter : public ApertiumApplicator, public FSTApplicator, public JsonlApplicator, public MatxinApplicator, public NicelineApplicator, public PlaintextApplicator {
+class FormatConverter : public ApertiumApplicator, public BinaryApplicator, public FSTApplicator, public JsonlApplicator, public MatxinApplicator, public NicelineApplicator, public PlaintextApplicator {
 public:
 	FormatConverter(std::ostream& ux_err);
 
diff --git a/src/cg-conv.cpp b/src/cg-conv.cpp
index d136ff16..e88bb122 100644
--- a/src/cg-conv.cpp
+++ b/src/cg-conv.cpp
@@ -151,6 +151,9 @@ int main(int argc, char* argv[]) {
 	else if (options_conv[IN_JSONL].doesOccur) {
 		fmt = CG3SF_JSONL;
 	}
+	else if (options_conv[IN_BINARY].doesOccur) {
+		fmt = CG3SF_BINARY;
+	}
 
 	if (options_conv[IN_AUTO].doesOccur || fmt == CG3SF_INVALID) {
 		_instream = applicator.detectFormat(std::cin);
@@ -210,6 +213,9 @@ int main(int argc, char* argv[]) {
 	else if (options_conv[OUT_JSONL].doesOccur) {
 		applicator.fmt_output = CG3SF_JSONL;
 	}
+	else if (options_conv[OUT_BINARY].doesOccur) {
+		applicator.fmt_output = CG3SF_BINARY;
+	}
 
 	if (options_conv[UNICODE_TAGS].doesOccur) {
 		applicator.unicode_tags = true;
diff --git a/src/cg3.h b/src/cg3.h
index abdc0863..27ee497a 100644
--- a/src/cg3.h
+++ b/src/cg3.h
@@ -80,6 +80,7 @@ typedef enum {
 	CG3SF_FST,
 	CG3SF_PLAIN,
 	CG3SF_JSONL,
+	CG3SF_BINARY,
 } cg3_sformat;
 
 // Default usage: if (!cg3_init(stdin, stdout, stderr)) { exit(1); }
diff --git a/src/options_conv.hpp b/src/options_conv.hpp
index da1bef57..c56e9f7f 100644
--- a/src/options_conv.hpp
+++ b/src/options_conv.hpp
@@ -41,6 +41,7 @@ enum OPTIONS {
 	IN_FST,
 	IN_PLAIN,
 	IN_JSONL,
+	IN_BINARY,
 	ADD_TAGS,
 	OUT_CG,
 	OUT_CG2,
@@ -50,6 +51,7 @@ enum OPTIONS {
 	OUT_NICELINE,
 	OUT_PLAIN,
 	OUT_JSONL,
+	OUT_BINARY,
 	FST_WFACTOR,
 	FST_WTAG,
 	SUB_DELIMITER,
@@ -75,6 +77,7 @@ std::array<UOption, NUM_OPTIONS_CONV> options_conv{
 	UOption{"in-fst",       'f', UOPT_NO_ARG,       "sets input format to HFST/XFST"},
 	UOption{"in-plain",     'x', UOPT_NO_ARG,       "sets input format to plain text"},
 	UOption{"in-jsonl",     'j', UOPT_NO_ARG,       "sets input format to JSONL (experimental, specs below)"},
+	UOption{"in-binary",    'z', UOPT_NO_ARG,       "sets input format to binary (experimental)"},
 	UOption{"add-tags",       0, UOPT_NO_ARG,       "adds minimal analysis to readings (implies -x)"},
 	UOption{"out-cg",       'C', UOPT_NO_ARG,       "sets output format to CG (default)"},
 	UOption{"V",            'V', UOPT_NO_ARG},
@@ -84,6 +87,7 @@ std::array<UOption, NUM_OPTIONS_CONV> options_conv{
 	UOption{"out-niceline", 'N', UOPT_NO_ARG,       "sets output format to Niceline CG"},
 	UOption{"out-plain",    'X', UOPT_NO_ARG,       "sets output format to plain text"},
 	UOption{"out-jsonl",    'J', UOPT_NO_ARG,       "sets output format to JSONL (experimental, specs below)"},
+	UOption{"out-binary",   'Z', UOPT_NO_ARG,       "sets output format to binary (experimental)"},
 	UOption{"wfactor",      'W', UOPT_REQUIRES_ARG, "FST weight factor (defaults to 1.0)"},
 	UOption{"wtag",           0, UOPT_REQUIRES_ARG, "FST weight tag prefix (defaults to W)"},
 	UOption{"sub-delim",    'S', UOPT_REQUIRES_ARG, "FST sub-reading delimiters (defaults to #)"},

From db2ac151335b74c09c3b38fa79cf9ef0d85207fb Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Mon, 23 Jun 2025 21:52:20 -0400
Subject: [PATCH 02/42] assorted fixes

---
 src/BinaryApplicator.cpp | 80 ++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 31 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 65d2f278..4ff5df07 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -56,28 +56,41 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 
   gWindow->window_span = num_windows;
 
+  auto flush = [&]() {
+    if (gWindow->back()) {
+      gWindow->back()->flush_after = true;
+    }
+
+    while (!gWindow->next.empty()) {
+      gWindow->shuffleWindowsDown();
+      runGrammarOnWindow();
+    }
+
+    gWindow->shuffleWindowsDown();
+    while (!gWindow->previous.empty()) {
+      SingleWindow* tmp = gWindow->previous.front();
+      printSingleWindow(tmp, output);
+      free_swindow(tmp);
+      gWindow->previous.erase(gWindow->previous.begin());
+    }
+    flushAfter = false;
+  };
+
   while (!input.eof()) {
     flushAfter = readWindow();
-    gWindow->shuffleWindowsDown();
-    runGrammarOnWindow();
     ++numWindows;
-    if (numWindows % resetAfter == 0) {
-      resetIndexes();
-    }
-    if (flushAfter) {
-      while (!gWindow->next.empty()) {
-	gWindow->shuffleWindowsDown();
-	runGrammarOnWindow();
-      }
+    if (gWindow->next.size() > num_windows) {
       gWindow->shuffleWindowsDown();
-      while (!gWindow->previous.empty()) {
-	SingleWindow* tmp = gWindow->previous.front();
-	printSingleWindow(tmp, output);
-	free_swindow(tmp);
-	gWindow->previous.erase(gWindow->previous.begin());
+      runGrammarOnWindow();
+      if (numWindows % resetAfter == 0) {
+	resetIndexes();
       }
     }
+    if (flushAfter) {
+      flush();
+    }
   }
+  flush();
 }
 
 #define READ_U16_INTO(dest) \
@@ -107,6 +120,7 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 
 bool BinaryApplicator::readWindow() {
   SingleWindow* cSWindow = gWindow->allocAppendSingleWindow();
+  initEmptySingleWindow(cSWindow);
   
   uint32_t cs = 0;
   readRaw(*ux_stdin, cs);
@@ -132,7 +146,6 @@ bool BinaryApplicator::readWindow() {
   for (uint16_t i = 0; i < tag_count; i++) {
     UString tg;
     READ_STR_INTO(tg);
-    u_fprintf(ux_stderr, "pos = %u, tg = %S, i = %u / %u\n", pos, tg.data(), i, tag_count);
     window_tags.push_back(addTag(tg));
   }
 
@@ -149,6 +162,7 @@ bool BinaryApplicator::readWindow() {
   for (uint16_t cn = 0; cn < cohort_count; cn++) {
     Cohort* cCohort = alloc_cohort(cSWindow);
     cCohort->global_number = gWindow->cohort_counter++;
+    numCohorts++;
 
     READ_U16_INTO(flags);
     /*if (flags & BFC_DELETED) {
@@ -181,9 +195,9 @@ bool BinaryApplicator::readWindow() {
       addTagToReading(*cReading, cCohort->wordform);
 
       READ_U16_INTO(flags);
-      if (flags & BFR_DELETED) {
-	cReading->deleted = 1;
-      }
+
+      READ_U16_INTO(tag);
+      cReading->baseform = window_tags[tag]->hash;
 
       READ_U16_INTO(tag_count);
       for (uint16_t tn = 0; tn < tag_count; tn++) {
@@ -191,14 +205,21 @@ bool BinaryApplicator::readWindow() {
 	addTagToReading(*cReading, window_tags[tag]);
       }
       
-      if (prev && flags & BFR_SUBREADING) {
+      if (prev && (flags & BFR_SUBREADING)) {
 	prev->next = cReading;
       }
+      else if (flags & BFR_DELETED) {
+	cCohort->deleted.push_back(cReading);
+      }
       else {
 	cCohort->appendReading(cReading);
       }
       prev = cReading;
+      ++numReadings;
     }
+
+    insert_if_exists(cCohort->possible_sets, grammar->sets_any);
+    cSWindow->appendCohort(cCohort);
   }
 
   return cSWindow->flush_after;
@@ -220,14 +241,13 @@ bool BinaryApplicator::readWindow() {
     (buffer) += tmp; \
   } while (false)
 
-#define WRITE_TAG_INTO(tag_, buffer) \
+#define WRITE_TAG_INTO(tag, buffer) \
   do { \
-    if (tag_index.find((tag_)) == tag_index.end()) { \
-      tag_index[(tag_)] = tags_to_write.size(); \
-      tags_to_write.push_back((tag_)); \
-      u_fprintf(ux_stderr, "adding tag %S\n", (tag_)->tag.data());	\
+    if (tag_index.find((tag)) == tag_index.end()) { \
+      tag_index[(tag)] = tags_to_write.size(); \
+      tags_to_write.push_back((tag)); \
     } \
-    WRITE_U16_INTO(tag_index[(tag_)], buffer); \
+    WRITE_U16_INTO(tag_index[(tag)], buffer); \
   } while (false)
 
 #define WRITE_STR_INTO(s, buffer) \
@@ -294,15 +314,13 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 	if (reading != top_reading) {
 	  flags |= BFR_SUBREADING;
 	}
+	WRITE_U16_INTO(flags, reading_buffer);
+	WRITE_TAG_INTO(grammar->single_tags[reading->baseform], reading_buffer);
 	std::string tag_buffer;
 	uint16_t tag_count = 0;
-	if (reading->baseform) {
-	  WRITE_TAG_INTO(grammar->single_tags[reading->baseform], tag_buffer);
-	  tag_count++;
-	}
 	for (auto& tter : reading->tags_list) {
 	  auto tag = grammar->single_tags[tter];
-	  if (tag->type & T_BASEFORM) {
+	  if ((tag->type & T_WORDFORM) || (tag->type & T_BASEFORM)) {
 	    continue;
 	  }
 	  WRITE_TAG_INTO(tag, tag_buffer);

From ff5b4385d7a1921f6a306a6b3795bc35d3fad6d5 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Tue, 24 Jun 2025 09:58:03 -0400
Subject: [PATCH 03/42] stream header and detection

---
 src/BinaryApplicator.cpp | 37 +++++++++++++++++++++++++++++++------
 src/FormatConverter.cpp  |  5 +++++
 src/inlines.hpp          |  5 +++++
 src/version.hpp          |  1 +
 4 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 4ff5df07..1f550ec8 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -19,6 +19,7 @@
 
 #include "BinaryApplicator.hpp"
 #include "Grammar.hpp"
+#include "version.hpp"
 
 namespace CG3 {
 
@@ -49,6 +50,23 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
     CG3Quit(1);
   }
 
+  {
+    std::string header(8, 0);
+    if (!input.read(&header[0], 8)) {
+      u_fprintf(ux_stderr, "Error: Could not read stream header!\n");
+      CG3Quit(1);
+    }
+    if (!is_cg3bsf(header)) {
+      u_fprintf(ux_stderr, "Error: Stream does not start with magic bytes - cannot read as binary!\n");
+      CG3Quit(1);
+    }
+    uint32_t version = reinterpret_cast<uint32_t*>(&header[4])[0];
+    if (version != CG3_BINARY_STREAM) {
+      u_fprintf(ux_stderr, "Error: Stream is version %u but this reader only knows version %u!\n", version, CG3_BINARY_STREAM);
+      CG3Quit(1);
+    }
+  }
+
   index();
 
   uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
@@ -119,9 +137,6 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
   } while (false)
 
 bool BinaryApplicator::readWindow() {
-  SingleWindow* cSWindow = gWindow->allocAppendSingleWindow();
-  initEmptySingleWindow(cSWindow);
-  
   uint32_t cs = 0;
   readRaw(*ux_stdin, cs);
 
@@ -129,6 +144,9 @@ bool BinaryApplicator::readWindow() {
     return true;
   }
 
+  SingleWindow* cSWindow = gWindow->allocAppendSingleWindow();
+  initEmptySingleWindow(cSWindow);
+
   std::string buf(cs, 0);
   ux_stdin->read(&buf[0], cs);
   uint32_t pos = 0;
@@ -204,7 +222,7 @@ bool BinaryApplicator::readWindow() {
 	READ_U16_INTO(tag);
 	addTagToReading(*cReading, window_tags[tag]);
       }
-      
+
       if (prev && (flags & BFR_SUBREADING)) {
 	prev->next = cReading;
       }
@@ -262,6 +280,13 @@ bool BinaryApplicator::readWindow() {
   } while (false)
 
 void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) {
+  if (window->number == 1) {
+    output.write("CGBF", 4);
+    std::string version;
+    WRITE_U32_INTO(CG3_BINARY_STREAM, version);
+    output.write(version.data(), 4);
+  }
+
   TagVector tags_to_write;
   std::map<Tag*, uint32_t> tag_index;
 
@@ -299,7 +324,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 
     WRITE_STR_INTO(cohort->text, cohort_buffer);
     WRITE_STR_INTO(cohort->wblank, cohort_buffer);
-    
+
     std::string reading_buffer;
     uint16_t reading_count = 0;
     std::sort(cohort->readings.begin(), cohort->readings.end(), Reading::cmp_number);
@@ -334,7 +359,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
     WRITE_U16_INTO(reading_count, cohort_buffer);
     cohort_buffer += reading_buffer;
   }
-  
+
   std::string header_buffer;
 
   uint16_t flags = 0;
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index 62fae5cc..c6715aa1 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -28,6 +28,11 @@ cg3_sformat detectFormat(std::string_view buf8) {
 	cg3_sformat fmt = CG3SF_INVALID;
 	UErrorCode status = U_ZERO_ERROR;
 
+	if (is_cg3bsf(buf8)) {
+		fmt = CG3SF_BINARY;
+		return fmt;
+	}
+
 	UString buffer(BUF_SIZE, 0);
 	int32_t nr = 0;
 	u_strFromUTF8(&buffer[0], BUF_SIZE, &nr, buf8.data(), SI32(buf8.size()), &status);
diff --git a/src/inlines.hpp b/src/inlines.hpp
index 66a890bc..7db76b32 100644
--- a/src/inlines.hpp
+++ b/src/inlines.hpp
@@ -475,6 +475,11 @@ inline bool is_cg3b(const S& s) {
 	return (s[0] == 'C' && s[1] == 'G' && s[2] == '3' && s[3] == 'B');
 }
 
+template<typename S>
+inline bool is_cg3bsf(const S& s) {
+	return (s[0] == 'C' && s[1] == 'G' && s[2] == 'B' && s[3] == 'F');
+}
+
 inline void insert_if_exists(boost::dynamic_bitset<>& cont, const boost::dynamic_bitset<>* other) {
 	if (other && !other->empty()) {
 		cont.resize(std::max(cont.size(), other->size()));
diff --git a/src/version.hpp b/src/version.hpp
index f16b307c..ced0348e 100644
--- a/src/version.hpp
+++ b/src/version.hpp
@@ -32,5 +32,6 @@ constexpr uint32_t CG3_REVISION = 13898;
 constexpr uint32_t CG3_FEATURE_REV = 13898;
 constexpr uint32_t CG3_TOO_OLD = 10373;
 constexpr uint32_t CG3_EXTERNAL_PROTOCOL = 7226;
+constexpr uint32_t CG3_BINARY_STREAM = 1;
 
 #endif

From 56d68e635a10aa16b0fdcc521aa3c98ed2f212cd Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Thu, 26 Jun 2025 21:02:52 -0400
Subject: [PATCH 04/42] variables; account for ID updates

---
 src/BinaryApplicator.cpp | 81 +++++++++++++++++++++++++++++++++++-----
 src/BinaryApplicator.hpp | 23 ++++++++----
 2 files changed, 86 insertions(+), 18 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 1f550ec8..e7391dfe 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -92,6 +92,7 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
       gWindow->previous.erase(gWindow->previous.begin());
     }
     flushAfter = false;
+	id_updates.clear();
   };
 
   while (!input.eof()) {
@@ -169,11 +170,36 @@ bool BinaryApplicator::readWindow() {
 
   uint16_t var_count;
   READ_U16_INTO(var_count);
-  // TODO
+  for (uint16_t vn = 0; vn < var_count; vn++) {
+	  char mode = buf[pos];
+	  pos++;
+	  uint16_t tag1, tag2;
+	  READ_U16_INTO(tag1);
+	  READ_U16_INTO(tag2);
+	  auto hash1 = window_tags[tag1]->hash;
+	  if (mode == BFV_SETVAR) {
+		  cSWindow->variables_set[hash1] = window_tags[tag2]->hash;
+		  cSWindow->variables_rem.erase(hash1);
+		  cSWindow->variables_output.insert(hash1);
+	  }
+	  else if (mode == BFV_SETVAR_ANY) {
+		  cSWindow->variables_set[hash1] = grammar->tag_any;
+		  cSWindow->variables_rem.erase(hash1);
+		  cSWindow->variables_output.insert(hash1);
+	  }
+	  else if (mode == BFV_REMVAR) {
+		  cSWindow->variables_set.erase(hash1);
+		  cSWindow->variables_rem.insert(hash1);
+		  cSWindow->variables_output.insert(hash1);
+	  }
+  }
 
   READ_STR_INTO(cSWindow->text);
   READ_STR_INTO(cSWindow->text_post);
 
+  uint32_t id_start = max_input_id;
+  uint32_t offset = gWindow->cohort_counter - max_input_id;
+
   uint16_t cohort_count;
   READ_U16_INTO(cohort_count);
   uint16_t tag;
@@ -192,15 +218,25 @@ bool BinaryApplicator::readWindow() {
 
     READ_U16_INTO(tag_count);
     if (tag_count) {
-      cCohort->wread = alloc_reading(cCohort);
-      for (uint16_t tn = 0; tn < tag_count; tn++) {
-	READ_U16_INTO(tag);
-	addTagToReading(*cCohort->wread, window_tags[tag]);
-      }
+		cCohort->wread = alloc_reading(cCohort);
+		for (uint16_t tn = 0; tn < tag_count; tn++) {
+			READ_U16_INTO(tag);
+			addTagToReading(*cCohort->wread, window_tags[tag]);
+		}
     }
 
-    READ_U32_INTO(cCohort->dep_self);
-    READ_U32_INTO(cCohort->dep_parent);
+	uint32_t self, parent;
+    READ_U32_INTO(self);
+    READ_U32_INTO(parent);
+	if (self > max_input_id) {
+		max_input_id = self;
+	}
+	if (parent != DEP_NO_PARENT) {
+		cCohort->dep_parent = parent + offset;
+	}
+	if (flags & BFC_RIGHTWARD_REL) {
+		id_updates[self] = cCohort->global_number;
+	}
 
     READ_STR_INTO(cCohort->text);
     READ_STR_INTO(cCohort->wblank);
@@ -290,6 +326,31 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
   TagVector tags_to_write;
   std::map<Tag*, uint32_t> tag_index;
 
+  uint16_t var_count = 0;
+  std::string var_buffer;
+  for (auto var : window->variables_output) {
+	  var_count++;
+	  Tag* key = grammar->single_tags[var];
+	  auto iter = window->variables_set.find(var);
+	  if (iter != window->variables_set.end()) {
+		  if (iter->second != grammar->tag_any) {
+			  var_buffer += static_cast<char>(BFV_SETVAR);
+			  WRITE_TAG_INTO(key, var_buffer);
+			  WRITE_TAG_INTO(grammar->single_tags[iter->second], var_buffer);
+		  }
+		  else {
+			  var_buffer += static_cast<char>(BFV_SETVAR_ANY);
+			  WRITE_TAG_INTO(key, var_buffer);
+			  WRITE_U16_INTO(0, var_buffer);
+		  }
+	  }
+	  else {
+		  var_buffer += static_cast<char>(BFV_REMVAR);
+		  WRITE_TAG_INTO(key, var_buffer);
+		  WRITE_U16_INTO(0, var_buffer);
+	  }
+  }
+
   std::string cohort_buffer;
   uint16_t cohort_count = 0;
   for (auto& cohort : window->all_cohorts) {
@@ -373,8 +434,8 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
     WRITE_STR_INTO(tag->tag, header_buffer);
   }
 
-  // TODO: variables
-  WRITE_U16_INTO(0, header_buffer);
+  WRITE_U16_INTO(var_count, header_buffer);
+  header_buffer += var_buffer;
 
   WRITE_STR_INTO(window->text, header_buffer);
   WRITE_STR_INTO(window->text_post, header_buffer);
diff --git a/src/BinaryApplicator.hpp b/src/BinaryApplicator.hpp
index 1fcef4a9..35d0e691 100644
--- a/src/BinaryApplicator.hpp
+++ b/src/BinaryApplicator.hpp
@@ -26,13 +26,18 @@
 namespace CG3 {
 
 enum BinaryFormatFlags {
-  // Window
-  BFW_FLUSH      = (1 << 1),
-  // Cohort
-  BFC_DELETED    = (1 << 1),
-  // Reading
-  BFR_SUBREADING = (1 << 1),
-  BFR_DELETED    = (1 << 2),
+	// Window
+	BFW_FLUSH         = (1 << 1),
+	// Cohort
+	BFC_DELETED       = (1 << 1),
+	BFC_RIGHTWARD_REL = (1 << 2),
+	// Reading
+	BFR_SUBREADING    = (1 << 1),
+	BFR_DELETED       = (1 << 2),
+	// Variables
+	BFV_SETVAR        = 1,
+	BFV_SETVAR_ANY    = 2,
+	BFV_REMVAR        = 3,
 };
 
 class BinaryApplicator : public virtual GrammarApplicator {
@@ -45,7 +50,9 @@ class BinaryApplicator : public virtual GrammarApplicator {
   void printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling = false) override;
 
 private:
-  bool readWindow();
+	bool readWindow();
+	uint32_t max_input_id = 0;
+	std::map<uint32_t, uint32_t> id_updates;
 };
 }
 

From 60a5e3d4a7aa117dab361ed497cacf3ff3902565 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 27 Jun 2025 11:03:16 -0400
Subject: [PATCH 05/42] start on relations

---
 src/BinaryApplicator.cpp | 132 +++++++++++++++++++++++----------------
 src/BinaryApplicator.hpp |   5 +-
 src/FormatConverter.cpp  |   4 ++
 src/cg-proc.cpp          |  11 +++-
 4 files changed, 92 insertions(+), 60 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index e7391dfe..7b7045b2 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -92,7 +92,6 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
       gWindow->previous.erase(gWindow->previous.begin());
     }
     flushAfter = false;
-	id_updates.clear();
   };
 
   while (!input.eof()) {
@@ -197,9 +196,6 @@ bool BinaryApplicator::readWindow() {
   READ_STR_INTO(cSWindow->text);
   READ_STR_INTO(cSWindow->text_post);
 
-  uint32_t id_start = max_input_id;
-  uint32_t offset = gWindow->cohort_counter - max_input_id;
-
   uint16_t cohort_count;
   READ_U16_INTO(cohort_count);
   uint16_t tag;
@@ -209,9 +205,10 @@ bool BinaryApplicator::readWindow() {
     numCohorts++;
 
     READ_U16_INTO(flags);
-    /*if (flags & BFC_DELETED) {
-      cCohort->type |= CT_DELETED;
-      }*/
+	if (flags & BFC_RELATED) {
+		cCohort->type |= CT_RELATED;
+		has_relations = true;
+	}
 
     READ_U16_INTO(tag);
     cCohort->wordform = window_tags[tag];
@@ -225,17 +222,25 @@ bool BinaryApplicator::readWindow() {
 		}
     }
 
-	uint32_t self, parent;
-    READ_U32_INTO(self);
-    READ_U32_INTO(parent);
-	if (self > max_input_id) {
-		max_input_id = self;
+	READ_U32_INTO(cCohort->dep_self);
+	READ_U32_INTO(cCohort->dep_parent);
+
+	if (cCohort->dep_parent != DEP_NO_PARENT) {
+		has_dep = true;
 	}
-	if (parent != DEP_NO_PARENT) {
-		cCohort->dep_parent = parent + offset;
+
+	uint16_t rel_count;
+	READ_U16_INTO(rel_count);
+	for (uint16_t rn = 0; rn < rel_count; rn++) {
+		READ_U16_INTO(tag);
+		uint32_t head;
+		READ_U32_INTO(head);
+		cCohort->relations_input[window_tags[tag]->comparison_hash].insert(head);
 	}
-	if (flags & BFC_RIGHTWARD_REL) {
-		id_updates[self] = cCohort->global_number;
+	if (rel_count) {
+		has_relations = true;
+		gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
+		cCohort->type |= CT_RELATED;
 	}
 
     READ_STR_INTO(cCohort->text);
@@ -360,29 +365,48 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
     cohort_count++;
 
     uint16_t flags = 0;
+	if (cohort->type & CT_RELATED) {
+		flags |= BFC_RELATED;
+	}
     WRITE_U16_INTO(flags, cohort_buffer);
 
     WRITE_TAG_INTO(cohort->wordform, cohort_buffer);
     if (cohort->wread) {
-      std::string tag_buffer;
-      uint16_t tag_count = 0;
-      for (auto tter : cohort->wread->tags_list) {
-	if (tter == cohort->wordform->hash) {
-	  continue;
-	}
-	WRITE_TAG_INTO(grammar->single_tags[tter], tag_buffer);
-	tag_count++;
-      }
-      WRITE_U16_INTO(tag_count, cohort_buffer);
-      cohort_buffer += tag_buffer;
+		std::string tag_buffer;
+		uint16_t tag_count = 0;
+		for (auto tter : cohort->wread->tags_list) {
+			if (tter == cohort->wordform->hash) {
+				continue;
+			}
+			WRITE_TAG_INTO(grammar->single_tags[tter], tag_buffer);
+			tag_count++;
+		}
+		WRITE_U16_INTO(tag_count, cohort_buffer);
+		cohort_buffer += tag_buffer;
     }
     else {
-      WRITE_U16_INTO(0, cohort_buffer);
+		WRITE_U16_INTO(0, cohort_buffer);
     }
 
     WRITE_U32_INTO(cohort->dep_self, cohort_buffer);
     WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
 
+	std::string rel_buffer;
+	uint16_t rel_count = 0;
+	for (const auto& miter : cohort->relations) {
+		auto it = grammar->single_tags.find(miter.first);
+		if (it == grammar->single_tags.end()) {
+			it = grammar->single_tags.find(miter.first);
+		}
+		for (auto siter : miter.second) {
+			rel_count += 1;
+			WRITE_TAG_INTO(it->second, rel_buffer);
+			WRITE_U32_INTO(siter, rel_buffer);
+		}
+	}
+	WRITE_U16_INTO(rel_count, cohort_buffer);
+	cohort_buffer += rel_buffer;
+
     WRITE_STR_INTO(cohort->text, cohort_buffer);
     WRITE_STR_INTO(cohort->wblank, cohort_buffer);
 
@@ -390,32 +414,32 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
     uint16_t reading_count = 0;
     std::sort(cohort->readings.begin(), cohort->readings.end(), Reading::cmp_number);
     for (auto top_reading : cohort->readings) {
-      if (top_reading->noprint) {
-	continue;
-      }
-      auto reading = top_reading;
-      while (reading) {
-	reading_count++;
-	uint16_t flags = 0;
-	if (reading != top_reading) {
-	  flags |= BFR_SUBREADING;
-	}
-	WRITE_U16_INTO(flags, reading_buffer);
-	WRITE_TAG_INTO(grammar->single_tags[reading->baseform], reading_buffer);
-	std::string tag_buffer;
-	uint16_t tag_count = 0;
-	for (auto& tter : reading->tags_list) {
-	  auto tag = grammar->single_tags[tter];
-	  if ((tag->type & T_WORDFORM) || (tag->type & T_BASEFORM)) {
-	    continue;
-	  }
-	  WRITE_TAG_INTO(tag, tag_buffer);
-	  tag_count++;
-	}
-	WRITE_U16_INTO(tag_count, reading_buffer);
-	reading_buffer += tag_buffer;
-	reading = reading->next;
-      }
+		if (top_reading->noprint) {
+			continue;
+		}
+		auto reading = top_reading;
+		while (reading) {
+			reading_count++;
+			uint16_t flags = 0;
+			if (reading != top_reading) {
+				flags |= BFR_SUBREADING;
+			}
+			WRITE_U16_INTO(flags, reading_buffer);
+			WRITE_TAG_INTO(grammar->single_tags[reading->baseform], reading_buffer);
+			std::string tag_buffer;
+			uint16_t tag_count = 0;
+			for (auto& tter : reading->tags_list) {
+				auto tag = grammar->single_tags[tter];
+				if (tag->type & (T_WORDFORM | T_BASEFORM | T_DEPENDENCY | T_RELATION)) {
+					continue;
+				}
+				WRITE_TAG_INTO(tag, tag_buffer);
+				tag_count++;
+			}
+			WRITE_U16_INTO(tag_count, reading_buffer);
+			reading_buffer += tag_buffer;
+			reading = reading->next;
+		}
     }
     WRITE_U16_INTO(reading_count, cohort_buffer);
     cohort_buffer += reading_buffer;
diff --git a/src/BinaryApplicator.hpp b/src/BinaryApplicator.hpp
index 35d0e691..be9819f2 100644
--- a/src/BinaryApplicator.hpp
+++ b/src/BinaryApplicator.hpp
@@ -29,8 +29,7 @@ enum BinaryFormatFlags {
 	// Window
 	BFW_FLUSH         = (1 << 1),
 	// Cohort
-	BFC_DELETED       = (1 << 1),
-	BFC_RIGHTWARD_REL = (1 << 2),
+	BFC_RELATED       = (1 << 1),
 	// Reading
 	BFR_SUBREADING    = (1 << 1),
 	BFR_DELETED       = (1 << 2),
@@ -51,8 +50,6 @@ class BinaryApplicator : public virtual GrammarApplicator {
 
 private:
 	bool readWindow();
-	uint32_t max_input_id = 0;
-	std::map<uint32_t, uint32_t> id_updates;
 };
 }
 
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index c6715aa1..bd0e72d2 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -132,6 +132,10 @@ void FormatConverter::runGrammarOnText(std::istream& input, std::ostream& output
 	ux_stdin = &input;
 	ux_stdout = &output;
 
+	if (fmt_output == CG3SF_BINARY || fmt_input == CG3SF_BINARY) {
+		grammar->has_relations = true;
+	}
+
 	switch (fmt_input) {
 	case CG3SF_CG: {
 		GrammarApplicator::runGrammarOnText(input, output);
diff --git a/src/cg-proc.cpp b/src/cg-proc.cpp
index 3f58c3aa..f39cb20a 100644
--- a/src/cg-proc.cpp
+++ b/src/cg-proc.cpp
@@ -22,6 +22,7 @@
 #include "TextualParser.hpp"
 #include "BinaryGrammar.hpp"
 #include "ApertiumApplicator.hpp"
+#include "BinaryApplicator.hpp"
 #include "MatxinApplicator.hpp"
 #include "GrammarApplicator.hpp"
 
@@ -48,7 +49,8 @@ void endProgram(char* name) {
 	cout << "	-s, --sections=NUM:	 specify number of sections to process" << endl;
 	cout << "	-f, --stream-format=NUM: set the format of the I/O stream to NUM," << endl;
 	cout << "				   where `0' is VISL format, `1' is Apertium" << endl;
-	cout << "				   format (default: 1)" << endl;
+	cout << "				   format, `2` is Matxin, and `3` is binary" << endl;
+	cout << "                  (default: 1)" << endl;
 	cout << "	-r, --rule=NAME:	 run only the named rule" << endl;
 	cout << "	-t, --trace:		 print debug output on stderr" << endl;
 	cout << "	-w, --wordform-case:	 enforce surface case on lemma/baseform " << endl;
@@ -65,7 +67,8 @@ void endProgram(char* name) {
 	cout << "	-s:	 specify number of sections to process" << endl;
 	cout << "	-f: 	 set the format of the I/O stream to NUM," << endl;
 	cout << "		   where `0' is VISL format, `1' is " << endl;
-	cout << "		   Apertium format and `2' is Matxin (default: 1)" << endl;
+	cout << "		   Apertium format, `2' is Matxin," << endl;
+	cout << "          and `3` is binary (default: 1)" << endl;
 	cout << "	-r:	 run only the named rule" << endl;
 	cout << "	-t:	 print debug output on stderr" << endl;
 	cout << "	-w:	 enforce surface case on lemma/baseform " << endl;
@@ -308,6 +311,10 @@ int main(int argc, char* argv[]) {
 		matxinApplicator->print_only_first = only_first;
 		applicator.reset(matxinApplicator);
 	}
+	else if (stream_format == 3) {
+		BinaryApplicator* binaryApplicator = new BinaryApplicator(std::cerr);
+		applicator.reset(binaryApplicator);
+	}
 	else {
 		ApertiumApplicator* apertiumApplicator = new ApertiumApplicator(std::cerr);
 		apertiumApplicator->wordform_case = wordform_case;

From a5fd9748e3439b55f7d33df3e8ebe9ed21334699 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 27 Jun 2025 11:30:31 -0400
Subject: [PATCH 06/42] write global_number rather than dep_self

---
 src/BinaryApplicator.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 7b7045b2..8ac6dca8 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -388,8 +388,20 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 		WRITE_U16_INTO(0, cohort_buffer);
     }
 
-    WRITE_U32_INTO(cohort->dep_self, cohort_buffer);
-    WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
+    WRITE_U32_INTO(cohort->global_number, cohort_buffer);
+	if (cohort->dep_parent == 0 || cohort->dep_parent == DEP_NO_PARENT) {
+		WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
+	}
+	else {
+		const Cohort* pr = nullptr;
+		if (gWindow->cohort_map.find(cohort->dep_parent) != gWindow->cohort_map.end()) {
+			const Cohort* pr = gWindow->cohort_map[cohort->dep_parent];
+			WRITE_U32_INTO(pr->global_number, cohort_buffer);
+		}
+		else {
+			WRITE_U32_INTO(DEP_NO_PARENT, cohort_buffer);
+		}
+	}
 
 	std::string rel_buffer;
 	uint16_t rel_count = 0;

From 9f114b4157f84804374581168f0f4d3da1c4ee3d Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 27 Jun 2025 11:45:51 -0400
Subject: [PATCH 07/42] relations

---
 src/BinaryApplicator.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 8ac6dca8..5054303c 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -224,6 +224,8 @@ bool BinaryApplicator::readWindow() {
 
 	READ_U32_INTO(cCohort->dep_self);
 	READ_U32_INTO(cCohort->dep_parent);
+	gWindow->dep_window[cCohort->dep_self] = cCohort;
+	gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
 
 	if (cCohort->dep_parent != DEP_NO_PARENT) {
 		has_dep = true;
@@ -235,7 +237,7 @@ bool BinaryApplicator::readWindow() {
 		READ_U16_INTO(tag);
 		uint32_t head;
 		READ_U32_INTO(head);
-		cCohort->relations_input[window_tags[tag]->comparison_hash].insert(head);
+		cCohort->relations_input[window_tags[tag]->hash].insert(head);
 	}
 	if (rel_count) {
 		has_relations = true;

From 9fae2fc5bbdc1cdae4ccb6360e86f30fb05af18b Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 2 Jul 2025 13:51:17 -0400
Subject: [PATCH 08/42] minor optimizations - don't rehash if we don't need to

---
 src/BinaryApplicator.cpp | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 5054303c..8eb6310d 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -161,6 +161,7 @@ bool BinaryApplicator::readWindow() {
   TagVector window_tags;
   uint16_t tag_count;
   READ_U16_INTO(tag_count);
+  window_tags.reserve(tag_count);
   for (uint16_t i = 0; i < tag_count; i++) {
     UString tg;
     READ_STR_INTO(tg);
@@ -218,7 +219,8 @@ bool BinaryApplicator::readWindow() {
 		cCohort->wread = alloc_reading(cCohort);
 		for (uint16_t tn = 0; tn < tag_count; tn++) {
 			READ_U16_INTO(tag);
-			addTagToReading(*cCohort->wread, window_tags[tag]);
+			addTagToReading(*cCohort->wread, window_tags[tag],
+							(tn + 1 == tag_count));
 		}
     }
 
@@ -262,18 +264,18 @@ bool BinaryApplicator::readWindow() {
 
       READ_U16_INTO(tag_count);
       for (uint16_t tn = 0; tn < tag_count; tn++) {
-	READ_U16_INTO(tag);
-	addTagToReading(*cReading, window_tags[tag]);
+		  READ_U16_INTO(tag);
+		  addTagToReading(*cReading, window_tags[tag], (tn+1 == tag_count));
       }
 
       if (prev && (flags & BFR_SUBREADING)) {
-	prev->next = cReading;
+		  prev->next = cReading;
       }
       else if (flags & BFR_DELETED) {
-	cCohort->deleted.push_back(cReading);
+		  cCohort->deleted.push_back(cReading);
       }
       else {
-	cCohort->appendReading(cReading);
+		  cCohort->appendReading(cReading);
       }
       prev = cReading;
       ++numReadings;
@@ -395,7 +397,6 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 		WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
 	}
 	else {
-		const Cohort* pr = nullptr;
 		if (gWindow->cohort_map.find(cohort->dep_parent) != gWindow->cohort_map.end()) {
 			const Cohort* pr = gWindow->cohort_map[cohort->dep_parent];
 			WRITE_U32_INTO(pr->global_number, cohort_buffer);

From ef63dce2772177a291f0435d17bb57aad3bc26e9 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 2 Jul 2025 18:40:14 -0400
Subject: [PATCH 09/42] python binary parser

---
 python/CMakeLists.txt |   4 +-
 python/cg3.py         | 124 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 2 deletions(-)
 create mode 100644 python/cg3.py

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 479d6606..3529cf89 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -4,7 +4,7 @@ set(PYTHON_EXECUTABLE ${Python_EXECUTABLE})
 
 set(PYTHON_FILE "constraint_grammar.py")
 set(CPP_WRAP_FILE "constraint_grammar_wrap.cpp")
-file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/cg3.py" "from constraint_grammar import *\n")
+set(PYTHON_LIBRARY_FILE "cg3.py")
 
 set(BUILD_DEFS "")
 get_directory_property(_defs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMPILE_DEFINITIONS)
@@ -21,7 +21,7 @@ add_custom_command(OUTPUT ${CPP_WRAP_FILE} ${PYTHON_FILE}
 )
 
 add_custom_target(wrapper ALL
-	DEPENDS ${CPP_WRAP_FILE} ${PYTHON_FILE}
+	DEPENDS ${CPP_WRAP_FILE} ${PYTHON_FILE} ${PYTHON_LIBRARY_FILE}
 	VERBATIM
 )
 
diff --git a/python/cg3.py b/python/cg3.py
new file mode 100644
index 00000000..1f128280
--- /dev/null
+++ b/python/cg3.py
@@ -0,0 +1,124 @@
+from constraint_grammar import *
+
+from collections import defaultdict
+from dataclasses import dataclass, field
+import struct
+from typing import DefaultDict, Dict, List, Optional
+
+@dataclass
+class Reading:
+    lemma: str = ''
+    tags: List[str] = field(default_factory=list)
+	subreading: Optional[Reading] = None
+
+@dataclass
+class Cohort:
+    static: Reading = field(default_factory=Reading)
+    readings: List[Reading] = field(default_factory=list)
+    dep_self: int = 0
+    dep_head: Optional[int] = None
+	relations: DefalutDict[str, List[int]] = field(
+		default_factory=lambda: defaultdict(list))
+	text: str = ''
+	wblank: str = ''
+
+@dataclass
+class Window:
+    cohorts: List[Cohort] = field(default_factory=list)
+	set_vars: Dict[str, Optional[str]] = field(default_factory=dict)
+	rem_vars: List[str] = field(default_factory=list)
+	text: str = ''
+	text_post: str = ''
+	flush_after: bool = False
+
+def parse_binary_window(buf):
+	pos = 0
+    def read_pat(pat):
+        nonlocal pos, buf
+        ret = struct.unpack_from('<'+pat, buf, pos)
+        pos += struct.calcsize('<'+pat)
+        return ret
+    def read_u16():
+        return read_pat('H')[0]
+    def read_u32():
+        return read_pat('I')[0]
+    def read_str():
+        l = read_u16()
+        if l == 0:
+            return b''
+        return read_pat(f'{l}s')[0]
+        return s.decode('utf-8')
+	window = Window()
+    window_flags = read_u16()
+	if window_flags & 1:
+		window.flush_after = True
+    tag_count = read_u16()
+    tags = [read_str() for i in range(tag_count)]
+    def read_tags():
+        nonlocal tags
+        ct = read_u16()
+        if ct == 0:
+            return []
+        idx = read_pat(f'{ct}H')
+        return [tags[t] for t in idx]
+	var_count = read_u16()
+	for i in range(var_count):
+		mode = read_pat('B')[0]
+		t1 = read_u16()
+		t2 = read_u16()
+		if mode == 1:
+			window.set_vars[tags[t1]] = tags[t2]
+		elif mode == 2:
+			window.set_vars[tags[t1]] = None
+		elif mode == 3:
+			window.rem_vars.append(tags[t1])
+    window.text = read_str()
+    window.text_post = read_str()
+    cohort_count = read_u16()
+    for i in range(cohort_count):
+		cohort = Cohort()
+        cohort_flags = read_u16()
+		cohort.static.lemma = tags[read_u16()]
+		cohort.static.tags = read_tags()
+        cohort.dep_self = read_u32()
+        cohort.dep_parent = read_u32()
+        if cohort.dep_parent == 0xffffffff:
+            cohort.dep_parent = None
+        rel_count = read_u16()
+        for i in range(rel_count):
+			tag = tags[read_u16()]
+			head = read_u32()
+			cohort.relations[tag].append(head)
+		cohort.text = read_str()
+		cohort.wblank = read_str()
+        reading_count = read_u16()
+		prev = None
+        for i in range(reading_count):
+            reading_flags = read_u16()
+			reading = Reading()
+            reading.lemma = tags[read_u16()]
+			reading.tags = read_tags()
+			if reading_flags & 1 and prev is not None:
+				prev.subreading = reading
+			else:
+				cohort.readings.append(reading)
+			prev = reading
+		window.cohorts.append(cohort)
+    return window
+
+def parse_binary_stream(fin):
+	header = fin.read(8)
+	label, version = struct.unpack('<4sI', header)
+	if label != b'CGBF':
+		raise ValueError('Binary format header not found!')
+	if version != 1:
+		raise ValueError('Unknown binary format version!')
+	while True:
+		spec = fin.read(4)
+		if len(spec) != 4:
+			break;
+		block_len = struct.unpack('<I', spec)[0]
+		block = fin.read(block_len)
+		if len(block) != block_len:
+			break
+		yield parse_binary_window(block)

From f890e75657c5c7043f0b5d1bafad3f11351cf3f0 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 2 Jul 2025 18:44:24 -0400
Subject: [PATCH 10/42] fix indentation

---
 python/cg3.py | 96 +++++++++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/python/cg3.py b/python/cg3.py
index 1f128280..a843a152 100644
--- a/python/cg3.py
+++ b/python/cg3.py
@@ -7,16 +7,16 @@
 
 @dataclass
 class Reading:
-    lemma: str = ''
-    tags: List[str] = field(default_factory=list)
+	lemma: str = ''
+	tags: List[str] = field(default_factory=list)
 	subreading: Optional[Reading] = None
 
 @dataclass
 class Cohort:
-    static: Reading = field(default_factory=Reading)
-    readings: List[Reading] = field(default_factory=list)
-    dep_self: int = 0
-    dep_head: Optional[int] = None
+	static: Reading = field(default_factory=Reading)
+	readings: List[Reading] = field(default_factory=list)
+	dep_self: int = 0
+	dep_head: Optional[int] = None
 	relations: DefalutDict[str, List[int]] = field(
 		default_factory=lambda: defaultdict(list))
 	text: str = ''
@@ -24,7 +24,7 @@ class Cohort:
 
 @dataclass
 class Window:
-    cohorts: List[Cohort] = field(default_factory=list)
+	cohorts: List[Cohort] = field(default_factory=list)
 	set_vars: Dict[str, Optional[str]] = field(default_factory=dict)
 	rem_vars: List[str] = field(default_factory=list)
 	text: str = ''
@@ -33,34 +33,34 @@ class Window:
 
 def parse_binary_window(buf):
 	pos = 0
-    def read_pat(pat):
-        nonlocal pos, buf
-        ret = struct.unpack_from('<'+pat, buf, pos)
-        pos += struct.calcsize('<'+pat)
-        return ret
-    def read_u16():
-        return read_pat('H')[0]
-    def read_u32():
-        return read_pat('I')[0]
-    def read_str():
-        l = read_u16()
-        if l == 0:
-            return b''
-        return read_pat(f'{l}s')[0]
-        return s.decode('utf-8')
+	def read_pat(pat):
+		nonlocal pos, buf
+		ret = struct.unpack_from('<'+pat, buf, pos)
+		pos += struct.calcsize('<'+pat)
+		return ret
+	def read_u16():
+		return read_pat('H')[0]
+	def read_u32():
+		return read_pat('I')[0]
+	def read_str():
+		l = read_u16()
+		if l == 0:
+			return b''
+		return read_pat(f'{l}s')[0]
+		return s.decode('utf-8')
 	window = Window()
-    window_flags = read_u16()
+	window_flags = read_u16()
 	if window_flags & 1:
 		window.flush_after = True
-    tag_count = read_u16()
-    tags = [read_str() for i in range(tag_count)]
-    def read_tags():
-        nonlocal tags
-        ct = read_u16()
-        if ct == 0:
-            return []
-        idx = read_pat(f'{ct}H')
-        return [tags[t] for t in idx]
+	tag_count = read_u16()
+	tags = [read_str() for i in range(tag_count)]
+	def read_tags():
+		nonlocal tags
+		ct = read_u16()
+		if ct == 0:
+			return []
+		idx = read_pat(f'{ct}H')
+		return [tags[t] for t in idx]
 	var_count = read_u16()
 	for i in range(var_count):
 		mode = read_pat('B')[0]
@@ -72,31 +72,31 @@ def read_tags():
 			window.set_vars[tags[t1]] = None
 		elif mode == 3:
 			window.rem_vars.append(tags[t1])
-    window.text = read_str()
-    window.text_post = read_str()
-    cohort_count = read_u16()
-    for i in range(cohort_count):
+	window.text = read_str()
+	window.text_post = read_str()
+	cohort_count = read_u16()
+	for i in range(cohort_count):
 		cohort = Cohort()
-        cohort_flags = read_u16()
+		cohort_flags = read_u16()
 		cohort.static.lemma = tags[read_u16()]
 		cohort.static.tags = read_tags()
-        cohort.dep_self = read_u32()
-        cohort.dep_parent = read_u32()
-        if cohort.dep_parent == 0xffffffff:
-            cohort.dep_parent = None
-        rel_count = read_u16()
-        for i in range(rel_count):
+		cohort.dep_self = read_u32()
+		cohort.dep_parent = read_u32()
+		if cohort.dep_parent == 0xffffffff:
+			cohort.dep_parent = None
+		rel_count = read_u16()
+		for i in range(rel_count):
 			tag = tags[read_u16()]
 			head = read_u32()
 			cohort.relations[tag].append(head)
 		cohort.text = read_str()
 		cohort.wblank = read_str()
-        reading_count = read_u16()
+		reading_count = read_u16()
 		prev = None
-        for i in range(reading_count):
-            reading_flags = read_u16()
+		for i in range(reading_count):
+			reading_flags = read_u16()
 			reading = Reading()
-            reading.lemma = tags[read_u16()]
+			reading.lemma = tags[read_u16()]
 			reading.tags = read_tags()
 			if reading_flags & 1 and prev is not None:
 				prev.subreading = reading
@@ -104,7 +104,7 @@ def read_tags():
 				cohort.readings.append(reading)
 			prev = reading
 		window.cohorts.append(cohort)
-    return window
+	return window
 
 def parse_binary_stream(fin):
 	header = fin.read(8)

From ad0279faa802fbb0cab9f76413f806895d4b42ed Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 2 Jul 2025 18:57:19 -0400
Subject: [PATCH 11/42] stop using CI as compiler

---
 python/cg3.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/cg3.py b/python/cg3.py
index a843a152..35ae217b 100644
--- a/python/cg3.py
+++ b/python/cg3.py
@@ -9,7 +9,7 @@
 class Reading:
 	lemma: str = ''
 	tags: List[str] = field(default_factory=list)
-	subreading: Optional[Reading] = None
+	subreading: Optional['Reading'] = None
 
 @dataclass
 class Cohort:
@@ -17,7 +17,7 @@ class Cohort:
 	readings: List[Reading] = field(default_factory=list)
 	dep_self: int = 0
 	dep_head: Optional[int] = None
-	relations: DefalutDict[str, List[int]] = field(
+	relations: DefaultDict[str, List[int]] = field(
 		default_factory=lambda: defaultdict(list))
 	text: str = ''
 	wblank: str = ''
@@ -45,9 +45,8 @@ def read_u32():
 	def read_str():
 		l = read_u16()
 		if l == 0:
-			return b''
-		return read_pat(f'{l}s')[0]
-		return s.decode('utf-8')
+			return ''
+		return read_pat(f'{l}s')[0].decode('utf-8')
 	window = Window()
 	window_flags = read_u16()
 	if window_flags & 1:

From 9ea2ea76ca3fb753d4caaf6ab2a31a07321fd611 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 2 Jul 2025 19:49:04 -0400
Subject: [PATCH 12/42] add docs

---
 manual/streamformats.xml | 48 ++++++++++++++++++++++++++++++++++++++++
 python/cg3.py            |  8 +++++++
 2 files changed, 56 insertions(+)

diff --git a/manual/streamformats.xml b/manual/streamformats.xml
index 5038aa53..c988c762 100644
--- a/manual/streamformats.xml
+++ b/manual/streamformats.xml
@@ -183,4 +183,52 @@
     </para>
   </section>
 
+  <section id="stream-binary">
+    <title>Binary Format</title>
+    <indexterm>
+      <primary>Binary Stream Format</primary>
+    </indexterm>
+    <para>
+      The binary format can be generated by <link linkend="cg-conv">cg-conv</link> and can be parsed either by cg-conv or by the Python bindings.
+      It is designed for faster parsing than the textual formats.
+      The intended use case is when the same input needs to be processed multiple times (such as when testing several grammars).
+    </para>
+    <para>
+      The stream begins with a header containing <code>CGBF</code> followed by a 4-byte version number (currently <code>1</code>).
+      After that, each window begins with 4 bytes specifying the length of the block and then the following structure:
+      <screen>
+        window flags [2]
+          &gt; 1 = flush_after
+        tags [array of str]
+        variables [array]
+          mode [1]
+            &gt; 1 = SETVAR (var = val)
+            &gt; 2 = SETVAR (var = *)
+            &gt; 3 = REMVAR
+          var [tag]
+          val or 0 [tag]
+        text [str]
+        text_post [str]
+        cohorts [array]
+          flags [2]
+          wordform [tag]
+          static_tags [array of tag]
+          dep_self [4]
+          dep_parent or 0xFFFFFFFF [4]
+          relations [array]
+            tag [tag]
+            head [4]
+          text [str]
+          wblank [str]
+          readings [array]
+            flags [2]
+              &gt; 1 = is subreading of predecessor
+            baseform [tag]
+            tags [array of tag]
+      </screen>
+      Where arrays and strings are both encoded with a 2-byte length followed by the specified number of objects or UTF-8 bytes.
+      Each item of type <code>[tag]</code> is a 2-byte index into the window-wide <code>tags</code> array.
+    </para>
+  </section>
+
 </chapter>
diff --git a/python/cg3.py b/python/cg3.py
index 35ae217b..4b003f80 100644
--- a/python/cg3.py
+++ b/python/cg3.py
@@ -32,6 +32,11 @@ class Window:
 	flush_after: bool = False
 
 def parse_binary_window(buf):
+	'''Given a bytestring `buf` containing a single window
+	(not including the length header), parse and return a Window()
+	object. For most applications you probably want parse_binary_stream()
+	instead.'''
+
 	pos = 0
 	def read_pat(pat):
 		nonlocal pos, buf
@@ -106,6 +111,9 @@ def read_tags():
 	return window
 
 def parse_binary_stream(fin):
+	'''Given a file `fin`, yield a series of Window() objects.
+	Raises ValueError if stream header is missing or invalid.'''
+
 	header = fin.read(8)
 	label, version = struct.unpack('<4sI', header)
 	if label != b'CGBF':

From 03fe90d88108c1f7e1a2c282619028564979dd58 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Thu, 17 Jul 2025 14:50:40 -0400
Subject: [PATCH 13/42] typo in python; --dep-delimit for conv

---
 python/cg3.py             | 2 +-
 src/GrammarApplicator.cpp | 3 +++
 src/cg-conv.cpp           | 9 +++++++++
 src/options.cpp           | 1 +
 src/options.hpp           | 1 +
 src/options_conv.hpp      | 3 +++
 6 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/python/cg3.py b/python/cg3.py
index 4b003f80..979a5db9 100644
--- a/python/cg3.py
+++ b/python/cg3.py
@@ -16,7 +16,7 @@ class Cohort:
 	static: Reading = field(default_factory=Reading)
 	readings: List[Reading] = field(default_factory=list)
 	dep_self: int = 0
-	dep_head: Optional[int] = None
+	dep_parent: Optional[int] = None
 	relations: DefaultDict[str, List[int]] = field(
 		default_factory=lambda: defaultdict(list))
 	text: str = ''
diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp
index 786f19fd..7155d376 100644
--- a/src/GrammarApplicator.cpp
+++ b/src/GrammarApplicator.cpp
@@ -1009,6 +1009,9 @@ void GrammarApplicator::setOptions(UConverter* conv) {
 	if (options[PRINT_IDS].doesOccur) {
 		print_ids = true;
 	}
+	if (options[PRINT_DEP].doesOccur) {
+		has_dep = true;
+	}
 	if (options[NUM_WINDOWS].doesOccur) {
 		num_windows = std::stoul(options[NUM_WINDOWS].value);
 	}
diff --git a/src/cg-conv.cpp b/src/cg-conv.cpp
index e88bb122..719d29ad 100644
--- a/src/cg-conv.cpp
+++ b/src/cg-conv.cpp
@@ -232,6 +232,15 @@ int main(int argc, char* argv[]) {
 		applicator.parse_dep = true;
 		applicator.has_dep = true;
 	}
+	if (options_conv[DEP_DELIMIT].doesOccur) {
+		if (!options_conv[DEP_DELIMIT].value.empty()) {
+			applicator.dep_delimit = std::stoul(options_conv[DEP_DELIMIT].value);
+		}
+		else {
+			applicator.dep_delimit = 10;
+		}
+		applicator.parse_dep = true;
+	}
 	applicator.is_conv = true;
 	applicator.trace = true;
 	applicator.verbosity_level = 0;
diff --git a/src/options.cpp b/src/options.cpp
index 0bb10e38..70049057 100644
--- a/src/options.cpp
+++ b/src/options.cpp
@@ -73,6 +73,7 @@ options_t options{
 	UOption{"unicode-tags",          0, UOPT_NO_ARG,       "outputs Unicode code points for things like ->"},
 	UOption{"unique-tags",           0, UOPT_NO_ARG,       "outputs unique tags only once per reading"},
 	UOption{"print-ids",             0, UOPT_NO_ARG,       "always output IDs"},
+	UOption{"print-dep",             0, UOPT_NO_ARG,       "always output dependencies"},
 
 	UOption{"num-windows",           0, UOPT_REQUIRES_ARG, "number of windows to keep in before/ahead buffers; defaults to 2"},
 	UOption{"always-span",           0, UOPT_NO_ARG,       "forces scanning tests to always span across window boundaries"},
diff --git a/src/options.hpp b/src/options.hpp
index 9bdef8a4..39a112fc 100644
--- a/src/options.hpp
+++ b/src/options.hpp
@@ -71,6 +71,7 @@ enum OPTIONS {
 	UNICODE_TAGS,
 	UNIQUE_TAGS,
 	PRINT_IDS,
+	PRINT_DEP,
 	NUM_WINDOWS,
 	ALWAYS_SPAN,
 	SOFT_LIMIT,
diff --git a/src/options_conv.hpp b/src/options_conv.hpp
index c56e9f7f..66e097da 100644
--- a/src/options_conv.hpp
+++ b/src/options_conv.hpp
@@ -27,6 +27,7 @@ namespace OptionsConv {
 
 using ::Options::UOption;
 using ::Options::UOPT_NO_ARG;
+using ::Options::UOPT_OPTIONAL_ARG;
 using ::Options::UOPT_REQUIRES_ARG;
 
 enum OPTIONS {
@@ -59,6 +60,7 @@ enum OPTIONS {
 	SUB_LTR,
 	ORDERED,
 	PARSE_DEP,
+	DEP_DELIMIT,
 	UNICODE_TAGS,
 	PIPE_DELETED,
 	NO_BREAK,
@@ -95,6 +97,7 @@ std::array<UOption, NUM_OPTIONS_CONV> options_conv{
 	UOption{"ltr",          'l', UOPT_NO_ARG,       "sets sub-reading direction to LTR"},
 	UOption{"ordered",      'o', UOPT_NO_ARG,       "tag order matters mode"},
 	UOption{"parse-dep",    'D', UOPT_NO_ARG,       "parse dependency (defaults to treating as normal tags)"},
+	UOption{"dep-delimit",    0, UOPT_OPTIONAL_ARG, "delimit windows based on dependency; defaults to 10"},
 	UOption{"unicode-tags",   0, UOPT_NO_ARG,       "outputs Unicode code points for things like ->"},
 	UOption{"deleted",        0, UOPT_NO_ARG,       "read deleted readings as such, instead of as text"},
 	UOption{"no-break",     'B', UOPT_NO_ARG,       "inhibits any extra whitespace in output"},

From 1670d657d59e60451bbe50dc0a22c09623e380f6 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Sat, 26 Jul 2025 13:15:13 -0400
Subject: [PATCH 14/42] add baseform properly

---
 src/BinaryApplicator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 8eb6310d..f58d7f89 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -260,7 +260,7 @@ bool BinaryApplicator::readWindow() {
       READ_U16_INTO(flags);
 
       READ_U16_INTO(tag);
-      cReading->baseform = window_tags[tag]->hash;
+	  addTagToReading(*cReading, window_tags[tag]);
 
       READ_U16_INTO(tag_count);
       for (uint16_t tn = 0; tn < tag_count; tn++) {

From c149504388ffe9a90efe712f0b0a10949b756c0c Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Sat, 26 Jul 2025 13:23:16 -0400
Subject: [PATCH 15/42] also for static tags

---
 src/BinaryApplicator.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index f58d7f89..ab7e0bd3 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -217,6 +217,7 @@ bool BinaryApplicator::readWindow() {
     READ_U16_INTO(tag_count);
     if (tag_count) {
 		cCohort->wread = alloc_reading(cCohort);
+		addTagToReading(*cCohort->wread, cCohort->wordform);
 		for (uint16_t tn = 0; tn < tag_count; tn++) {
 			READ_U16_INTO(tag);
 			addTagToReading(*cCohort->wread, window_tags[tag],

From f17ac6288fc89760317a0100643b7bac33d1cf92 Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Wed, 20 Aug 2025 18:55:23 +0200
Subject: [PATCH 16/42] Add format conversion in main; Add tests for binary
 format, currently 39 / 69 tests passing

---
 .gitignore      |  1 +
 scripts/cg-sort | 16 +++++++++++++++-
 src/main.cpp    | 50 +++++++++++++++++++++++++++++++++++++++++++++++--
 src/options.cpp | 17 ++++++++++++++++-
 src/options.hpp | 15 +++++++++++++++
 test/runall.pl  | 26 +++++++++++++++++++++----
 6 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index 96b6ff81..af4fd7cb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ Makefile
 /test/**/output*.txt
 /test/**/untraced*.txt
 /test/**/std*.txt
+/test/**/*.bsf*.txt
 /test/**/*.out.cg3
 /test/**/*.cg3b
 /test/**/*.bin
diff --git a/scripts/cg-sort b/scripts/cg-sort
index d12c0e55..05d7f106 100755
--- a/scripts/cg-sort
+++ b/scripts/cg-sort
@@ -14,7 +14,7 @@ use Getopt::Long;
 Getopt::Long::Configure('bundling');
 Getopt::Long::Configure('no_ignore_case');
 my %opts = ();
-GetOptions(\%opts, ('weight|w:s', 'reverse|r', 'first|1', 'help|?'));
+GetOptions(\%opts, ('weight|w:s', 'mapping|m:s', 'reverse|r', 'first|1', 'help|?'));
 
 sub print_help {
    print <<'XOUT';
@@ -25,6 +25,7 @@ Pipe a CG stream through this to sort and unique the readings of each cohort.
 Options:
  -?, --help       outputs this help
  -w, --weight     sorts by a numeric tag; defaults to W
+ -m, --mapping    sorts mapping tags with given prefix; defaults to @
  -r, --reverse    reverses the sort order
  -1, --first      only keep the first reading
 
@@ -41,6 +42,11 @@ if (exists($opts{weight}) && length($opts{weight})) {
    $W = $opts{weight};
 }
 
+my $M = '@';
+if (exists($opts{mapping}) && length($opts{mapping})) {
+   $M = $opts{mapping};
+}
+
 my $in_cohort = 0;
 my %readings = ();
 my %deleted = ();
@@ -66,6 +72,14 @@ sub print_sorted_readings {
    if (!@_) {
       return;
    }
+   if (exists($opts{mapping})) {
+      foreach (@_) {
+         my @tags = ($_ =~ m@ ($M\S+)@g);
+         @tags = sort @tags;
+         my $t = join(' ', @tags);
+         $_ =~ s@( $M\S+)+@ $t@;
+      }
+   }
    if (exists($opts{weight})) {
       @_ = sort sort_weight @_;
    }
diff --git a/src/main.cpp b/src/main.cpp
index 8980723f..744488aa 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,7 +22,7 @@
 #include "TextualParser.hpp"
 #include "GrammarWriter.hpp"
 #include "BinaryGrammar.hpp"
-#include "GrammarApplicator.hpp"
+#include "FormatConverter.hpp"
 #include "version.hpp"
 
 #include "options.hpp"
@@ -362,9 +362,55 @@ int main(int argc, char* argv[]) {
 	}
 
 	if (!options[GRAMMAR_ONLY].doesOccur) {
-		GrammarApplicator applicator(*ux_stderr);
+		FormatConverter applicator(*ux_stderr);
+		applicator.fmt_input = CG3SF_CG;
+
+		if (options[IN_CG].doesOccur) {
+			applicator.fmt_input = CG3SF_CG;
+		}
+		else if (options[IN_NICELINE].doesOccur) {
+			applicator.fmt_input = CG3SF_NICELINE;
+		}
+		else if (options[IN_APERTIUM].doesOccur) {
+			applicator.fmt_input = CG3SF_APERTIUM;
+		}
+		else if (options[IN_FST].doesOccur) {
+			applicator.fmt_input = CG3SF_FST;
+		}
+		else if (options[IN_PLAIN].doesOccur) {
+			applicator.fmt_input = CG3SF_PLAIN;
+		}
+		else if (options[IN_JSONL].doesOccur) {
+			applicator.fmt_input = CG3SF_JSONL;
+		}
+		else if (options[IN_BINARY].doesOccur) {
+			applicator.fmt_input = CG3SF_BINARY;
+		}
+
 		applicator.setGrammar(&grammar);
 		applicator.setOptions(conv);
+
+		applicator.fmt_output = CG3SF_CG;
+		if (options[OUT_APERTIUM].doesOccur) {
+			applicator.fmt_output = CG3SF_APERTIUM;
+			applicator.unicode_tags = true;
+		}
+		else if (options[OUT_FST].doesOccur) {
+			applicator.fmt_output = CG3SF_FST;
+		}
+		else if (options[OUT_NICELINE].doesOccur) {
+			applicator.fmt_output = CG3SF_NICELINE;
+		}
+		else if (options[OUT_PLAIN].doesOccur) {
+			applicator.fmt_output = CG3SF_PLAIN;
+		}
+		else if (options[OUT_JSONL].doesOccur) {
+			applicator.fmt_output = CG3SF_JSONL;
+		}
+		else if (options[OUT_BINARY].doesOccur) {
+			applicator.fmt_output = CG3SF_BINARY;
+		}
+
 		if (options[PROFILING].doesOccur) {
 			applicator.profiler = profiler.get();
 		}
diff --git a/src/options.cpp b/src/options.cpp
index 70049057..124fded3 100644
--- a/src/options.cpp
+++ b/src/options.cpp
@@ -95,7 +95,22 @@ options_t options{
 	UOption{"show-tag-hashes",       0, UOPT_NO_ARG,       "prints a list of tags and their hashes as they are parsed during the run"},
 	UOption{"show-set-hashes",       0, UOPT_NO_ARG,       "prints a list of sets and their hashes; implies --grammar-only"},
 	UOption{"dump-ast",              0, UOPT_NO_ARG,       "prints the grammar parse tree; implies --grammar-only"},
-	UOption{"no-break",            'B', UOPT_NO_ARG,       "inhibits any extra whitespace in output"},
+	UOption{"no-break",              0, UOPT_NO_ARG,       "inhibits any extra whitespace in output"},
+	UOption{"in-cg",                 0, UOPT_NO_ARG,       "sets input format to CG (default)"},
+	UOption{"in-niceline",           0, UOPT_NO_ARG,       "sets input format to Niceline CG"},
+	UOption{"in-apertium",           0, UOPT_NO_ARG,       "sets input format to Apertium"},
+	UOption{"in-fst",                0, UOPT_NO_ARG,       "sets input format to HFST/XFST"},
+	UOption{"in-plain",              0, UOPT_NO_ARG,       "sets input format to plain text"},
+	UOption{"in-jsonl",              0, UOPT_NO_ARG,       "sets input format to JSONL (experimental)"},
+	UOption{"in-binary",             0, UOPT_NO_ARG,       "sets input format to binary (experimental)"},
+	UOption{"out-cg",                0, UOPT_NO_ARG,       "sets output format to CG (default)"},
+	UOption{"out-apertium",          0, UOPT_NO_ARG,       "sets output format to Apertium"},
+	UOption{"out-fst",               0, UOPT_NO_ARG,       "sets output format to HFST/XFST"},
+	UOption{"out-matxin",            0, UOPT_NO_ARG,       "sets output format to Matxin"},
+	UOption{"out-niceline",          0, UOPT_NO_ARG,       "sets output format to Niceline CG"},
+	UOption{"out-plain",             0, UOPT_NO_ARG,       "sets output format to plain text"},
+	UOption{"out-jsonl",             0, UOPT_NO_ARG,       "sets output format to JSONL (experimental)"},
+	UOption{"out-binary",            0, UOPT_NO_ARG,       "sets output format to binary (experimental)"},
 };
 
 options_t options_default = options;
diff --git a/src/options.hpp b/src/options.hpp
index 39a112fc..82453656 100644
--- a/src/options.hpp
+++ b/src/options.hpp
@@ -92,6 +92,21 @@ enum OPTIONS {
 	SHOW_SET_HASHES,
 	DUMP_AST,
 	NO_BREAK,
+	IN_CG,
+	IN_NICELINE,
+	IN_APERTIUM,
+	IN_FST,
+	IN_PLAIN,
+	IN_JSONL,
+	IN_BINARY,
+	OUT_CG,
+	OUT_APERTIUM,
+	OUT_FST,
+	OUT_MATXIN,
+	OUT_NICELINE,
+	OUT_PLAIN,
+	OUT_JSONL,
+	OUT_BINARY,
 	NUM_OPTIONS,
 };
 
diff --git a/test/runall.pl b/test/runall.pl
index 609af943..df4341f5 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -2,9 +2,9 @@
 use strict;
 use warnings;
 use Cwd qw(realpath);
+use FindBin qw($Bin);
 
-my ($bindir, $sep) = $0 =~ /^(.*)(\\|\/).*/;
-$bindir = realpath $bindir;
+my $bindir = realpath $Bin;
 chdir $bindir or die("Error: Could not change directory to $bindir !");
 
 # Search paths for the binary
@@ -25,6 +25,9 @@
 	'grammar.cg3b',
 	'diff.bin.txt',
 	'output.bin.txt',
+	'diff.bsf.txt',
+	'expected.bsf.txt',
+	'output.bsf.txt',
 	'grammar.out.cg3',
 	'diff.out.txt',
 	'output.out.txt',
@@ -75,12 +78,27 @@ sub run_pl {
 	}
 
 	if (-s "diff.bin.txt") {
-		print STDERR "Fail ($gf)\n";
+		print STDERR "Fail ($gf) ";
 		$good = 0;
 	} else {
-		print STDERR "Success\n";
+		print STDERR "Success ";
+	}
+
+	# Normal run, but with binary I/O
+	my $conv = $binary;
+	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
+	`cat input.txt | "$conv" --in-cg --out-binary 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	`cat expected.txt | $bindir/../scripts/cg-untrace | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
+	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
+
+	if (-s "diff.bsf.txt") {
+		print STDERR "Fail";
+		$good = 0;
+	} else {
+		print STDERR "Success";
 	}
 
+	print STDERR "\n";
 	return $good;
 }
 

From 373bbcdab08a3ef705ad7b7e6b1b9bb3797c6b33 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 20 Aug 2025 16:09:17 -0400
Subject: [PATCH 17/42] handle empty cohorts (41/69)

---
 src/BinaryApplicator.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index ab7e0bd3..3747f1a8 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -253,6 +253,7 @@ bool BinaryApplicator::readWindow() {
 
     uint16_t reading_count;
     READ_U16_INTO(reading_count);
+	if (!reading_count) initEmptyCohort(*cCohort);
     Reading* prev = nullptr;
     for (uint16_t rn = 0; rn < reading_count; rn++) {
       Reading* cReading = alloc_reading(cCohort);

From 151c291b470c885b2f1d7528dfd76ebdda5e6c12 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 20 Aug 2025 16:13:01 -0400
Subject: [PATCH 18/42] add endtag to last cohort (42/69)

---
 src/BinaryApplicator.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 3747f1a8..f276d9ee 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -283,6 +283,12 @@ bool BinaryApplicator::readWindow() {
       ++numReadings;
     }
 
+	if (cn+1 == cohort_count) {
+		for (auto iter : cCohort->readings) {
+			addTagToReading(*iter, endtag);
+		}
+	}
+
     insert_if_exists(cCohort->possible_sets, grammar->sets_any);
     cSWindow->appendCohort(cCohort);
   }

From 5bd496c612b25a28b5a37620b437a6b5fd909918 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 20 Aug 2025 16:38:15 -0400
Subject: [PATCH 19/42] args to cg-conv (45/69)

---
 test/T_CopyCohort/bsfargs.txt            |  1 +
 test/T_Dependency/bsfargs.txt            |  1 +
 test/T_Dependency_Loops/bsfargs.txt      |  1 +
 test/T_Dependency_OutOfRange/bsfargs.txt |  1 +
 test/T_MergeCohorts/bsfargs.txt          |  1 +
 test/T_Movement/bsfargs.txt              |  1 +
 test/T_RemCohort/bsfargs.txt             |  1 +
 test/T_SplitCohort/bsfargs.txt           |  1 +
 test/T_SwitchParent/bsfargs.txt          |  1 +
 test/runall.pl                           | 10 +++++++---
 10 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 test/T_CopyCohort/bsfargs.txt
 create mode 100644 test/T_Dependency/bsfargs.txt
 create mode 100644 test/T_Dependency_Loops/bsfargs.txt
 create mode 100644 test/T_Dependency_OutOfRange/bsfargs.txt
 create mode 100644 test/T_MergeCohorts/bsfargs.txt
 create mode 100644 test/T_Movement/bsfargs.txt
 create mode 100644 test/T_RemCohort/bsfargs.txt
 create mode 100644 test/T_SplitCohort/bsfargs.txt
 create mode 100644 test/T_SwitchParent/bsfargs.txt

diff --git a/test/T_CopyCohort/bsfargs.txt b/test/T_CopyCohort/bsfargs.txt
new file mode 100644
index 00000000..a288fd24
--- /dev/null
+++ b/test/T_CopyCohort/bsfargs.txt
@@ -0,0 +1 @@
+--parse-dep
\ No newline at end of file
diff --git a/test/T_Dependency/bsfargs.txt b/test/T_Dependency/bsfargs.txt
new file mode 100644
index 00000000..f5db7bec
--- /dev/null
+++ b/test/T_Dependency/bsfargs.txt
@@ -0,0 +1 @@
+--dep-delimit
\ No newline at end of file
diff --git a/test/T_Dependency_Loops/bsfargs.txt b/test/T_Dependency_Loops/bsfargs.txt
new file mode 100644
index 00000000..a288fd24
--- /dev/null
+++ b/test/T_Dependency_Loops/bsfargs.txt
@@ -0,0 +1 @@
+--parse-dep
\ No newline at end of file
diff --git a/test/T_Dependency_OutOfRange/bsfargs.txt b/test/T_Dependency_OutOfRange/bsfargs.txt
new file mode 100644
index 00000000..a288fd24
--- /dev/null
+++ b/test/T_Dependency_OutOfRange/bsfargs.txt
@@ -0,0 +1 @@
+--parse-dep
\ No newline at end of file
diff --git a/test/T_MergeCohorts/bsfargs.txt b/test/T_MergeCohorts/bsfargs.txt
new file mode 100644
index 00000000..f5db7bec
--- /dev/null
+++ b/test/T_MergeCohorts/bsfargs.txt
@@ -0,0 +1 @@
+--dep-delimit
\ No newline at end of file
diff --git a/test/T_Movement/bsfargs.txt b/test/T_Movement/bsfargs.txt
new file mode 100644
index 00000000..f5db7bec
--- /dev/null
+++ b/test/T_Movement/bsfargs.txt
@@ -0,0 +1 @@
+--dep-delimit
\ No newline at end of file
diff --git a/test/T_RemCohort/bsfargs.txt b/test/T_RemCohort/bsfargs.txt
new file mode 100644
index 00000000..a288fd24
--- /dev/null
+++ b/test/T_RemCohort/bsfargs.txt
@@ -0,0 +1 @@
+--parse-dep
\ No newline at end of file
diff --git a/test/T_SplitCohort/bsfargs.txt b/test/T_SplitCohort/bsfargs.txt
new file mode 100644
index 00000000..a288fd24
--- /dev/null
+++ b/test/T_SplitCohort/bsfargs.txt
@@ -0,0 +1 @@
+--parse-dep
\ No newline at end of file
diff --git a/test/T_SwitchParent/bsfargs.txt b/test/T_SwitchParent/bsfargs.txt
new file mode 100644
index 00000000..f5db7bec
--- /dev/null
+++ b/test/T_SwitchParent/bsfargs.txt
@@ -0,0 +1 @@
+--dep-delimit
\ No newline at end of file
diff --git a/test/runall.pl b/test/runall.pl
index df4341f5..c991c9e9 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -37,7 +37,7 @@
 my $binary = "vislcg3";
 
 sub run_pl {
-	my ($binary,$override,$args) = @_;
+	my ($binary,$override,$args,$bsfargs) = @_;
 	my $good = 1;
 
 	# Normal run
@@ -87,7 +87,7 @@ sub run_pl {
 	# Normal run, but with binary I/O
 	my $conv = $binary;
 	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
-	`cat input.txt | "$conv" --in-cg --out-binary 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	`cat input.txt | "$conv" --in-cg --out-binary $bsfargs 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
 	`cat expected.txt | $bindir/../scripts/cg-untrace | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 
@@ -159,6 +159,10 @@ sub run_pl {
 	if (-s 'args.txt') {
 		$args = `cat args.txt`;
 	}
+	my $bsfargs = '';
+	if (-s 'bsfargs.txt') {
+		$bsfargs = `cat bsfargs.txt`;
+	}
 	if (-x 'run.pl') {
 		`./run.pl "$binary" \Q$c\E $args`;
 		if ($?) {
@@ -167,7 +171,7 @@ sub run_pl {
 		}
 	}
 	else {
-		if (!run_pl($binary, $c, $args)) {
+		if (!run_pl($binary, $c, $args, $bsfargs)) {
 			$bad = 1;
 			$failed += 1;
 		}

From 329b5159ba33005bf0863e5212989a651d5fd9d6 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Wed, 20 Aug 2025 16:44:42 -0400
Subject: [PATCH 20/42] unique_tags (46/69)

---
 src/BinaryApplicator.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index f276d9ee..15ef82c8 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -451,11 +451,18 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 			WRITE_TAG_INTO(grammar->single_tags[reading->baseform], reading_buffer);
 			std::string tag_buffer;
 			uint16_t tag_count = 0;
+			uint32SortedVector unique;
 			for (auto& tter : reading->tags_list) {
 				auto tag = grammar->single_tags[tter];
 				if (tag->type & (T_WORDFORM | T_BASEFORM | T_DEPENDENCY | T_RELATION)) {
 					continue;
 				}
+				if (unique_tags) {
+					if (unique.find(tter) != unique.end()) {
+						continue;
+					}
+					unique.insert(tter);
+				}
 				WRITE_TAG_INTO(tag, tag_buffer);
 				tag_count++;
 			}

From 628168b2075cb66f4a217ea5f6479f69950d1426 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Thu, 21 Aug 2025 15:41:14 -0400
Subject: [PATCH 21/42] delimiters in tests (49/69)

---
 test/T_CmdArgs/bsfgrammar.cg3         | 1 +
 test/T_ContextTestJump/bsfgrammar.cg3 | 1 +
 test/T_With/bsfgrammar.cg3            | 1 +
 test/runall.pl                        | 6 +++++-
 4 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 test/T_CmdArgs/bsfgrammar.cg3
 create mode 100644 test/T_ContextTestJump/bsfgrammar.cg3
 create mode 100644 test/T_With/bsfgrammar.cg3

diff --git a/test/T_CmdArgs/bsfgrammar.cg3 b/test/T_CmdArgs/bsfgrammar.cg3
new file mode 100644
index 00000000..3b0c07fe
--- /dev/null
+++ b/test/T_CmdArgs/bsfgrammar.cg3
@@ -0,0 +1 @@
+DELIMITERS = "<.>" ;
\ No newline at end of file
diff --git a/test/T_ContextTestJump/bsfgrammar.cg3 b/test/T_ContextTestJump/bsfgrammar.cg3
new file mode 100644
index 00000000..3b0c07fe
--- /dev/null
+++ b/test/T_ContextTestJump/bsfgrammar.cg3
@@ -0,0 +1 @@
+DELIMITERS = "<.>" ;
\ No newline at end of file
diff --git a/test/T_With/bsfgrammar.cg3 b/test/T_With/bsfgrammar.cg3
new file mode 100644
index 00000000..e393de09
--- /dev/null
+++ b/test/T_With/bsfgrammar.cg3
@@ -0,0 +1 @@
+DELIMITERS = "<.>" ;
diff --git a/test/runall.pl b/test/runall.pl
index c991c9e9..c5b783da 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -87,7 +87,11 @@ sub run_pl {
 	# Normal run, but with binary I/O
 	my $conv = $binary;
 	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
-	`cat input.txt | "$conv" --in-cg --out-binary $bsfargs 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	if (-s "bsfgrammar.cg3") {
+		`cat input.txt | "$binary" --in-cg --out-binary -g bsfgrammar.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	} else {
+		`cat input.txt | "$conv" --in-cg --out-binary $bsfargs 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	}
 	`cat expected.txt | $bindir/../scripts/cg-untrace | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 

From daf2d86354f9b1a5f2df1316ad9290aac70c6975 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Thu, 21 Aug 2025 16:15:41 -0400
Subject: [PATCH 22/42] print end tag

---
 src/BinaryApplicator.cpp                      |  4 +++-
 test/T_CmdArgs/{bsfgrammar.cg3 => conv1.cg3}  |  0
 .../{bsfgrammar.cg3 => conv1.cg3}             |  0
 test/T_CopyCohort/bsfargs.txt                 |  1 -
 test/T_CopyCohort/conv1.cg3                   |  1 +
 test/T_Delimit/conv2.cg3                      |  1 +
 test/T_Dependency/bsfargs.txt                 |  1 -
 test/T_Dependency/conv1.cg3                   |  1 +
 test/T_Dependency_Loops/bsfargs.txt           |  1 -
 test/T_Dependency_Loops/conv1.cg3             |  1 +
 test/T_Dependency_OutOfRange/bsfargs.txt      |  1 -
 test/T_Dependency_OutOfRange/conv1.cg3        |  1 +
 test/T_JumpExecute/conv2.cg3                  |  1 +
 test/T_MergeCohorts/bsfargs.txt               |  1 -
 test/T_MergeCohorts/conv1.cg3                 |  1 +
 test/T_Movement/bsfargs.txt                   |  1 -
 test/T_Movement/conv1.cg3                     |  1 +
 test/T_RemCohort/bsfargs.txt                  |  1 -
 test/T_RemCohort/conv1.cg3                    |  1 +
 test/T_Select/conv2.cg3                       |  1 +
 test/T_SplitCohort/bsfargs.txt                |  1 -
 test/T_SplitCohort/conv1.cg3                  |  1 +
 test/T_SwitchParent/bsfargs.txt               |  1 -
 test/T_SwitchParent/conv1.cg3                 |  1 +
 test/T_With/{bsfgrammar.cg3 => conv1.cg3}     |  0
 test/runall.pl                                | 20 ++++++++++---------
 26 files changed, 26 insertions(+), 19 deletions(-)
 rename test/T_CmdArgs/{bsfgrammar.cg3 => conv1.cg3} (100%)
 rename test/T_ContextTestJump/{bsfgrammar.cg3 => conv1.cg3} (100%)
 delete mode 100644 test/T_CopyCohort/bsfargs.txt
 create mode 100644 test/T_CopyCohort/conv1.cg3
 create mode 100644 test/T_Delimit/conv2.cg3
 delete mode 100644 test/T_Dependency/bsfargs.txt
 create mode 100644 test/T_Dependency/conv1.cg3
 delete mode 100644 test/T_Dependency_Loops/bsfargs.txt
 create mode 100644 test/T_Dependency_Loops/conv1.cg3
 delete mode 100644 test/T_Dependency_OutOfRange/bsfargs.txt
 create mode 100644 test/T_Dependency_OutOfRange/conv1.cg3
 create mode 100644 test/T_JumpExecute/conv2.cg3
 delete mode 100644 test/T_MergeCohorts/bsfargs.txt
 create mode 100644 test/T_MergeCohorts/conv1.cg3
 delete mode 100644 test/T_Movement/bsfargs.txt
 create mode 100644 test/T_Movement/conv1.cg3
 delete mode 100644 test/T_RemCohort/bsfargs.txt
 create mode 100644 test/T_RemCohort/conv1.cg3
 create mode 100644 test/T_Select/conv2.cg3
 delete mode 100644 test/T_SplitCohort/bsfargs.txt
 create mode 100644 test/T_SplitCohort/conv1.cg3
 delete mode 100644 test/T_SwitchParent/bsfargs.txt
 create mode 100644 test/T_SwitchParent/conv1.cg3
 rename test/T_With/{bsfgrammar.cg3 => conv1.cg3} (100%)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 15ef82c8..db0cbcce 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -285,7 +285,9 @@ bool BinaryApplicator::readWindow() {
 
 	if (cn+1 == cohort_count) {
 		for (auto iter : cCohort->readings) {
-			addTagToReading(*iter, endtag);
+			if (iter->tags.find(endtag) == iter->tags.end()) {
+				addTagToReading(*iter, endtag);
+			}
 		}
 	}
 
diff --git a/test/T_CmdArgs/bsfgrammar.cg3 b/test/T_CmdArgs/conv1.cg3
similarity index 100%
rename from test/T_CmdArgs/bsfgrammar.cg3
rename to test/T_CmdArgs/conv1.cg3
diff --git a/test/T_ContextTestJump/bsfgrammar.cg3 b/test/T_ContextTestJump/conv1.cg3
similarity index 100%
rename from test/T_ContextTestJump/bsfgrammar.cg3
rename to test/T_ContextTestJump/conv1.cg3
diff --git a/test/T_CopyCohort/bsfargs.txt b/test/T_CopyCohort/bsfargs.txt
deleted file mode 100644
index a288fd24..00000000
--- a/test/T_CopyCohort/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---parse-dep
\ No newline at end of file
diff --git a/test/T_CopyCohort/conv1.cg3 b/test/T_CopyCohort/conv1.cg3
new file mode 100644
index 00000000..d167d8b3
--- /dev/null
+++ b/test/T_CopyCohort/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --parse-dep ;
diff --git a/test/T_Delimit/conv2.cg3 b/test/T_Delimit/conv2.cg3
new file mode 100644
index 00000000..f15e28c3
--- /dev/null
+++ b/test/T_Delimit/conv2.cg3
@@ -0,0 +1 @@
+CmdArgs += -e ;
diff --git a/test/T_Dependency/bsfargs.txt b/test/T_Dependency/bsfargs.txt
deleted file mode 100644
index f5db7bec..00000000
--- a/test/T_Dependency/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---dep-delimit
\ No newline at end of file
diff --git a/test/T_Dependency/conv1.cg3 b/test/T_Dependency/conv1.cg3
new file mode 100644
index 00000000..0f36431e
--- /dev/null
+++ b/test/T_Dependency/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --dep-delimit ;
diff --git a/test/T_Dependency_Loops/bsfargs.txt b/test/T_Dependency_Loops/bsfargs.txt
deleted file mode 100644
index a288fd24..00000000
--- a/test/T_Dependency_Loops/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---parse-dep
\ No newline at end of file
diff --git a/test/T_Dependency_Loops/conv1.cg3 b/test/T_Dependency_Loops/conv1.cg3
new file mode 100644
index 00000000..d167d8b3
--- /dev/null
+++ b/test/T_Dependency_Loops/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --parse-dep ;
diff --git a/test/T_Dependency_OutOfRange/bsfargs.txt b/test/T_Dependency_OutOfRange/bsfargs.txt
deleted file mode 100644
index a288fd24..00000000
--- a/test/T_Dependency_OutOfRange/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---parse-dep
\ No newline at end of file
diff --git a/test/T_Dependency_OutOfRange/conv1.cg3 b/test/T_Dependency_OutOfRange/conv1.cg3
new file mode 100644
index 00000000..d167d8b3
--- /dev/null
+++ b/test/T_Dependency_OutOfRange/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --parse-dep ;
diff --git a/test/T_JumpExecute/conv2.cg3 b/test/T_JumpExecute/conv2.cg3
new file mode 100644
index 00000000..f15e28c3
--- /dev/null
+++ b/test/T_JumpExecute/conv2.cg3
@@ -0,0 +1 @@
+CmdArgs += -e ;
diff --git a/test/T_MergeCohorts/bsfargs.txt b/test/T_MergeCohorts/bsfargs.txt
deleted file mode 100644
index f5db7bec..00000000
--- a/test/T_MergeCohorts/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---dep-delimit
\ No newline at end of file
diff --git a/test/T_MergeCohorts/conv1.cg3 b/test/T_MergeCohorts/conv1.cg3
new file mode 100644
index 00000000..0f36431e
--- /dev/null
+++ b/test/T_MergeCohorts/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --dep-delimit ;
diff --git a/test/T_Movement/bsfargs.txt b/test/T_Movement/bsfargs.txt
deleted file mode 100644
index f5db7bec..00000000
--- a/test/T_Movement/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---dep-delimit
\ No newline at end of file
diff --git a/test/T_Movement/conv1.cg3 b/test/T_Movement/conv1.cg3
new file mode 100644
index 00000000..0f36431e
--- /dev/null
+++ b/test/T_Movement/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --dep-delimit ;
diff --git a/test/T_RemCohort/bsfargs.txt b/test/T_RemCohort/bsfargs.txt
deleted file mode 100644
index a288fd24..00000000
--- a/test/T_RemCohort/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---parse-dep
\ No newline at end of file
diff --git a/test/T_RemCohort/conv1.cg3 b/test/T_RemCohort/conv1.cg3
new file mode 100644
index 00000000..d167d8b3
--- /dev/null
+++ b/test/T_RemCohort/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --parse-dep ;
diff --git a/test/T_Select/conv2.cg3 b/test/T_Select/conv2.cg3
new file mode 100644
index 00000000..f15e28c3
--- /dev/null
+++ b/test/T_Select/conv2.cg3
@@ -0,0 +1 @@
+CmdArgs += -e ;
diff --git a/test/T_SplitCohort/bsfargs.txt b/test/T_SplitCohort/bsfargs.txt
deleted file mode 100644
index a288fd24..00000000
--- a/test/T_SplitCohort/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---parse-dep
\ No newline at end of file
diff --git a/test/T_SplitCohort/conv1.cg3 b/test/T_SplitCohort/conv1.cg3
new file mode 100644
index 00000000..d167d8b3
--- /dev/null
+++ b/test/T_SplitCohort/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --parse-dep ;
diff --git a/test/T_SwitchParent/bsfargs.txt b/test/T_SwitchParent/bsfargs.txt
deleted file mode 100644
index f5db7bec..00000000
--- a/test/T_SwitchParent/bsfargs.txt
+++ /dev/null
@@ -1 +0,0 @@
---dep-delimit
\ No newline at end of file
diff --git a/test/T_SwitchParent/conv1.cg3 b/test/T_SwitchParent/conv1.cg3
new file mode 100644
index 00000000..0f36431e
--- /dev/null
+++ b/test/T_SwitchParent/conv1.cg3
@@ -0,0 +1 @@
+CmdArgs += --dep-delimit ;
diff --git a/test/T_With/bsfgrammar.cg3 b/test/T_With/conv1.cg3
similarity index 100%
rename from test/T_With/bsfgrammar.cg3
rename to test/T_With/conv1.cg3
diff --git a/test/runall.pl b/test/runall.pl
index c5b783da..06139155 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -37,7 +37,7 @@
 my $binary = "vislcg3";
 
 sub run_pl {
-	my ($binary,$override,$args,$bsfargs) = @_;
+	my ($binary,$override,$args) = @_;
 	my $good = 1;
 
 	# Normal run
@@ -87,10 +87,16 @@ sub run_pl {
 	# Normal run, but with binary I/O
 	my $conv = $binary;
 	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
-	if (-s "bsfgrammar.cg3") {
-		`cat input.txt | "$binary" --in-cg --out-binary -g bsfgrammar.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	if (-s "conv1.cg3") {
+		`cat input.txt | "$binary" --in-cg --out-binary -g conv1.cg3 2>stderr.bsf.conv1.txt >stdout.bsf.conv1.bin`;
 	} else {
-		`cat input.txt | "$conv" --in-cg --out-binary $bsfargs 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+		`cat input.txt | "$conv" --in-cg --out-binary 2>stderr.bsf.conv1.txt >stdout.bsf.conv1.bin`;
+	}
+	`cat stdout.bsf.conv1.bin | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt >stdout.bsf.vislcg3.bin`;
+	if (-s "conv2.cg3") {
+		`cat stdout.bsf.vislcg3.bin | "$binary" --in-binary --out-cg -g conv2.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	} else {
+		`cat stdout.bsf.vislcg3.bin | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
 	}
 	`cat expected.txt | $bindir/../scripts/cg-untrace | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
@@ -163,10 +169,6 @@ sub run_pl {
 	if (-s 'args.txt') {
 		$args = `cat args.txt`;
 	}
-	my $bsfargs = '';
-	if (-s 'bsfargs.txt') {
-		$bsfargs = `cat bsfargs.txt`;
-	}
 	if (-x 'run.pl') {
 		`./run.pl "$binary" \Q$c\E $args`;
 		if ($?) {
@@ -175,7 +177,7 @@ sub run_pl {
 		}
 	}
 	else {
-		if (!run_pl($binary, $c, $args, $bsfargs)) {
+		if (!run_pl($binary, $c, $args)) {
 			$bad = 1;
 			$failed += 1;
 		}

From e192cbfecdfd24f44631bbf7f47a1eae6d2b6241 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Thu, 21 Aug 2025 16:23:30 -0400
Subject: [PATCH 23/42] dep_has_spanned (Omniscan)

---
 python/cg3.py             | 3 +++
 src/BinaryApplicator.cpp  | 6 ++++++
 src/BinaryApplicator.hpp  | 1 +
 test/T_Omniscan/conv1.cg3 | 1 +
 4 files changed, 11 insertions(+)
 create mode 100644 test/T_Omniscan/conv1.cg3

diff --git a/python/cg3.py b/python/cg3.py
index 979a5db9..9c136ff5 100644
--- a/python/cg3.py
+++ b/python/cg3.py
@@ -30,6 +30,7 @@ class Window:
 	text: str = ''
 	text_post: str = ''
 	flush_after: bool = False
+	dep_has_spanned: bool = False
 
 def parse_binary_window(buf):
 	'''Given a bytestring `buf` containing a single window
@@ -56,6 +57,8 @@ def read_str():
 	window_flags = read_u16()
 	if window_flags & 1:
 		window.flush_after = True
+	if window_flags & 2:
+		window.dep_has_spanned = True
 	tag_count = read_u16()
 	tags = [read_str() for i in range(tag_count)]
 	def read_tags():
diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index db0cbcce..7ad040eb 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -157,6 +157,9 @@ bool BinaryApplicator::readWindow() {
   if (flags & BFW_FLUSH) {
     cSWindow->flush_after = true;
   }
+  if (flags & BFW_DEP_SPAN) {
+	  dep_has_spanned = true;
+  }
 
   TagVector window_tags;
   uint16_t tag_count;
@@ -483,6 +486,9 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
   if (window->flush_after) {
     flags |= BFW_FLUSH;
   }
+  if (dep_has_spanned) {
+	  flags |= BFW_DEP_SPAN;
+  }
   WRITE_U16_INTO(flags, header_buffer);
 
   WRITE_U16_INTO(tags_to_write.size(), header_buffer);
diff --git a/src/BinaryApplicator.hpp b/src/BinaryApplicator.hpp
index be9819f2..ad3859cc 100644
--- a/src/BinaryApplicator.hpp
+++ b/src/BinaryApplicator.hpp
@@ -28,6 +28,7 @@ namespace CG3 {
 enum BinaryFormatFlags {
 	// Window
 	BFW_FLUSH         = (1 << 1),
+	BFW_DEP_SPAN      = (1 << 2),
 	// Cohort
 	BFC_RELATED       = (1 << 1),
 	// Reading
diff --git a/test/T_Omniscan/conv1.cg3 b/test/T_Omniscan/conv1.cg3
new file mode 100644
index 00000000..be353a43
--- /dev/null
+++ b/test/T_Omniscan/conv1.cg3
@@ -0,0 +1 @@
+Delimiters = "<$.>" ;
\ No newline at end of file

From c6e610ff52cddf35a1bd0da8cc06da8c16868a64 Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 22 Aug 2025 13:04:32 +0200
Subject: [PATCH 24/42] Include Static (57/69)

---
 .gitignore                             |  1 +
 test/T_CmdArgs/conv1.cg3               |  1 -
 test/T_ContextTestJump/conv1.cg3       |  1 -
 test/T_CopyCohort/conv1.cg3            |  1 -
 test/T_Delimit/conv2.cg3               |  1 -
 test/T_Dependency/conv1.cg3            |  1 -
 test/T_Dependency_Loops/conv1.cg3      |  1 -
 test/T_Dependency_OutOfRange/conv1.cg3 |  1 -
 test/T_JumpExecute/conv2.cg3           |  1 -
 test/T_MergeCohorts/conv1.cg3          |  1 -
 test/T_Movement/conv1.cg3              |  1 -
 test/T_Omniscan/conv1.cg3              |  1 -
 test/T_RemCohort/conv1.cg3             |  1 -
 test/T_Select/conv2.cg3                |  1 -
 test/T_SplitCohort/conv1.cg3           |  1 -
 test/T_SwitchParent/conv1.cg3          |  1 -
 test/T_With/conv1.cg3                  |  1 -
 test/runall.pl                         | 15 +++------------
 18 files changed, 4 insertions(+), 28 deletions(-)
 delete mode 100644 test/T_CmdArgs/conv1.cg3
 delete mode 100644 test/T_ContextTestJump/conv1.cg3
 delete mode 100644 test/T_CopyCohort/conv1.cg3
 delete mode 100644 test/T_Delimit/conv2.cg3
 delete mode 100644 test/T_Dependency/conv1.cg3
 delete mode 100644 test/T_Dependency_Loops/conv1.cg3
 delete mode 100644 test/T_Dependency_OutOfRange/conv1.cg3
 delete mode 100644 test/T_JumpExecute/conv2.cg3
 delete mode 100644 test/T_MergeCohorts/conv1.cg3
 delete mode 100644 test/T_Movement/conv1.cg3
 delete mode 100644 test/T_Omniscan/conv1.cg3
 delete mode 100644 test/T_RemCohort/conv1.cg3
 delete mode 100644 test/T_Select/conv2.cg3
 delete mode 100644 test/T_SplitCohort/conv1.cg3
 delete mode 100644 test/T_SwitchParent/conv1.cg3
 delete mode 100644 test/T_With/conv1.cg3

diff --git a/.gitignore b/.gitignore
index af4fd7cb..f930269e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,6 +33,7 @@ Makefile
 /test/**/untraced*.txt
 /test/**/std*.txt
 /test/**/*.bsf*.txt
+/test/**/*.bsf.cg3
 /test/**/*.out.cg3
 /test/**/*.cg3b
 /test/**/*.bin
diff --git a/test/T_CmdArgs/conv1.cg3 b/test/T_CmdArgs/conv1.cg3
deleted file mode 100644
index 3b0c07fe..00000000
--- a/test/T_CmdArgs/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-DELIMITERS = "<.>" ;
\ No newline at end of file
diff --git a/test/T_ContextTestJump/conv1.cg3 b/test/T_ContextTestJump/conv1.cg3
deleted file mode 100644
index 3b0c07fe..00000000
--- a/test/T_ContextTestJump/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-DELIMITERS = "<.>" ;
\ No newline at end of file
diff --git a/test/T_CopyCohort/conv1.cg3 b/test/T_CopyCohort/conv1.cg3
deleted file mode 100644
index d167d8b3..00000000
--- a/test/T_CopyCohort/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --parse-dep ;
diff --git a/test/T_Delimit/conv2.cg3 b/test/T_Delimit/conv2.cg3
deleted file mode 100644
index f15e28c3..00000000
--- a/test/T_Delimit/conv2.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += -e ;
diff --git a/test/T_Dependency/conv1.cg3 b/test/T_Dependency/conv1.cg3
deleted file mode 100644
index 0f36431e..00000000
--- a/test/T_Dependency/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --dep-delimit ;
diff --git a/test/T_Dependency_Loops/conv1.cg3 b/test/T_Dependency_Loops/conv1.cg3
deleted file mode 100644
index d167d8b3..00000000
--- a/test/T_Dependency_Loops/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --parse-dep ;
diff --git a/test/T_Dependency_OutOfRange/conv1.cg3 b/test/T_Dependency_OutOfRange/conv1.cg3
deleted file mode 100644
index d167d8b3..00000000
--- a/test/T_Dependency_OutOfRange/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --parse-dep ;
diff --git a/test/T_JumpExecute/conv2.cg3 b/test/T_JumpExecute/conv2.cg3
deleted file mode 100644
index f15e28c3..00000000
--- a/test/T_JumpExecute/conv2.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += -e ;
diff --git a/test/T_MergeCohorts/conv1.cg3 b/test/T_MergeCohorts/conv1.cg3
deleted file mode 100644
index 0f36431e..00000000
--- a/test/T_MergeCohorts/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --dep-delimit ;
diff --git a/test/T_Movement/conv1.cg3 b/test/T_Movement/conv1.cg3
deleted file mode 100644
index 0f36431e..00000000
--- a/test/T_Movement/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --dep-delimit ;
diff --git a/test/T_Omniscan/conv1.cg3 b/test/T_Omniscan/conv1.cg3
deleted file mode 100644
index be353a43..00000000
--- a/test/T_Omniscan/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-Delimiters = "<$.>" ;
\ No newline at end of file
diff --git a/test/T_RemCohort/conv1.cg3 b/test/T_RemCohort/conv1.cg3
deleted file mode 100644
index d167d8b3..00000000
--- a/test/T_RemCohort/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --parse-dep ;
diff --git a/test/T_Select/conv2.cg3 b/test/T_Select/conv2.cg3
deleted file mode 100644
index f15e28c3..00000000
--- a/test/T_Select/conv2.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += -e ;
diff --git a/test/T_SplitCohort/conv1.cg3 b/test/T_SplitCohort/conv1.cg3
deleted file mode 100644
index d167d8b3..00000000
--- a/test/T_SplitCohort/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --parse-dep ;
diff --git a/test/T_SwitchParent/conv1.cg3 b/test/T_SwitchParent/conv1.cg3
deleted file mode 100644
index 0f36431e..00000000
--- a/test/T_SwitchParent/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-CmdArgs += --dep-delimit ;
diff --git a/test/T_With/conv1.cg3 b/test/T_With/conv1.cg3
deleted file mode 100644
index e393de09..00000000
--- a/test/T_With/conv1.cg3
+++ /dev/null
@@ -1 +0,0 @@
-DELIMITERS = "<.>" ;
diff --git a/test/runall.pl b/test/runall.pl
index 06139155..f504f367 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -87,18 +87,9 @@ sub run_pl {
 	# Normal run, but with binary I/O
 	my $conv = $binary;
 	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
-	if (-s "conv1.cg3") {
-		`cat input.txt | "$binary" --in-cg --out-binary -g conv1.cg3 2>stderr.bsf.conv1.txt >stdout.bsf.conv1.bin`;
-	} else {
-		`cat input.txt | "$conv" --in-cg --out-binary 2>stderr.bsf.conv1.txt >stdout.bsf.conv1.bin`;
-	}
-	`cat stdout.bsf.conv1.bin | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt >stdout.bsf.vislcg3.bin`;
-	if (-s "conv2.cg3") {
-		`cat stdout.bsf.vislcg3.bin | "$binary" --in-binary --out-cg -g conv2.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
-	} else {
-		`cat stdout.bsf.vislcg3.bin | "$conv" --in-binary --out-cg 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
-	}
-	`cat expected.txt | $bindir/../scripts/cg-untrace | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
+	`echo "Include Static grammar.cg3 ;" > grammar.bsf.cg3`;
+	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 
 	if (-s "diff.bsf.txt") {

From c6155ccca306c34445ebcfc5ca3769f3babf1bba Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 22 Aug 2025 09:56:35 -0400
Subject: [PATCH 25/42] fix flag offsets

---
 src/BinaryApplicator.hpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/BinaryApplicator.hpp b/src/BinaryApplicator.hpp
index ad3859cc..edcace99 100644
--- a/src/BinaryApplicator.hpp
+++ b/src/BinaryApplicator.hpp
@@ -27,13 +27,13 @@ namespace CG3 {
 
 enum BinaryFormatFlags {
 	// Window
-	BFW_FLUSH         = (1 << 1),
-	BFW_DEP_SPAN      = (1 << 2),
+	BFW_FLUSH         = (1 << 0),
+	BFW_DEP_SPAN      = (1 << 1),
 	// Cohort
-	BFC_RELATED       = (1 << 1),
+	BFC_RELATED       = (1 << 0),
 	// Reading
-	BFR_SUBREADING    = (1 << 1),
-	BFR_DELETED       = (1 << 2),
+	BFR_SUBREADING    = (1 << 0),
+	BFR_DELETED       = (1 << 1),
 	// Variables
 	BFV_SETVAR        = 1,
 	BFV_SETVAR_ANY    = 2,

From ad8e6414a85152e5aa4cd3ff234780df581682f3 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 22 Aug 2025 10:30:26 -0400
Subject: [PATCH 26/42] split mappings

---
 src/BinaryApplicator.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 7ad040eb..3f23b3b3 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -268,10 +268,19 @@ bool BinaryApplicator::readWindow() {
 	  addTagToReading(*cReading, window_tags[tag]);
 
       READ_U16_INTO(tag_count);
+	  TagList mappings;
       for (uint16_t tn = 0; tn < tag_count; tn++) {
 		  READ_U16_INTO(tag);
-		  addTagToReading(*cReading, window_tags[tag], (tn+1 == tag_count));
+		  if (window_tags[tag]->type & T_MAPPING) {
+			  mappings.push_back(window_tags[tag]);
+		  }
+		  else {
+			  addTagToReading(*cReading, window_tags[tag]);
+		  }
       }
+	  if (!mappings.empty()) {
+		  splitMappings(mappings, *cCohort, *cReading, true);
+	  }
 
       if (prev && (flags & BFR_SUBREADING)) {
 		  prev->next = cReading;

From 32d3b3272d47cf5b529ba092cd07748b2bc001af Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 22 Aug 2025 11:43:16 -0400
Subject: [PATCH 27/42] ensure tags are mapping tags

---
 src/BinaryApplicator.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 3f23b3b3..de1ba341 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -169,6 +169,12 @@ bool BinaryApplicator::readWindow() {
     UString tg;
     READ_STR_INTO(tg);
     window_tags.push_back(addTag(tg));
+	if (tg[0] == grammar->mapping_prefix) {
+		window_tags.back()->type |= T_MAPPING;
+	}
+	else {
+		window_tags.back()->type &= ~T_MAPPING;
+	}
   }
 
   uint16_t var_count;

From e70fd683d61696edc6fccadf2f529b3d9dc18c36 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 22 Aug 2025 12:13:36 -0400
Subject: [PATCH 28/42] stabilize-relations (#142)

---
 scripts/cg-stabilize-relations | 22 ++++++++++++++++++++++
 test/runall.pl                 |  4 ++--
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100755 scripts/cg-stabilize-relations

diff --git a/scripts/cg-stabilize-relations b/scripts/cg-stabilize-relations
new file mode 100755
index 00000000..d7bdb805
--- /dev/null
+++ b/scripts/cg-stabilize-relations
@@ -0,0 +1,22 @@
+#!/usr/bin/env python3
+
+import argparse
+import re
+import sys
+
+parser = argparse.ArgumentParser('Pipe a CG stream through this to stabilize IDs and relations so they have consistent numbers')
+args = parser.parse_args()
+
+id_map = {}
+
+tag = re.compile(r'\b(ID:|R:\w+:)(\d+)\b')
+def repl(matchobj):
+    global id_map
+    n = matchobj.group(2)
+    if n not in id_map:
+        id_map[n] = str(len(id_map) + 1)
+    return matchobj.group(1) + id_map[n]
+
+for line in sys.stdin:
+    sys.stdout.write(tag.sub(repl, line))
+
diff --git a/test/runall.pl b/test/runall.pl
index f504f367..08029204 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -88,8 +88,8 @@ sub run_pl {
 	my $conv = $binary;
 	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
 	`echo "Include Static grammar.cg3 ;" > grammar.bsf.cg3`;
-	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
-	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m > expected.bsf.txt`;
+	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" > expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 
 	if (-s "diff.bsf.txt") {

From 389cdd4d5d6946333ab508d7c2e55dd77599f702 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 22 Aug 2025 12:16:04 -0400
Subject: [PATCH 29/42] some tests have FLUSH in them

---
 test/runall.pl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/runall.pl b/test/runall.pl
index 08029204..e9c4b157 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -88,8 +88,9 @@ sub run_pl {
 	my $conv = $binary;
 	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
 	`echo "Include Static grammar.cg3 ;" > grammar.bsf.cg3`;
-	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" | grep -v '<STREAMCMD:FLUSH>' >output.bsf.txt`;
+	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" >output.bsf.txt`;
 	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" > expected.bsf.txt`;
+	`echo '<STREAMCMD:FLUSH>' >> expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 
 	if (-s "diff.bsf.txt") {

From 22a6be86c6e46348e5b682a472052643ec1a66f6 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 22 Aug 2025 12:41:14 -0400
Subject: [PATCH 30/42] parent.local_number == 0 -> parent = 0

---
 src/BinaryApplicator.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index de1ba341..09646a4a 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -427,7 +427,12 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 	else {
 		if (gWindow->cohort_map.find(cohort->dep_parent) != gWindow->cohort_map.end()) {
 			const Cohort* pr = gWindow->cohort_map[cohort->dep_parent];
-			WRITE_U32_INTO(pr->global_number, cohort_buffer);
+			if (pr->local_number == 0) {
+				WRITE_U32_INTO(0, cohort_buffer);
+			}
+			else {
+				WRITE_U32_INTO(pr->global_number, cohort_buffer);
+			}
 		}
 		else {
 			WRITE_U32_INTO(DEP_NO_PARENT, cohort_buffer);

From 21f8c02eff5f1e2d2cb9bc494e3e1dffe1e5468b Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 29 Aug 2025 11:54:06 +0200
Subject: [PATCH 31/42] dep_window happens in appendCohort (62/69 passing
 tests); Minor other fixes and cleanup

---
 CMakeLists.txt                              |  2 +-
 include/getopt/getopt.cpp                   |  6 +-
 scripts/cg-stabilize-relations              |  1 -
 src/ApertiumApplicator.cpp                  |  6 +-
 src/BinaryApplicator.cpp                    | 41 ++++++------
 src/BinaryGrammar_read.cpp                  | 24 +++----
 src/BinaryGrammar_read_10043.cpp            | 22 +++---
 src/Grammar.cpp                             |  2 +-
 src/GrammarApplicator.cpp                   | 19 +++---
 src/GrammarApplicator_matchSet.cpp          |  4 +-
 src/GrammarApplicator_reflow.cpp            |  2 +-
 src/GrammarApplicator_runContextualTest.cpp |  2 +-
 src/GrammarApplicator_runGrammar.cpp        | 14 ++--
 src/GrammarApplicator_runRules.cpp          |  4 +-
 src/GrammarWriter.cpp                       |  6 +-
 src/MatxinApplicator.cpp                    | 16 ++---
 src/MweSplitApplicator.cpp                  |  2 +-
 src/NicelineApplicator.cpp                  | 14 ++--
 src/PlaintextApplicator.cpp                 |  8 +--
 src/TextualParser.cpp                       | 74 ++++++++++++---------
 src/Window.cpp                              |  6 +-
 src/cg-conv.cpp                             |  4 +-
 src/cg-mwesplit.cpp                         |  4 +-
 src/cg-proc.cpp                             |  2 +-
 src/inlines.hpp                             |  2 +-
 src/main.cpp                                |  4 +-
 src/parser_helpers.hpp                      |  2 +-
 src/uextras.hpp                             |  2 +-
 28 files changed, 153 insertions(+), 142 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index deccf986..2eeaca5e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -149,7 +149,7 @@ if(EMSCRIPTEN)
 	endif()
 endif()
 
-add_definitions(-DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit)
+add_definitions(-DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit -DU_CHARSET_IS_UTF8=1)
 include_directories("include")
 include_directories("src")
 
diff --git a/include/getopt/getopt.cpp b/include/getopt/getopt.cpp
index 83e71dc2..8bb0ac3e 100644
--- a/include/getopt/getopt.cpp
+++ b/include/getopt/getopt.cpp
@@ -37,14 +37,14 @@ int getopt(int argc, char **argv, const char *opts) {
 		   argv[optind][0] != '-' || argv[optind][1] == '\0')
 			return(EOF);
 		else if (strcmp(argv[optind], "--") == 0) {
-			optind++;
+			++optind;
 			return(EOF);
 		}
 	optopt = c = argv[optind][sp];
 	if (c == ':' || (cp=strchr(opts, c)) == 0) {
 		ERR(": illegal option -- ", (char)c);
 		if (argv[optind][++sp] == '\0') {
-			optind++;
+			++optind;
 			sp = 1;
 		}
 		return('?');
@@ -65,7 +65,7 @@ int getopt(int argc, char **argv, const char *opts) {
 	else {
 		if (argv[optind][++sp] == '\0') {
 			sp = 1;
-			optind++;
+			++optind;
 		}
 		optarg = nullptr;
 	}
diff --git a/scripts/cg-stabilize-relations b/scripts/cg-stabilize-relations
index d7bdb805..30f93e87 100755
--- a/scripts/cg-stabilize-relations
+++ b/scripts/cg-stabilize-relations
@@ -19,4 +19,3 @@ def repl(matchobj):
 
 for line in sys.stdin:
     sys.stdout.write(tag.sub(repl, line))
-
diff --git a/src/ApertiumApplicator.cpp b/src/ApertiumApplicator.cpp
index 8c89187c..a8a76646 100644
--- a/src/ApertiumApplicator.cpp
+++ b/src/ApertiumApplicator.cpp
@@ -382,7 +382,7 @@ void ApertiumApplicator::runGrammarOnText(std::istream& input, std::ostream& out
 
 			lCohort = cCohort = alloc_cohort(cSWindow);
 			cCohort->global_number = gWindow->cohort_counter++;
-			numCohorts++;
+			++numCohorts;
 
 			cCohort->text = blank;
 			blank.clear();
@@ -782,8 +782,8 @@ void ApertiumApplicator::printReading(const Reading* reading, std::ostream& outp
 			if (reading->parent->dep_parent == 0) {
 				pr = reading->parent->parent->cohorts[0];
 			}
-			else if (reading->parent->parent->parent->cohort_map.find(reading->parent->dep_parent) != reading->parent->parent->parent->cohort_map.end()) {
-				pr = reading->parent->parent->parent->cohort_map[reading->parent->dep_parent];
+			else if (gWindow->cohort_map.find(reading->parent->dep_parent) != gWindow->cohort_map.end()) {
+				pr = gWindow->cohort_map[reading->parent->dep_parent];
 			}
 		}
 
diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 09646a4a..2cb46a04 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -165,7 +165,7 @@ bool BinaryApplicator::readWindow() {
   uint16_t tag_count;
   READ_U16_INTO(tag_count);
   window_tags.reserve(tag_count);
-  for (uint16_t i = 0; i < tag_count; i++) {
+  for (uint16_t i = 0; i < tag_count; ++i) {
     UString tg;
     READ_STR_INTO(tg);
     window_tags.push_back(addTag(tg));
@@ -179,9 +179,9 @@ bool BinaryApplicator::readWindow() {
 
   uint16_t var_count;
   READ_U16_INTO(var_count);
-  for (uint16_t vn = 0; vn < var_count; vn++) {
+  for (uint16_t vn = 0; vn < var_count; ++vn) {
 	  char mode = buf[pos];
-	  pos++;
+	  ++pos;
 	  uint16_t tag1, tag2;
 	  READ_U16_INTO(tag1);
 	  READ_U16_INTO(tag2);
@@ -209,10 +209,10 @@ bool BinaryApplicator::readWindow() {
   uint16_t cohort_count;
   READ_U16_INTO(cohort_count);
   uint16_t tag;
-  for (uint16_t cn = 0; cn < cohort_count; cn++) {
+  for (uint16_t cn = 0; cn < cohort_count; ++cn) {
     Cohort* cCohort = alloc_cohort(cSWindow);
     cCohort->global_number = gWindow->cohort_counter++;
-    numCohorts++;
+    ++numCohorts;
 
     READ_U16_INTO(flags);
 	if (flags & BFC_RELATED) {
@@ -227,7 +227,7 @@ bool BinaryApplicator::readWindow() {
     if (tag_count) {
 		cCohort->wread = alloc_reading(cCohort);
 		addTagToReading(*cCohort->wread, cCohort->wordform);
-		for (uint16_t tn = 0; tn < tag_count; tn++) {
+		for (uint16_t tn = 0; tn < tag_count; ++tn) {
 			READ_U16_INTO(tag);
 			addTagToReading(*cCohort->wread, window_tags[tag],
 							(tn + 1 == tag_count));
@@ -236,7 +236,6 @@ bool BinaryApplicator::readWindow() {
 
 	READ_U32_INTO(cCohort->dep_self);
 	READ_U32_INTO(cCohort->dep_parent);
-	gWindow->dep_window[cCohort->dep_self] = cCohort;
 	gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
 
 	if (cCohort->dep_parent != DEP_NO_PARENT) {
@@ -245,7 +244,7 @@ bool BinaryApplicator::readWindow() {
 
 	uint16_t rel_count;
 	READ_U16_INTO(rel_count);
-	for (uint16_t rn = 0; rn < rel_count; rn++) {
+	for (uint16_t rn = 0; rn < rel_count; ++rn) {
 		READ_U16_INTO(tag);
 		uint32_t head;
 		READ_U32_INTO(head);
@@ -264,7 +263,7 @@ bool BinaryApplicator::readWindow() {
     READ_U16_INTO(reading_count);
 	if (!reading_count) initEmptyCohort(*cCohort);
     Reading* prev = nullptr;
-    for (uint16_t rn = 0; rn < reading_count; rn++) {
+    for (uint16_t rn = 0; rn < reading_count; ++rn) {
       Reading* cReading = alloc_reading(cCohort);
       addTagToReading(*cReading, cCohort->wordform);
 
@@ -275,7 +274,7 @@ bool BinaryApplicator::readWindow() {
 
       READ_U16_INTO(tag_count);
 	  TagList mappings;
-      for (uint16_t tn = 0; tn < tag_count; tn++) {
+      for (uint16_t tn = 0; tn < tag_count; ++tn) {
 		  READ_U16_INTO(tag);
 		  if (window_tags[tag]->type & T_MAPPING) {
 			  mappings.push_back(window_tags[tag]);
@@ -319,7 +318,7 @@ bool BinaryApplicator::readWindow() {
 #define WRITE_U16_INTO(n, buffer) \
   do { \
     std::string tmp(2, 0);	       \
-    uint16_t tmp_n = (n); \
+    auto tmp_n = static_cast<uint16_t>(n); \
     tmp.assign(reinterpret_cast<char*>(&tmp_n), 2);	\
     (buffer) += tmp; \
   } while (false)
@@ -327,7 +326,7 @@ bool BinaryApplicator::readWindow() {
 #define WRITE_U32_INTO(n, buffer) \
   do { \
     std::string tmp(4, 0);	       \
-    uint32_t tmp_n = (n); \
+    auto tmp_n = static_cast<uint32_t>(n); \
     tmp.assign(reinterpret_cast<char*>(&tmp_n), 4);	\
     (buffer) += tmp; \
   } while (false)
@@ -335,7 +334,7 @@ bool BinaryApplicator::readWindow() {
 #define WRITE_TAG_INTO(tag, buffer) \
   do { \
     if (tag_index.find((tag)) == tag_index.end()) { \
-      tag_index[(tag)] = tags_to_write.size(); \
+      tag_index[(tag)] = UI32(tags_to_write.size()); \
       tags_to_write.push_back((tag)); \
     } \
     WRITE_U16_INTO(tag_index[(tag)], buffer); \
@@ -366,7 +365,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
   uint16_t var_count = 0;
   std::string var_buffer;
   for (auto var : window->variables_output) {
-	  var_count++;
+	  ++var_count;
 	  Tag* key = grammar->single_tags[var];
 	  auto iter = window->variables_set.find(var);
 	  if (iter != window->variables_set.end()) {
@@ -394,7 +393,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
     if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
       continue;
     }
-    cohort_count++;
+    ++cohort_count;
 
     uint16_t flags = 0;
 	if (cohort->type & CT_RELATED) {
@@ -411,7 +410,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 				continue;
 			}
 			WRITE_TAG_INTO(grammar->single_tags[tter], tag_buffer);
-			tag_count++;
+			++tag_count;
 		}
 		WRITE_U16_INTO(tag_count, cohort_buffer);
 		cohort_buffer += tag_buffer;
@@ -426,7 +425,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 	}
 	else {
 		if (gWindow->cohort_map.find(cohort->dep_parent) != gWindow->cohort_map.end()) {
-			const Cohort* pr = gWindow->cohort_map[cohort->dep_parent];
+			auto pr = gWindow->cohort_map[cohort->dep_parent];
 			if (pr->local_number == 0) {
 				WRITE_U32_INTO(0, cohort_buffer);
 			}
@@ -447,7 +446,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 			it = grammar->single_tags.find(miter.first);
 		}
 		for (auto siter : miter.second) {
-			rel_count += 1;
+			++rel_count;
 			WRITE_TAG_INTO(it->second, rel_buffer);
 			WRITE_U32_INTO(siter, rel_buffer);
 		}
@@ -467,7 +466,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 		}
 		auto reading = top_reading;
 		while (reading) {
-			reading_count++;
+			++reading_count;
 			uint16_t flags = 0;
 			if (reading != top_reading) {
 				flags |= BFR_SUBREADING;
@@ -489,7 +488,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 					unique.insert(tter);
 				}
 				WRITE_TAG_INTO(tag, tag_buffer);
-				tag_count++;
+				++tag_count;
 			}
 			WRITE_U16_INTO(tag_count, reading_buffer);
 			reading_buffer += tag_buffer;
@@ -524,7 +523,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 
   WRITE_U16_INTO(cohort_count, header_buffer);
 
-  uint32_t total_size = header_buffer.size() + cohort_buffer.size();
+  auto total_size = UI32(header_buffer.size() + cohort_buffer.size());
   writeRaw(output, total_size);
   output.write(header_buffer.data(), header_buffer.size());
   output.write(cohort_buffer.data(), cohort_buffer.size());
diff --git a/src/BinaryGrammar_read.cpp b/src/BinaryGrammar_read.cpp
index a006b114..9d37e63a 100644
--- a/src/BinaryGrammar_read.cpp
+++ b/src/BinaryGrammar_read.cpp
@@ -122,7 +122,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 	auto num_single_tags = u32tmp;
 	grammar->num_tags = num_single_tags;
 	grammar->single_tags_list.resize(num_single_tags);
-	for (uint32_t i = 0; i < num_single_tags; i++) {
+	for (uint32_t i = 0; i < num_single_tags; ++i) {
 		Tag* t = grammar->allocateTag();
 
 		auto fields = readBE<uint32_t>(input);
@@ -254,7 +254,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	auto num_pref_targets = u32tmp;
-	for (uint32_t i = 0; i < num_pref_targets; i++) {
+	for (uint32_t i = 0; i < num_pref_targets; ++i) {
 		u32tmp = readBE<uint32_t>(input);
 		grammar->preferred_targets.push_back(u32tmp);
 	}
@@ -264,7 +264,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	auto num_par_pairs = u32tmp;
-	for (uint32_t i = 0; i < num_par_pairs; i++) {
+	for (uint32_t i = 0; i < num_par_pairs; ++i) {
 		auto left = readBE<uint32_t>(input);
 		auto right = readBE<uint32_t>(input);
 		grammar->parentheses[left] = right;
@@ -276,7 +276,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	auto num_par_anchors = u32tmp;
-	for (uint32_t i = 0; i < num_par_anchors; i++) {
+	for (uint32_t i = 0; i < num_par_anchors; ++i) {
 		auto left = readBE<uint32_t>(input);
 		auto right = readBE<uint32_t>(input);
 		grammar->anchors[left] = right;
@@ -288,7 +288,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 	}
 	auto num_sets = u32tmp;
 	grammar->sets_list.resize(num_sets);
-	for (uint32_t i = 0; i < num_sets; i++) {
+	for (uint32_t i = 0; i < num_sets; ++i) {
 		Set* s = grammar->allocateSet();
 
 		auto fields = readBE<uint32_t>(input);
@@ -316,7 +316,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		if (fields & (1 << 4)) {
 			u32tmp = readBE<uint32_t>(input);
 			auto num_set_ops = u32tmp;
-			for (uint32_t j = 0; j < num_set_ops; j++) {
+			for (uint32_t j = 0; j < num_set_ops; ++j) {
 				u32tmp = readBE<uint32_t>(input);
 				s->set_ops.push_back(u32tmp);
 			}
@@ -324,7 +324,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		if (fields & (1 << 5)) {
 			u32tmp = readBE<uint32_t>(input);
 			auto num_sets = u32tmp;
-			for (uint32_t j = 0; j < num_sets; j++) {
+			for (uint32_t j = 0; j < num_sets; ++j) {
 				u32tmp = readBE<uint32_t>(input);
 				s->sets.push_back(u32tmp);
 			}
@@ -370,7 +370,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	auto num_contexts = u32tmp;
-	for (uint32_t i = 0; i < num_contexts; i++) {
+	for (uint32_t i = 0; i < num_contexts; ++i) {
 		ContextualTest* t = readContextualTest(input);
 		grammar->contexts[t->hash] = t;
 	}
@@ -381,7 +381,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 	}
 	auto num_rules = u32tmp;
 	grammar->rule_by_number.resize(num_rules);
-	for (uint32_t i = 0; i < num_rules; i++) {
+	for (uint32_t i = 0; i < num_rules; ++i) {
 		Rule* r = grammar->allocateRule();
 
 		auto fields = readBE<uint32_t>(input);
@@ -460,7 +460,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 
 		u32tmp = readBE<uint32_t>(input);
 		auto num_dep_tests = u32tmp;
-		for (uint32_t j = 0; j < num_dep_tests; j++) {
+		for (uint32_t j = 0; j < num_dep_tests; ++j) {
 			u32tmp = readBE<uint32_t>(input);
 			ContextualTest* t = grammar->contexts[u32tmp];
 			r->addContextualTest(t, r->dep_tests);
@@ -468,7 +468,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 
 		u32tmp = readBE<uint32_t>(input);
 		auto num_tests = u32tmp;
-		for (uint32_t j = 0; j < num_tests; j++) {
+		for (uint32_t j = 0; j < num_tests; ++j) {
 			u32tmp = readBE<uint32_t>(input);
 			ContextualTest* t = grammar->contexts[u32tmp];
 			r->addContextualTest(t, r->tests);
@@ -477,7 +477,7 @@ int BinaryGrammar::parse_grammar(std::istream& input) {
 		if (fields & (1 << 15)) {
 			u32tmp = readBE<uint32_t>(input);
 			auto num_sub_rules = u32tmp;
-			for (uint32_t j = 0; j < num_sub_rules; j++) {
+			for (uint32_t j = 0; j < num_sub_rules; ++j) {
 				u32tmp = readBE<uint32_t>(input);
 				r->sub_rules.push_back(grammar->rule_by_number[u32tmp]);
 			}
diff --git a/src/BinaryGrammar_read_10043.cpp b/src/BinaryGrammar_read_10043.cpp
index b3c6b3e6..906ccb43 100644
--- a/src/BinaryGrammar_read_10043.cpp
+++ b/src/BinaryGrammar_read_10043.cpp
@@ -83,7 +83,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 	auto num_single_tags = u32tmp;
 	grammar->num_tags = num_single_tags;
 	grammar->single_tags_list.resize(num_single_tags);
-	for (uint32_t i = 0; i < num_single_tags; i++) {
+	for (uint32_t i = 0; i < num_single_tags; ++i) {
 		Tag* t = grammar->allocateTag();
 
 		auto fields = readBE<uint32_t>(input);
@@ -190,7 +190,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	auto num_pref_targets = u32tmp;
-	for (uint32_t i = 0; i < num_pref_targets; i++) {
+	for (uint32_t i = 0; i < num_pref_targets; ++i) {
 		u32tmp = readBE<uint32_t>(input);
 		grammar->preferred_targets.push_back(u32tmp);
 	}
@@ -200,7 +200,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	auto num_par_pairs = u32tmp;
-	for (uint32_t i = 0; i < num_par_pairs; i++) {
+	for (uint32_t i = 0; i < num_par_pairs; ++i) {
 		auto left = readBE<uint32_t>(input);
 		auto right = readBE<uint32_t>(input);
 		grammar->parentheses[left] = right;
@@ -212,7 +212,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 		u32tmp = readBE<uint32_t>(input);
 	}
 	uint32_t num_par_anchors = u32tmp;
-	for (uint32_t i = 0; i < num_par_anchors; i++) {
+	for (uint32_t i = 0; i < num_par_anchors; ++i) {
 		auto left = readBE<uint32_t>(input);
 		auto right = readBE<uint32_t>(input);
 		grammar->anchors[left] = right;
@@ -224,7 +224,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 	}
 	auto num_sets = u32tmp;
 	grammar->sets_list.resize(num_sets);
-	for (uint32_t i = 0; i < num_sets; i++) {
+	for (uint32_t i = 0; i < num_sets; ++i) {
 		Set* s = grammar->allocateSet();
 
 		auto fields = readBE<uint32_t>(input);
@@ -252,14 +252,14 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 		if (fields & (1 << 4)) {
 			u32tmp = readBE<uint32_t>(input);
 			auto num_set_ops = u32tmp;
-			for (uint32_t j = 0; j < num_set_ops; j++) {
+			for (uint32_t j = 0; j < num_set_ops; ++j) {
 				u32tmp = readBE<uint32_t>(input);
 				s->set_ops.push_back(u32tmp);
 			}
 		}
 		if (fields & (1 << 5)) {
 			auto num_sets = readBE<uint32_t>(input);
-			for (uint32_t j = 0; j < num_sets; j++) {
+			for (uint32_t j = 0; j < num_sets; ++j) {
 				u32tmp = readBE<uint32_t>(input);
 				s->sets.push_back(u32tmp);
 			}
@@ -302,7 +302,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 	}
 	auto num_contexts = u32tmp;
 	contexts_list.resize(num_contexts);
-	for (uint32_t i = 0; i < num_contexts; i++) {
+	for (uint32_t i = 0; i < num_contexts; ++i) {
 		ContextualTest* t = readContextualTest_10043(input);
 		grammar->contexts[t->hash] = t;
 		contexts_list[i] = t;
@@ -314,7 +314,7 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 	}
 	auto num_rules = u32tmp;
 	grammar->rule_by_number.resize(num_rules);
-	for (uint32_t i = 0; i < num_rules; i++) {
+	for (uint32_t i = 0; i < num_rules; ++i) {
 		Rule* r = grammar->allocateRule();
 
 		auto fields = readBE<uint32_t>(input);
@@ -387,14 +387,14 @@ int BinaryGrammar::readBinaryGrammar_10043(std::istream& input) {
 		}
 
 		auto num_dep_tests = readBE<uint32_t>(input);
-		for (uint32_t j = 0; j < num_dep_tests; j++) {
+		for (uint32_t j = 0; j < num_dep_tests; ++j) {
 			u32tmp = readBE<uint32_t>(input);
 			ContextualTest* t = contexts_list[u32tmp - 1];
 			r->addContextualTest(t, r->dep_tests);
 		}
 
 		auto num_tests = readBE<uint32_t>(input);
-		for (uint32_t j = 0; j < num_tests; j++) {
+		for (uint32_t j = 0; j < num_tests; ++j) {
 			u32tmp = readBE<uint32_t>(input);
 			ContextualTest* t = contexts_list[u32tmp - 1];
 			r->addContextualTest(t, r->tests);
diff --git a/src/Grammar.cpp b/src/Grammar.cpp
index c0785472..1dc50f41 100644
--- a/src/Grammar.cpp
+++ b/src/Grammar.cpp
@@ -552,7 +552,7 @@ Tag* Grammar::allocateTag(const UChar* txt) {
 
 Tag* Grammar::addTag(Tag* tag) {
 	uint32_t hash = tag->rehash();
-	for (uint32_t seed = 0; seed < 10000; seed++) {
+	for (uint32_t seed = 0; seed < 10000; ++seed) {
 		uint32_t ih = hash + seed;
 		Taguint32HashMap::iterator it;
 		if ((it = single_tags.find(ih)) != single_tags.end()) {
diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp
index 7155d376..1628956f 100644
--- a/src/GrammarApplicator.cpp
+++ b/src/GrammarApplicator.cpp
@@ -178,7 +178,7 @@ void GrammarApplicator::index() {
 
 	if (sections.empty()) {
 		int32_t smax = SI32(grammar->sections.size());
-		for (int32_t i = 0; i < smax; i++) {
+		for (int32_t i = 0; i < smax; ++i) {
 			for (auto r : grammar->rules) {
 				if (r->section < 0 || r->section > i) {
 					continue;
@@ -190,8 +190,8 @@ void GrammarApplicator::index() {
 	}
 	else {
 		numsections = UI32(sections.size());
-		for (uint32_t n = 0; n < numsections; n++) {
-			for (uint32_t e = 0; e <= n; e++) {
+		for (uint32_t n = 0; n < numsections; ++n) {
+			for (uint32_t e = 0; e <= n; ++e) {
 				for (auto r : grammar->rules) {
 					if (r->section != SI32(sections[e]) - 1) {
 						continue;
@@ -229,7 +229,7 @@ void GrammarApplicator::index() {
 Tag* GrammarApplicator::addTag(Tag* tag) {
 	uint32_t hash = tag->rehash();
 	uint32_t seed = 0;
-	for (; seed < 10000; seed++) {
+	for (; seed < 10000; ++seed) {
 		uint32_t ih = hash + seed;
 		Taguint32HashMap::iterator it;
 		if ((it = grammar->single_tags.find(ih)) != grammar->single_tags.end()) {
@@ -387,7 +387,7 @@ void GrammarApplicator::printReading(const Reading* reading, std::ostream& outpu
 			}
 			unique.insert(tter);
 		}
-		const Tag* tag = grammar->single_tags[tter];
+		auto tag = grammar->single_tags[tter];
 		if (tag->type & T_DEPENDENCY && has_dep && !dep_original) {
 			continue;
 		}
@@ -401,14 +401,13 @@ void GrammarApplicator::printReading(const Reading* reading, std::ostream& outpu
 		if (!reading->parent->dep_self) {
 			reading->parent->dep_self = reading->parent->global_number;
 		}
-		const Cohort* pr = nullptr;
-		pr = reading->parent;
+		auto pr = reading->parent;
 		if (reading->parent->dep_parent != DEP_NO_PARENT) {
 			if (reading->parent->dep_parent == 0) {
 				pr = reading->parent->parent->cohorts[0];
 			}
-			else if (reading->parent->parent->parent->cohort_map.find(reading->parent->dep_parent) != reading->parent->parent->parent->cohort_map.end()) {
-				pr = reading->parent->parent->parent->cohort_map[reading->parent->dep_parent];
+			else if (gWindow->cohort_map.find(reading->parent->dep_parent) != gWindow->cohort_map.end()) {
+				pr = gWindow->cohort_map[reading->parent->dep_parent];
 			}
 		}
 
@@ -681,7 +680,7 @@ void GrammarApplicator::pipeOutSingleWindow(const SingleWindow& window, Process&
 	auto cs = UI32(window.cohorts.size()) - 1;
 	writeRaw(ss, cs);
 
-	for (uint32_t c = 1; c < cs + 1; c++) {
+	for (uint32_t c = 1; c < cs + 1; ++c) {
 		pipeOutCohort(window.cohorts[c], ss);
 	}
 
diff --git a/src/GrammarApplicator_matchSet.cpp b/src/GrammarApplicator_matchSet.cpp
index 8a5e2834..7b6e5d9e 100644
--- a/src/GrammarApplicator_matchSet.cpp
+++ b/src/GrammarApplicator_matchSet.cpp
@@ -803,12 +803,12 @@ bool GrammarApplicator::doesSetMatchReading(const Reading& reading, const uint32
 				++i;
 			}
 			if (match) {
-				match_sub++;
+				++match_sub;
 				retval = true;
 				break;
 			}
 			if (failfast) {
-				match_sub++;
+				++match_sub;
 				retval = false;
 				break;
 			}
diff --git a/src/GrammarApplicator_reflow.cpp b/src/GrammarApplicator_reflow.cpp
index 644701fa..5301c335 100644
--- a/src/GrammarApplicator_reflow.cpp
+++ b/src/GrammarApplicator_reflow.cpp
@@ -645,7 +645,7 @@ void GrammarApplicator::splitMappings(TagList& mappings, Cohort& cohort, Reading
 			nr->mapping = ttag;
 		}
 		cohort.appendReading(nr);
-		numReadings++;
+		++numReadings;
 	}
 
 	reading.mapped = mapped;
diff --git a/src/GrammarApplicator_runContextualTest.cpp b/src/GrammarApplicator_runContextualTest.cpp
index e519f97d..55e0417f 100644
--- a/src/GrammarApplicator_runContextualTest.cpp
+++ b/src/GrammarApplicator_runContextualTest.cpp
@@ -477,7 +477,7 @@ Cohort* GrammarApplicator::runContextualTest(SingleWindow* sWindow, size_t posit
 				goto label_gotACohort;
 			}
 
-			for (uint32_t i = 1; left || right; i++) {
+			for (uint32_t i = 1; left || right; ++i) {
 				if (left) {
 					rvs = 0;
 					cohort = runSingleTest(left, lpos - i, test, rvs, &retval, deep, origin);
diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index 69db28e8..033fc156 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -80,7 +80,7 @@ Reading* GrammarApplicator::initEmptyCohort(Cohort& cCohort) {
 	addTagToReading(*cReading, cCohort.wordform);
 	cReading->noprint = true;
 	cCohort.appendReading(cReading);
-	numReadings++;
+	++numReadings;
 	return cReading;
 }
 
@@ -212,7 +212,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 				lSWindow = cSWindow;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 				did_soft_lookback = false;
 			}
 			if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
@@ -230,7 +230,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 				lSWindow = cSWindow;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 				did_soft_lookback = false;
 			}
 			if (!cSWindow) {
@@ -247,7 +247,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 
 				lSWindow = cSWindow;
 				cCohort = nullptr;
-				numWindows++;
+				++numWindows;
 				did_soft_lookback = false;
 			}
 			if (cCohort && cSWindow) {
@@ -271,7 +271,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 			lCohort = cCohort;
 			lReading = nullptr;
 			indents.clear();
-			numCohorts++;
+			++numCohorts;
 			cCohort->line_number = numLines;
 
 			space += 2;
@@ -396,7 +396,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 				readings->back()->rehash();
 			}
 			indents.push_back(std::make_pair(indent, cReading));
-			numReadings++;
+			++numReadings;
 
 			// Check whether the cohort still belongs to the window, as per --dep-delimit
 			if (!is_deleted && dep_delimit && dep_highest_seen && (cCohort->dep_self <= dep_highest_seen || cCohort->dep_self - dep_highest_seen > dep_delimit)) {
@@ -645,7 +645,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 						lSWindow = cSWindow;
 						cSWindow = nullptr;
 						cCohort = nullptr;
-						numCohorts++;
+						++numCohorts;
 						did_soft_lookback = false;
 					}
 					else if (lCohort) {
diff --git a/src/GrammarApplicator_runRules.cpp b/src/GrammarApplicator_runRules.cpp
index 95a31058..fad57670 100644
--- a/src/GrammarApplicator_runRules.cpp
+++ b/src/GrammarApplicator_runRules.cpp
@@ -1332,7 +1332,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 				if (selected.size() < target->readings.size() && !selected.empty()) {
 					ReadingList drop;
 					size_t si = 0;
-					for (size_t ri = 0; ri < target->readings.size(); ri++) {
+					for (size_t ri = 0; ri < target->readings.size(); ++ri) {
 						// Manually trace, since reading_cb doesn't get called on non-matching readings
 						Reading* rd = target->readings[ri];
 						if (rule->sub_reading != 32767) {
@@ -1342,7 +1342,7 @@ uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const
 							rd->hit_by.push_back(rule->number);
 						}
 						if (si < selected.size() && target->readings[ri] == selected[si]) {
-							si++;
+							++si;
 						}
 						else {
 							target->readings[ri]->deleted = true;
diff --git a/src/GrammarWriter.cpp b/src/GrammarWriter.cpp
index 53aff716..e58b5be3 100644
--- a/src/GrammarWriter.cpp
+++ b/src/GrammarWriter.cpp
@@ -79,7 +79,7 @@ void GrammarWriter::printSet(std::ostream& output, const Set& curset) {
 		}
 		u_fprintf(output, "SET %S = ", n);
 		u_fprintf(output, "%S ", grammar->sets_list[curset.sets[0]]->name.data());
-		for (uint32_t i = 0; i < curset.sets.size() - 1; i++) {
+		for (uint32_t i = 0; i < curset.sets.size() - 1; ++i) {
 			u_fprintf(output, "%S %S ", stringbits[curset.set_ops[i]].data(), grammar->sets_list[curset.sets[i + 1]]->name.data());
 		}
 		u_fprintf(output, " ;\n\n");
@@ -287,7 +287,7 @@ void GrammarWriter::printRule(std::ostream& to, const Rule& rule) {
 	}
 	u_fprintf(to, " ");
 
-	for (uint32_t i = 0; i < FLAGS_COUNT; i++) {
+	for (uint32_t i = 0; i < FLAGS_COUNT; ++i) {
 		if (i == FL_BEFORE || i == FL_AFTER || i == FL_WITHCHILD) {
 			continue;
 		}
@@ -544,7 +544,7 @@ void GrammarWriter::printContextualTest(std::ostream& to, const ContextualTest&
 			u_fprintf(to, "(");
 			printContextualTest(to, **iter);
 			u_fprintf(to, ")");
-			iter++;
+			++iter;
 			if (iter != test.ors.end()) {
 				u_fprintf(to, " OR ");
 			}
diff --git a/src/MatxinApplicator.cpp b/src/MatxinApplicator.cpp
index 0f9acf2e..bbd338bf 100644
--- a/src/MatxinApplicator.cpp
+++ b/src/MatxinApplicator.cpp
@@ -177,7 +177,7 @@ void MatxinApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 				lSWindow = cSWindow;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 			} // end >= soft_limit
 			if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
 				if (!is_conv && cSWindow->cohorts.size() >= hard_limit) {
@@ -192,7 +192,7 @@ void MatxinApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 				lSWindow = cSWindow;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 			} // end >= hard_limit
 			// If we don't have a current window, create one
 			if (!cSWindow) {
@@ -216,7 +216,7 @@ void MatxinApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 				lSWindow->text = firstblank;
 				firstblank.clear();
 				cCohort = nullptr;
-				numWindows++;
+				++numWindows;
 			} // created at least one cSWindow by now
 
 			// If the current cohort is looking ok, and we have an available
@@ -258,7 +258,7 @@ void MatxinApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 
 			//u_fprintf(output, "# %S\n", wordform);
 			cCohort->wordform = addTag(wordform);
-			numCohorts++;
+			++numCohorts;
 
 			// We're now at the beginning of the readings
 			UString current_reading;
@@ -319,7 +319,7 @@ void MatxinApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 					}
 
 					cCohort->appendReading(cReading);
-					numReadings++;
+					++numReadings;
 
 					current_reading.clear();
 
@@ -339,7 +339,7 @@ void MatxinApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 					}
 
 					cCohort->appendReading(cReading);
-					numReadings++;
+					++numReadings;
 
 					current_reading.clear();
 					continue; // while not $
@@ -790,7 +790,7 @@ void MatxinApplicator::procNode(int& depth, std::map<int, Node>& nodes, std::map
 	const UChar* si = node.si.data() + !node.si.empty();
 
 	if (n != 0) {
-		for (int i = 0; i < depth * 2; i++) {
+		for (int i = 0; i < depth * 2; ++i) {
 			u_fprintf(output, " ");
 		}
 
@@ -818,7 +818,7 @@ void MatxinApplicator::procNode(int& depth, std::map<int, Node>& nodes, std::map
 	}
 
 	if (n != 0) {
-		for (int i = 0; i < depth * 2; i++) {
+		for (int i = 0; i < depth * 2; ++i) {
 			u_fprintf(output, " ");
 		}
 
diff --git a/src/MweSplitApplicator.cpp b/src/MweSplitApplicator.cpp
index 1869deb4..585fe568 100644
--- a/src/MweSplitApplicator.cpp
+++ b/src/MweSplitApplicator.cpp
@@ -178,7 +178,7 @@ void MweSplitApplicator::printSingleWindow(SingleWindow* window, std::ostream& o
 	}
 
 	auto cs = UI32(window->cohorts.size());
-	for (uint32_t c = 0; c < cs; c++) {
+	for (uint32_t c = 0; c < cs; ++c) {
 		Cohort* cohort = window->cohorts[c];
 		std::vector<Cohort*> cs = splitMwe(cohort);
 		for (auto& iter : cs) {
diff --git a/src/NicelineApplicator.cpp b/src/NicelineApplicator.cpp
index 24537121..9536c5ac 100644
--- a/src/NicelineApplicator.cpp
+++ b/src/NicelineApplicator.cpp
@@ -137,7 +137,7 @@ void NicelineApplicator::runGrammarOnText(std::istream& input, std::ostream& out
 				lSWindow = cSWindow;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 				did_soft_lookback = false;
 			}
 			if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
@@ -153,7 +153,7 @@ void NicelineApplicator::runGrammarOnText(std::istream& input, std::ostream& out
 				lSWindow = cSWindow;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 				did_soft_lookback = false;
 			}
 			if (!cSWindow) {
@@ -163,7 +163,7 @@ void NicelineApplicator::runGrammarOnText(std::istream& input, std::ostream& out
 
 				lSWindow = cSWindow;
 				cCohort = nullptr;
-				numWindows++;
+				++numWindows;
 				did_soft_lookback = false;
 			}
 			if (cCohort && cSWindow) {
@@ -190,7 +190,7 @@ void NicelineApplicator::runGrammarOnText(std::istream& input, std::ostream& out
 			cCohort->global_number = gWindow->cohort_counter++;
 			cCohort->wordform = addTag(tag);
 			lCohort = cCohort;
-			numCohorts++;
+			++numCohorts;
 
 			++space;
 			while (space && space[0]) {
@@ -257,7 +257,7 @@ void NicelineApplicator::runGrammarOnText(std::istream& input, std::ostream& out
 					splitMappings(mappings, *cCohort, *cReading, true);
 				}
 				cCohort->appendReading(cReading);
-				numReadings++;
+				++numReadings;
 
 				if (tab) {
 					space = ++tab;
@@ -359,8 +359,8 @@ void NicelineApplicator::printReading(const Reading* reading, std::ostream& outp
 			if (reading->parent->dep_parent == 0) {
 				pr = reading->parent->parent->cohorts[0];
 			}
-			else if (reading->parent->parent->parent->cohort_map.find(reading->parent->dep_parent) != reading->parent->parent->parent->cohort_map.end()) {
-				pr = reading->parent->parent->parent->cohort_map[reading->parent->dep_parent];
+			else if (gWindow->cohort_map.find(reading->parent->dep_parent) != gWindow->cohort_map.end()) {
+				pr = gWindow->cohort_map[reading->parent->dep_parent];
 			}
 		}
 
diff --git a/src/PlaintextApplicator.cpp b/src/PlaintextApplicator.cpp
index 10f2370e..0c9d37fb 100644
--- a/src/PlaintextApplicator.cpp
+++ b/src/PlaintextApplicator.cpp
@@ -129,7 +129,7 @@ void PlaintextApplicator::runGrammarOnText(std::istream& input, std::ostream& ou
 				lCohort = cCohort;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 				did_soft_lookback = false;
 			}
 			if (cCohort && (cSWindow->cohorts.size() >= hard_limit || (!dep_delimit && grammar->delimiters && doesSetMatchCohortNormal(*cCohort, grammar->delimiters->number)))) {
@@ -146,7 +146,7 @@ void PlaintextApplicator::runGrammarOnText(std::istream& input, std::ostream& ou
 				lCohort = cCohort;
 				cSWindow = nullptr;
 				cCohort = nullptr;
-				numCohorts++;
+				++numCohorts;
 				did_soft_lookback = false;
 			}
 			if (!cSWindow) {
@@ -157,7 +157,7 @@ void PlaintextApplicator::runGrammarOnText(std::istream& input, std::ostream& ou
 				lSWindow = cSWindow;
 				lCohort = cSWindow->cohorts[0];
 				cCohort = nullptr;
-				numWindows++;
+				++numWindows;
 				did_soft_lookback = false;
 			}
 			if (gWindow->next.size() > num_windows) {
@@ -227,7 +227,7 @@ void PlaintextApplicator::runGrammarOnText(std::istream& input, std::ostream& ou
 				tag.append(u">\"");
 				cCohort->wordform = addTag(tag);
 				lCohort = cCohort;
-				numCohorts++;
+				++numCohorts;
 				cReading = initEmptyCohort(*cCohort);
 				cReading->noprint = !add_tags;
 				if (add_tags) {
diff --git a/src/TextualParser.cpp b/src/TextualParser.cpp
index 8c7c6cad..96adfadc 100644
--- a/src/TextualParser.cpp
+++ b/src/TextualParser.cpp
@@ -2678,25 +2678,32 @@ void TextualParser::parseFromUChar(UChar* input, const char* fname) {
 					grammar_size = static_cast<size_t>(_stat.st_size);
 				}
 
-				UFILE* grammar = u_fopen(abspath.data(), "rb", nullptr, nullptr);
-				if (!grammar) {
-					u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, abspath.data());
-					CG3Quit(1);
-				}
-				UChar32 bom = u_fgetcx(grammar);
-				if (bom != 0xfeff && bom != static_cast<UChar32>(0xffffffff)) {
-					u_fungetc(bom, grammar);
+				std::string buf;
+				buf.resize(grammar_size);
+				{
+					std::ifstream grammar(abspath.data(), std::ios::binary);
+					if (!grammar) {
+						u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, abspath.data());
+						CG3Quit(1);
+					}
+					if (!grammar.read(&buf[0], grammar_size)) {
+						u_fprintf(ux_stderr, "%s: Error: Error reading %s!\n", filebase, abspath.data());
+						CG3Quit(1);
+					}
+					if (buf[0] == '\xEF' && buf[1] == '\xBB' && buf[2] == '\xBF') {
+						buf.erase(0, 3);
+					}
 				}
 
 				grammarbufs.emplace_back(new UString(grammar_size * 2, 0));
 				auto& data = *grammarbufs.back().get();
-				uint32_t read = u_file_read(&data[4], SI32(grammar_size * 2), grammar);
-				u_fclose(grammar);
-				if (read >= grammar_size * 2 - 1) {
+				int32_t size = 0;
+				u_strFromUTF8(&data[4], SI32(grammar_size * 2), &size, buf.data(), SI32(buf.size()), &err);
+				if (size >= SI32(grammar_size * 2 - 1)) {
 					u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
 					CG3Quit(1);
 				}
-				data.resize(read + 4 + 1);
+				data.resize(size + 4 + 1);
 
 				uint32_t olines = 0;
 				swapper oswap(true, olines, result->lines);
@@ -2877,26 +2884,34 @@ int TextualParser::parse_grammar(const char* fname) {
 		result->grammar_size = static_cast<size_t>(_stat.st_size);
 	}
 
-	UFILE* grammar = u_fopen(filename, "rb", nullptr, nullptr);
-	if (!grammar) {
-		u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, filename);
-		CG3Quit(1);
-	}
-	UChar32 bom = u_fgetcx(grammar);
-	if (bom != 0xfeff && bom != static_cast<UChar32>(0xffffffff)) {
-		u_fungetc(bom, grammar);
+	std::string buf;
+	buf.resize(result->grammar_size);
+	{
+		std::ifstream grammar(filename, std::ios::binary);
+		if (!grammar) {
+			u_fprintf(ux_stderr, "%s: Error: Error opening %s for reading!\n", filebase, filename);
+			CG3Quit(1);
+		}
+		if (!grammar.read(&buf[0], result->grammar_size)) {
+			u_fprintf(ux_stderr, "%s: Error: Error reading %s!\n", filebase, filename);
+			CG3Quit(1);
+		}
+		if (buf[0] == '\xEF' && buf[1] == '\xBB' && buf[2] == '\xBF') {
+			buf.erase(0, 3);
+		}
 	}
 
 	// It reads into the buffer at offset 4 because certain functions may look back, so we need some nulls in front.
 	grammarbufs.emplace_back(new UString(result->grammar_size * 2, 0));
 	auto& data = *grammarbufs.back().get();
-	uint32_t read = u_file_read(&data[4], SI32(result->grammar_size * 2), grammar);
-	u_fclose(grammar);
-	if (read >= result->grammar_size * 2 - 1) {
+	int32_t size = 0;
+	UErrorCode err = U_ZERO_ERROR;
+	u_strFromUTF8(&data[4], SI32(result->grammar_size * 2), &size, buf.data(), SI32(buf.size()), &err);
+	if (size >= SI32(result->grammar_size * 2 - 1)) {
 		u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
 		CG3Quit(1);
 	}
-	data.resize(read + 4 + 1);
+	data.resize(size + 4 + 1);
 
 	return parse_grammar(data);
 }
@@ -2909,12 +2924,11 @@ int TextualParser::parse_grammar(const char* buffer, size_t length) {
 	grammarbufs.emplace_back(new UString(length * 2, 0));
 	auto& data = *grammarbufs.back().get();
 
+	int32_t size = 0;
 	UErrorCode err = U_ZERO_ERROR;
-	UConverter* conv = ucnv_open("UTF-8", &err);
-	auto tmp = ucnv_toUChars(conv, &data[4], SI32(length * 2), buffer, SI32(length), &err);
-
-	if (static_cast<size_t>(tmp) >= length * 2 - 1) {
-		u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer!\n", filebase);
+	u_strFromUTF8(&data[4], SI32(result->grammar_size * 2), &size, buffer, SI32(length), &err);
+	if (size >= SI32(result->grammar_size * 2 - 1)) {
+		u_fprintf(ux_stderr, "%s: Error: Converting from underlying codepage to UTF-16 exceeded factor 2 buffer.\n", filebase);
 		CG3Quit(1);
 	}
 
@@ -3029,7 +3043,7 @@ int TextualParser::parse_grammar(UString& data) {
 	// Create context sets for nested rules
 	{
 		constexpr UStringView grp[] = { STR_UU_C1, STR_UU_C2, STR_UU_C3, STR_UU_C4, STR_UU_C5, STR_UU_C6, STR_UU_C7, STR_UU_C8, STR_UU_C9 };
-		for (size_t i = 0; i < 9; i++) {
+		for (size_t i = 0; i < 9; ++i) {
 			Set* set_c = result->allocateSet();
 			set_c->line = 0;
 			set_c->setName(grp[i]);
diff --git a/src/Window.cpp b/src/Window.cpp
index 0f6395a0..52177ce6 100644
--- a/src/Window.cpp
+++ b/src/Window.cpp
@@ -43,14 +43,14 @@ Window::~Window() {
 
 SingleWindow* Window::allocSingleWindow() {
 	SingleWindow* swindow = alloc_swindow(this);
-	window_counter++;
+	++window_counter;
 	swindow->number = window_counter;
 	return swindow;
 }
 
 SingleWindow* Window::allocPushSingleWindow() {
 	SingleWindow* swindow = alloc_swindow(this);
-	window_counter++;
+	++window_counter;
 	swindow->number = window_counter;
 	if (!next.empty()) {
 		swindow->next = next.front();
@@ -66,7 +66,7 @@ SingleWindow* Window::allocPushSingleWindow() {
 
 SingleWindow* Window::allocAppendSingleWindow() {
 	SingleWindow* swindow = alloc_swindow(this);
-	window_counter++;
+	++window_counter;
 	swindow->number = window_counter;
 	if (!next.empty()) {
 		swindow->previous = next.back();
diff --git a/src/cg-conv.cpp b/src/cg-conv.cpp
index 719d29ad..7c9f593d 100644
--- a/src/cg-conv.cpp
+++ b/src/cg-conv.cpp
@@ -63,13 +63,13 @@ int main(int argc, char* argv[]) {
 		fprintf(out, "Options:\n");
 
 		size_t longest = 0;
-		for (uint32_t i = 0; i < options_conv.size(); i++) {
+		for (uint32_t i = 0; i < options_conv.size(); ++i) {
 			if (!options_conv[i].description.empty()) {
 				size_t len = strlen(options_conv[i].longName);
 				longest = std::max(longest, len);
 			}
 		}
-		for (uint32_t i = 0; i < options_conv.size(); i++) {
+		for (uint32_t i = 0; i < options_conv.size(); ++i) {
 			if (!options_conv[i].description.empty() && options_conv[i].description[0] != '!') {
 				fprintf(out, " ");
 				if (options_conv[i].shortName) {
diff --git a/src/cg-mwesplit.cpp b/src/cg-mwesplit.cpp
index e13581ae..4ac8dc37 100644
--- a/src/cg-mwesplit.cpp
+++ b/src/cg-mwesplit.cpp
@@ -63,13 +63,13 @@ int main(int argc, char** argv) {
 		fprintf(out, "Options:\n");
 
 		size_t longest = 0;
-		for (uint32_t i = 0; i < NUM_OPTIONS_MWE; i++) {
+		for (uint32_t i = 0; i < NUM_OPTIONS_MWE; ++i) {
 			if (!options_mwe[i].description.empty()) {
 				size_t len = strlen(options_mwe[i].longName);
 				longest = std::max(longest, len);
 			}
 		}
-		for (uint32_t i = 0; i < NUM_OPTIONS_MWE; i++) {
+		for (uint32_t i = 0; i < NUM_OPTIONS_MWE; ++i) {
 			if (!options_mwe[i].description.empty() && options_mwe[i].description[0] != '!') {
 				fprintf(out, " ");
 				if (options_mwe[i].shortName) {
diff --git a/src/cg-proc.cpp b/src/cg-proc.cpp
index f39cb20a..1f7e992b 100644
--- a/src/cg-proc.cpp
+++ b/src/cg-proc.cpp
@@ -327,7 +327,7 @@ int main(int argc, char* argv[]) {
 
 	applicator->setGrammar(&grammar);
 	applicator->setOptions();
-	for (int32_t i = 1; i <= sections; i++) {
+	for (int32_t i = 1; i <= sections; ++i) {
 		applicator->sections.push_back(i);
 	}
 
diff --git a/src/inlines.hpp b/src/inlines.hpp
index 7db76b32..48c53f91 100644
--- a/src/inlines.hpp
+++ b/src/inlines.hpp
@@ -305,7 +305,7 @@ template<typename Char>
 inline bool ISESC(const Char* p) {
 	uint32_t a = 1;
 	while (*(p - a) == '\\') {
-		a++;
+		++a;
 	}
 	return (a % 2 == 0);
 }
diff --git a/src/main.cpp b/src/main.cpp
index 744488aa..669ef612 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -86,13 +86,13 @@ int main(int argc, char* argv[]) {
 		fprintf(out, "Options:\n");
 
 		size_t longest = 0;
-		for (uint32_t i = 0; i < options.size(); i++) {
+		for (uint32_t i = 0; i < options.size(); ++i) {
 			if (!options[i].description.empty()) {
 				size_t len = strlen(options[i].longName);
 				longest = std::max(longest, len);
 			}
 		}
-		for (uint32_t i = 0; i < options.size(); i++) {
+		for (uint32_t i = 0; i < options.size(); ++i) {
 			if (!options[i].description.empty()) {
 				fprintf(out, " ");
 				if (options[i].shortName) {
diff --git a/src/parser_helpers.hpp b/src/parser_helpers.hpp
index f5975af9..825a11e3 100644
--- a/src/parser_helpers.hpp
+++ b/src/parser_helpers.hpp
@@ -54,7 +54,7 @@ Tag* parseTag(const UChar* to, const UChar* p, State& state, bool unescape=true)
 		const UChar* tmp = to;
 		while (tmp[0] && tmp[0] == '^') {
 			tag->type |= T_FAILFAST;
-			tmp++;
+			++tmp;
 		}
 
 		size_t length = u_strlen(tmp);
diff --git a/src/uextras.hpp b/src/uextras.hpp
index 0d77c15e..3062c74e 100644
--- a/src/uextras.hpp
+++ b/src/uextras.hpp
@@ -150,7 +150,7 @@ inline int ux_isSetOp(const UChar* it) {
 inline bool ux_isEmpty(const UChar* text) {
 	size_t length = u_strlen(text);
 	if (length > 0) {
-		for (size_t i = 0; i < length; i++) {
+		for (size_t i = 0; i < length; ++i) {
 			if (!ISSPACE(text[i])) {
 				return false;
 			}

From 9ddc564947bcba9ab4da605136a4f797312c0787 Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 29 Aug 2025 13:22:25 +0200
Subject: [PATCH 32/42] Move text belonging to removed cohorts to prior
 not-removed cohorts, or the containing window (63/69)

---
 src/BinaryApplicator.cpp             | 18 ++++++++++++++++++
 src/FormatConverter.hpp              |  3 ---
 src/GrammarApplicator.hpp            |  4 ++++
 src/GrammarApplicator_runGrammar.cpp |  6 ++++++
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 2cb46a04..886540a8 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -387,6 +387,24 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 	  }
   }
 
+  // Move text belonging to removed cohorts to prior not-removed cohorts, or the containing window
+  for (size_t i = 0; i < window->all_cohorts.size(); ++i) {
+	  auto cohort = window->all_cohorts[i];
+	  if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
+		  if (!cohort->text.empty()) {
+			  for (size_t j = i; j > 0; --j) {
+				  if (window->all_cohorts[j - 1]->local_number == 0 || (window->all_cohorts[j - 1]->type & CT_REMOVED)) {
+					  continue;
+				  }
+				  window->all_cohorts[j-1]->text += cohort->text;
+				  cohort->text.clear();
+			  }
+			  window->text += cohort->text;
+			  cohort->text.clear();
+		  }
+	  }
+  }
+
   std::string cohort_buffer;
   uint16_t cohort_count = 0;
   for (auto& cohort : window->all_cohorts) {
diff --git a/src/FormatConverter.hpp b/src/FormatConverter.hpp
index 16f6cff6..a9cb1449 100644
--- a/src/FormatConverter.hpp
+++ b/src/FormatConverter.hpp
@@ -29,7 +29,6 @@
 #include "NicelineApplicator.hpp"
 #include "PlaintextApplicator.hpp"
 #include "Grammar.hpp"
-#include "cg3.h"
 
 namespace CG3 {
 
@@ -42,8 +41,6 @@ class FormatConverter : public ApertiumApplicator, public BinaryApplicator, publ
 	void runGrammarOnText(std::istream& input, std::ostream& output);
 
 	std::unique_ptr<std::istream> detectFormat(std::istream& in);
-	cg3_sformat fmt_input = CG3SF_CG;
-	cg3_sformat fmt_output = CG3SF_CG;
 
 	Grammar conv_grammar;
 
diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp
index 8f03d5bb..c10b6721 100644
--- a/src/GrammarApplicator.hpp
+++ b/src/GrammarApplicator.hpp
@@ -31,6 +31,7 @@
 #include "interval_vector.hpp"
 #include "flat_unordered_set.hpp"
 #include "scoped_stack.hpp"
+#include "cg3.h"
 #include <deque>
 
 class Process;
@@ -126,6 +127,9 @@ class GrammarApplicator {
 	bool add_spacing = true;
 	bool print_ids = false;
 
+	cg3_sformat fmt_input = CG3SF_CG;
+	cg3_sformat fmt_output = CG3SF_CG;
+
 	bool dep_has_spanned = false;
 	uint32_t dep_delimit = 0;
 	bool dep_absolute = false;
diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index 033fc156..81ace7fa 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -145,6 +145,12 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 
 	ux_stripBOM(input);
 
+	if (fmt_output == CG3SF_BINARY) {
+		cSWindow = gWindow->allocAppendSingleWindow();
+		initEmptySingleWindow(cSWindow);
+		lSWindow = cSWindow;
+	}
+
 	while (!input.eof()) {
 		++lines;
 		auto packoff = get_line_clean(line, cleaned, input);

From 7a3bdb47d7884e6a7ed828c3db7fd6790a462e02 Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 29 Aug 2025 13:27:20 +0200
Subject: [PATCH 33/42] Baseform/wordform type isn't enough to exclude (64/69)

---
 src/BinaryApplicator.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index 886540a8..f9de5528 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -229,8 +229,7 @@ bool BinaryApplicator::readWindow() {
 		addTagToReading(*cCohort->wread, cCohort->wordform);
 		for (uint16_t tn = 0; tn < tag_count; ++tn) {
 			READ_U16_INTO(tag);
-			addTagToReading(*cCohort->wread, window_tags[tag],
-							(tn + 1 == tag_count));
+			addTagToReading(*cCohort->wread, window_tags[tag], (tn + 1 == tag_count));
 		}
     }
 
@@ -496,7 +495,10 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 			uint32SortedVector unique;
 			for (auto& tter : reading->tags_list) {
 				auto tag = grammar->single_tags[tter];
-				if (tag->type & (T_WORDFORM | T_BASEFORM | T_DEPENDENCY | T_RELATION)) {
+				if (tter == reading->baseform || tter == reading->parent->wordform->hash) {
+					continue;
+				}
+				if (tag->type & (T_DEPENDENCY | T_RELATION)) {
 					continue;
 				}
 				if (unique_tags) {

From 6c82135e18f220ecd46e76a211b05b796f3e69e3 Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 29 Aug 2025 13:47:00 +0200
Subject: [PATCH 34/42] Create window for trailing vars (65/69)

---
 src/GrammarApplicator_runGrammar.cpp | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index 81ace7fa..3171fb07 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -145,6 +145,15 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 
 	ux_stripBOM(input);
 
+	auto adopt_variables = [&]() {
+		cSWindow->variables_set.insert(variables_set.begin(), variables_set.end());
+		variables_set.clear();
+		cSWindow->variables_rem.insert(variables_rem.begin(), variables_rem.end());
+		variables_rem.clear();
+		cSWindow->variables_output.insert(variables_output.begin(), variables_output.end());
+		variables_output.clear();
+	};
+
 	if (fmt_output == CG3SF_BINARY) {
 		cSWindow = gWindow->allocAppendSingleWindow();
 		initEmptySingleWindow(cSWindow);
@@ -244,13 +253,6 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 				cSWindow = gWindow->allocAppendSingleWindow();
 				initEmptySingleWindow(cSWindow);
 
-				cSWindow->variables_set = variables_set;
-				variables_set.clear();
-				cSWindow->variables_rem = variables_rem;
-				variables_rem.clear();
-				cSWindow->variables_output = variables_output;
-				variables_output.clear();
-
 				lSWindow = cSWindow;
 				cCohort = nullptr;
 				++numWindows;
@@ -271,6 +273,9 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 					u_fflush(ux_stderr);
 				}
 			}
+			if (cSWindow->all_cohorts.size() == 1) {
+				adopt_variables();
+			}
 			cCohort = alloc_cohort(cSWindow);
 			cCohort->global_number = gWindow->cohort_counter++;
 			cCohort->wordform = addTag(&cleaned[0]);
@@ -685,6 +690,11 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 		cCohort = nullptr;
 		cSWindow = nullptr;
 	}
+	if (fmt_output == CG3SF_BINARY && !variables_output.empty()) {
+		cSWindow = gWindow->allocAppendSingleWindow();
+		initEmptySingleWindow(cSWindow);
+		adopt_variables();
+	}
 	while (!gWindow->next.empty()) {
 		gWindow->shuffleWindowsDown();
 		runGrammarOnWindow();

From 86a856cda1aeb4e4fc3f71bee332309eadc3612e Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 29 Aug 2025 17:01:11 +0200
Subject: [PATCH 35/42] 3 distinct packet types (67/69)

---
 src/BinaryApplicator.cpp             | 1031 ++++++++++++++------------
 src/BinaryApplicator.hpp             |   41 +-
 src/FormatConverter.cpp              |    8 +-
 src/GrammarApplicator.cpp            |   18 +-
 src/GrammarApplicator_runGrammar.cpp |   24 +-
 src/JsonlApplicator.cpp              |    2 +-
 src/inlines.hpp                      |  140 ++--
 test/runall.pl                       |    1 -
 8 files changed, 715 insertions(+), 550 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index f9de5528..ac0f19ed 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -29,524 +29,609 @@ BinaryApplicator::BinaryApplicator(std::ostream& ux_err)
 }
 
 void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& output) {
-  ux_stdin = &input;
-  ux_stdout = &output;
-
-  if (!input.good()) {
-    u_fprintf(ux_stderr, "Error: Input is null - nothing to parse!\n");
-    CG3Quit(1);
-  }
-  if (input.eof()) {
-    u_fprintf(ux_stderr, "Error: Input is empty - nothing to parse!\n");
-    CG3Quit(1);
-  }
-  if (!output) {
-    u_fprintf(ux_stderr, "Error: Output is null - cannot write to nothing!\n");
-    CG3Quit(1);
-  }
-
-  if (!grammar) {
-    u_fprintf(ux_stderr, "Error: No grammar provided - cannot continue! Hint: call setGrammar() first.\n");
-    CG3Quit(1);
-  }
-
-  {
-    std::string header(8, 0);
-    if (!input.read(&header[0], 8)) {
-      u_fprintf(ux_stderr, "Error: Could not read stream header!\n");
-      CG3Quit(1);
-    }
-    if (!is_cg3bsf(header)) {
-      u_fprintf(ux_stderr, "Error: Stream does not start with magic bytes - cannot read as binary!\n");
-      CG3Quit(1);
-    }
-    uint32_t version = reinterpret_cast<uint32_t*>(&header[4])[0];
-    if (version != CG3_BINARY_STREAM) {
-      u_fprintf(ux_stderr, "Error: Stream is version %u but this reader only knows version %u!\n", version, CG3_BINARY_STREAM);
-      CG3Quit(1);
-    }
-  }
-
-  index();
-
-  uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
-  bool flushAfter = false;
-
-  gWindow->window_span = num_windows;
-
-  auto flush = [&]() {
-    if (gWindow->back()) {
-      gWindow->back()->flush_after = true;
-    }
-
-    while (!gWindow->next.empty()) {
-      gWindow->shuffleWindowsDown();
-      runGrammarOnWindow();
-    }
-
-    gWindow->shuffleWindowsDown();
-    while (!gWindow->previous.empty()) {
-      SingleWindow* tmp = gWindow->previous.front();
-      printSingleWindow(tmp, output);
-      free_swindow(tmp);
-      gWindow->previous.erase(gWindow->previous.begin());
-    }
-    flushAfter = false;
-  };
-
-  while (!input.eof()) {
-    flushAfter = readWindow();
-    ++numWindows;
-    if (gWindow->next.size() > num_windows) {
-      gWindow->shuffleWindowsDown();
-      runGrammarOnWindow();
-      if (numWindows % resetAfter == 0) {
-	resetIndexes();
-      }
-    }
-    if (flushAfter) {
-      flush();
-    }
-  }
-  flush();
-}
+	ux_stdin = &input;
+	ux_stdout = &output;
 
-#define READ_U16_INTO(dest) \
-  do { \
-    (dest) = reinterpret_cast<uint16_t*>(&buf[pos])[0]; \
-    pos += 2; \
-  } while (false)
-
-#define READ_U32_INTO(dest) \
-  do { \
-    (dest) = reinterpret_cast<uint32_t*>(&buf[pos])[0]; \
-    pos += 4; \
-  } while (false)
-
-#define READ_STR_INTO(dest)			\
-  do { \
-    uint16_t tl = reinterpret_cast<uint16_t*>(&buf[pos])[0]; \
-    pos += 2; \
-    (dest).clear(); \
-    (dest).resize(tl, 0); \
-    int32_t olen = 0; \
-    UErrorCode status = U_ZERO_ERROR; \
-    u_strFromUTF8(&(dest)[0], tl, &olen, &buf[pos], tl, &status); \
-    (dest).resize(olen); \
-    pos += tl; \
-  } while (false)
-
-bool BinaryApplicator::readWindow() {
-  uint32_t cs = 0;
-  readRaw(*ux_stdin, cs);
-
-  if (ux_stdin->eof()) {
-    return true;
-  }
-
-  SingleWindow* cSWindow = gWindow->allocAppendSingleWindow();
-  initEmptySingleWindow(cSWindow);
-
-  std::string buf(cs, 0);
-  ux_stdin->read(&buf[0], cs);
-  uint32_t pos = 0;
-
-  // TODO: flags
-  uint16_t flags;
-  READ_U16_INTO(flags);
-  if (flags & BFW_FLUSH) {
-    cSWindow->flush_after = true;
-  }
-  if (flags & BFW_DEP_SPAN) {
-	  dep_has_spanned = true;
-  }
-
-  TagVector window_tags;
-  uint16_t tag_count;
-  READ_U16_INTO(tag_count);
-  window_tags.reserve(tag_count);
-  for (uint16_t i = 0; i < tag_count; ++i) {
-    UString tg;
-    READ_STR_INTO(tg);
-    window_tags.push_back(addTag(tg));
-	if (tg[0] == grammar->mapping_prefix) {
-		window_tags.back()->type |= T_MAPPING;
+	if (!input.good()) {
+		u_fprintf(ux_stderr, "Error: Input is null - nothing to parse!\n");
+		CG3Quit(1);
 	}
-	else {
-		window_tags.back()->type &= ~T_MAPPING;
+	if (input.eof()) {
+		u_fprintf(ux_stderr, "Error: Input is empty - nothing to parse!\n");
+		CG3Quit(1);
 	}
-  }
-
-  uint16_t var_count;
-  READ_U16_INTO(var_count);
-  for (uint16_t vn = 0; vn < var_count; ++vn) {
-	  char mode = buf[pos];
-	  ++pos;
-	  uint16_t tag1, tag2;
-	  READ_U16_INTO(tag1);
-	  READ_U16_INTO(tag2);
-	  auto hash1 = window_tags[tag1]->hash;
-	  if (mode == BFV_SETVAR) {
-		  cSWindow->variables_set[hash1] = window_tags[tag2]->hash;
-		  cSWindow->variables_rem.erase(hash1);
-		  cSWindow->variables_output.insert(hash1);
-	  }
-	  else if (mode == BFV_SETVAR_ANY) {
-		  cSWindow->variables_set[hash1] = grammar->tag_any;
-		  cSWindow->variables_rem.erase(hash1);
-		  cSWindow->variables_output.insert(hash1);
-	  }
-	  else if (mode == BFV_REMVAR) {
-		  cSWindow->variables_set.erase(hash1);
-		  cSWindow->variables_rem.insert(hash1);
-		  cSWindow->variables_output.insert(hash1);
-	  }
-  }
-
-  READ_STR_INTO(cSWindow->text);
-  READ_STR_INTO(cSWindow->text_post);
-
-  uint16_t cohort_count;
-  READ_U16_INTO(cohort_count);
-  uint16_t tag;
-  for (uint16_t cn = 0; cn < cohort_count; ++cn) {
-    Cohort* cCohort = alloc_cohort(cSWindow);
-    cCohort->global_number = gWindow->cohort_counter++;
-    ++numCohorts;
-
-    READ_U16_INTO(flags);
-	if (flags & BFC_RELATED) {
-		cCohort->type |= CT_RELATED;
-		has_relations = true;
+	if (!output) {
+		u_fprintf(ux_stderr, "Error: Output is null - cannot write to nothing!\n");
+		CG3Quit(1);
 	}
 
-    READ_U16_INTO(tag);
-    cCohort->wordform = window_tags[tag];
+	if (!grammar) {
+		u_fprintf(ux_stderr, "Error: No grammar provided - cannot continue! Hint: call setGrammar() first.\n");
+		CG3Quit(1);
+	}
 
-    READ_U16_INTO(tag_count);
-    if (tag_count) {
-		cCohort->wread = alloc_reading(cCohort);
-		addTagToReading(*cCohort->wread, cCohort->wordform);
-		for (uint16_t tn = 0; tn < tag_count; ++tn) {
-			READ_U16_INTO(tag);
-			addTagToReading(*cCohort->wread, window_tags[tag], (tn + 1 == tag_count));
+	{
+		std::string header(8, 0);
+		if (!input.read(&header[0], 8)) {
+			u_fprintf(ux_stderr, "Error: Could not read stream header!\n");
+			CG3Quit(1);
+		}
+		if (!is_cg3bsf(header)) {
+			u_fprintf(ux_stderr, "Error: Stream does not start with magic bytes - cannot read as binary!\n");
+			CG3Quit(1);
+		}
+		uint32_t version = reinterpret_cast<uint32_t*>(&header[4])[0];
+		if (version != CG3_BINARY_STREAM) {
+			u_fprintf(ux_stderr, "Error: Stream is version %u but this reader only knows version %u!\n", version, CG3_BINARY_STREAM);
+			CG3Quit(1);
 		}
-    }
+	}
+
+	index();
+
+	uint32_t resetAfter = ((num_windows + 4) * 2 + 1);
 
-	READ_U32_INTO(cCohort->dep_self);
-	READ_U32_INTO(cCohort->dep_parent);
-	gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
+	gWindow->window_span = num_windows;
+
+	auto flush = [&](bool flush_after = false) {
+		auto backSWindow = gWindow->back();
+		if (backSWindow) {
+			backSWindow->flush_after = flush_after;
+		}
 
-	if (cCohort->dep_parent != DEP_NO_PARENT) {
-		has_dep = true;
+		while (!gWindow->next.empty()) {
+			gWindow->shuffleWindowsDown();
+			runGrammarOnWindow();
+		}
+
+		gWindow->shuffleWindowsDown();
+		while (!gWindow->previous.empty()) {
+			SingleWindow* tmp = gWindow->previous.front();
+			printSingleWindow(tmp, output);
+			free_swindow(tmp);
+			gWindow->previous.erase(gWindow->previous.begin());
+		}
+
+		return backSWindow;
+	};
+
+	while (!input.eof()) {
+		auto packet = readPacket();
+		if (packet.type == BFP_WINDOW) {
+			//auto cSWindow = static_cast<SingleWindow*>(packet.payload);
+			++numWindows;
+			if (gWindow->next.size() > num_windows) {
+				gWindow->shuffleWindowsDown();
+				runGrammarOnWindow();
+				if (numWindows % resetAfter == 0) {
+					resetIndexes();
+				}
+			}
+		}
+		else if (packet.type == BFP_COMMAND) {
+			auto cmd = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(packet.payload));
+			if (cmd == BFC_FLUSH) {
+				if (!flush(true)) {
+					printStreamCommand(STR_CMD_FLUSH, *ux_stdout);
+				}
+			}
+			else if (cmd == BFC_EXIT) {
+				printStreamCommand(STR_CMD_EXIT, *ux_stdout);
+				return;
+			}
+			else if (cmd == BFC_IGNORE) {
+				printStreamCommand(STR_CMD_IGNORE, *ux_stdout);
+			}
+			else if (cmd == BFC_RESUME) {
+				printStreamCommand(STR_CMD_RESUME, *ux_stdout);
+			}
+		}
+		else if (packet.type == BFP_TEXT) {
+			auto& text = *static_cast<UString*>(packet.payload);
+			printPlainTextLine(text, *ux_stdout);
+		}
 	}
+	flush(false);
+}
 
-	uint16_t rel_count;
-	READ_U16_INTO(rel_count);
-	for (uint16_t rn = 0; rn < rel_count; ++rn) {
-		READ_U16_INTO(tag);
-		uint32_t head;
-		READ_U32_INTO(head);
-		cCohort->relations_input[window_tags[tag]->hash].insert(head);
+#define READ_U16_INTO(dest)                                 \
+	do {                                                    \
+		(dest) = reinterpret_cast<uint16_t*>(&buf[pos])[0]; \
+		pos += 2;                                           \
+	} while (false)
+
+#define READ_U32_INTO(dest)                                 \
+	do {                                                    \
+		(dest) = reinterpret_cast<uint32_t*>(&buf[pos])[0]; \
+		pos += 4;                                           \
+	} while (false)
+
+#define READ_STR_INTO(dest)                                           \
+	do {                                                              \
+		uint16_t tl = reinterpret_cast<uint16_t*>(&buf[pos])[0];      \
+		pos += 2;                                                     \
+		(dest).clear();                                               \
+		(dest).resize(tl, 0);                                         \
+		int32_t olen = 0;                                             \
+		UErrorCode status = U_ZERO_ERROR;                             \
+		u_strFromUTF8(&(dest)[0], tl, &olen, &buf[pos], tl, &status); \
+		(dest).resize(olen);                                          \
+		pos += tl;                                                    \
+	} while (false)
+
+BinaryPacket BinaryApplicator::readPacket() {
+	BinaryPacket packet;
+	readLE(*ux_stdin, packet.type);
+	if (packet.type == BFP_WINDOW) {
+		readWindow(packet.payload);
 	}
-	if (rel_count) {
-		has_relations = true;
-		gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
-		cCohort->type |= CT_RELATED;
+	else if (packet.type == BFP_COMMAND) {
+		readCommand(packet.payload);
+	}
+	if (packet.type == BFP_TEXT) {
+		readText(packet.payload);
+	}
+	return packet;
+}
+
+void BinaryApplicator::readWindow(void*& payload) {
+	uint32_t cs = 0;
+	readLE(*ux_stdin, cs);
+
+	if (ux_stdin->eof()) {
+		payload = nullptr;
+		return;
+	}
+
+	SingleWindow* cSWindow = gWindow->allocAppendSingleWindow();
+	initEmptySingleWindow(cSWindow);
+
+	std::string buf(cs, 0);
+	ux_stdin->read(&buf[0], cs);
+	uint32_t pos = 0;
+
+	// TODO: flags
+	uint16_t flags;
+	READ_U16_INTO(flags);
+	if (flags & BFW_DEP_SPAN) {
+		dep_has_spanned = true;
+	}
+
+	TagVector window_tags;
+	uint16_t tag_count;
+	READ_U16_INTO(tag_count);
+	window_tags.reserve(tag_count);
+	for (uint16_t i = 0; i < tag_count; ++i) {
+		UString tg;
+		READ_STR_INTO(tg);
+		window_tags.push_back(addTag(tg));
+		if (tg[0] == grammar->mapping_prefix) {
+			window_tags.back()->type |= T_MAPPING;
+		}
+		else {
+			window_tags.back()->type &= ~T_MAPPING;
+		}
+	}
+
+	uint16_t var_count;
+	READ_U16_INTO(var_count);
+	for (uint16_t vn = 0; vn < var_count; ++vn) {
+		char mode = buf[pos];
+		++pos;
+		uint16_t tag1, tag2;
+		READ_U16_INTO(tag1);
+		READ_U16_INTO(tag2);
+		auto hash1 = window_tags[tag1]->hash;
+		if (mode == BFV_SETVAR) {
+			cSWindow->variables_set[hash1] = window_tags[tag2]->hash;
+			cSWindow->variables_rem.erase(hash1);
+			cSWindow->variables_output.insert(hash1);
+		}
+		else if (mode == BFV_SETVAR_ANY) {
+			cSWindow->variables_set[hash1] = grammar->tag_any;
+			cSWindow->variables_rem.erase(hash1);
+			cSWindow->variables_output.insert(hash1);
+		}
+		else if (mode == BFV_REMVAR) {
+			cSWindow->variables_set.erase(hash1);
+			cSWindow->variables_rem.insert(hash1);
+			cSWindow->variables_output.insert(hash1);
+		}
 	}
 
-    READ_STR_INTO(cCohort->text);
-    READ_STR_INTO(cCohort->wblank);
-
-    uint16_t reading_count;
-    READ_U16_INTO(reading_count);
-	if (!reading_count) initEmptyCohort(*cCohort);
-    Reading* prev = nullptr;
-    for (uint16_t rn = 0; rn < reading_count; ++rn) {
-      Reading* cReading = alloc_reading(cCohort);
-      addTagToReading(*cReading, cCohort->wordform);
-
-      READ_U16_INTO(flags);
-
-      READ_U16_INTO(tag);
-	  addTagToReading(*cReading, window_tags[tag]);
-
-      READ_U16_INTO(tag_count);
-	  TagList mappings;
-      for (uint16_t tn = 0; tn < tag_count; ++tn) {
-		  READ_U16_INTO(tag);
-		  if (window_tags[tag]->type & T_MAPPING) {
-			  mappings.push_back(window_tags[tag]);
-		  }
-		  else {
-			  addTagToReading(*cReading, window_tags[tag]);
-		  }
-      }
-	  if (!mappings.empty()) {
-		  splitMappings(mappings, *cCohort, *cReading, true);
-	  }
-
-      if (prev && (flags & BFR_SUBREADING)) {
-		  prev->next = cReading;
-      }
-      else if (flags & BFR_DELETED) {
-		  cCohort->deleted.push_back(cReading);
-      }
-      else {
-		  cCohort->appendReading(cReading);
-      }
-      prev = cReading;
-      ++numReadings;
-    }
-
-	if (cn+1 == cohort_count) {
-		for (auto iter : cCohort->readings) {
-			if (iter->tags.find(endtag) == iter->tags.end()) {
-				addTagToReading(*iter, endtag);
+	READ_STR_INTO(cSWindow->text);
+	READ_STR_INTO(cSWindow->text_post);
+
+	uint16_t cohort_count;
+	READ_U16_INTO(cohort_count);
+	uint16_t tag;
+	for (uint16_t cn = 0; cn < cohort_count; ++cn) {
+		Cohort* cCohort = alloc_cohort(cSWindow);
+		cCohort->global_number = gWindow->cohort_counter++;
+		++numCohorts;
+
+		READ_U16_INTO(flags);
+		if (flags & BFC_RELATED) {
+			cCohort->type |= CT_RELATED;
+			has_relations = true;
+		}
+
+		READ_U16_INTO(tag);
+		cCohort->wordform = window_tags[tag];
+
+		READ_U16_INTO(tag_count);
+		if (tag_count) {
+			cCohort->wread = alloc_reading(cCohort);
+			addTagToReading(*cCohort->wread, cCohort->wordform);
+			for (uint16_t tn = 0; tn < tag_count; ++tn) {
+				READ_U16_INTO(tag);
+				addTagToReading(*cCohort->wread, window_tags[tag], (tn + 1 == tag_count));
+			}
+		}
+
+		READ_U32_INTO(cCohort->dep_self);
+		READ_U32_INTO(cCohort->dep_parent);
+		gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
+
+		if (cCohort->dep_parent != DEP_NO_PARENT) {
+			has_dep = true;
+		}
+
+		uint16_t rel_count;
+		READ_U16_INTO(rel_count);
+		for (uint16_t rn = 0; rn < rel_count; ++rn) {
+			READ_U16_INTO(tag);
+			uint32_t head;
+			READ_U32_INTO(head);
+			cCohort->relations_input[window_tags[tag]->hash].insert(head);
+		}
+		if (rel_count) {
+			has_relations = true;
+			gWindow->relation_map[cCohort->dep_self] = cCohort->global_number;
+			cCohort->type |= CT_RELATED;
+		}
+
+		READ_STR_INTO(cCohort->text);
+		READ_STR_INTO(cCohort->wblank);
+
+		uint16_t reading_count;
+		READ_U16_INTO(reading_count);
+		if (!reading_count)
+			initEmptyCohort(*cCohort);
+		Reading* prev = nullptr;
+		for (uint16_t rn = 0; rn < reading_count; ++rn) {
+			Reading* cReading = alloc_reading(cCohort);
+			addTagToReading(*cReading, cCohort->wordform);
+
+			READ_U16_INTO(flags);
+
+			READ_U16_INTO(tag);
+			addTagToReading(*cReading, window_tags[tag]);
+
+			READ_U16_INTO(tag_count);
+			TagList mappings;
+			for (uint16_t tn = 0; tn < tag_count; ++tn) {
+				READ_U16_INTO(tag);
+				if (window_tags[tag]->type & T_MAPPING) {
+					mappings.push_back(window_tags[tag]);
+				}
+				else {
+					addTagToReading(*cReading, window_tags[tag]);
+				}
+			}
+			if (!mappings.empty()) {
+				splitMappings(mappings, *cCohort, *cReading, true);
+			}
+
+			if (prev && (flags & BFR_SUBREADING)) {
+				prev->next = cReading;
+			}
+			else if (flags & BFR_DELETED) {
+				cCohort->deleted.push_back(cReading);
+			}
+			else {
+				cCohort->appendReading(cReading);
+			}
+			prev = cReading;
+			++numReadings;
+		}
+
+		if (cn + 1 == cohort_count) {
+			for (auto iter : cCohort->readings) {
+				if (iter->tags.find(endtag) == iter->tags.end()) {
+					addTagToReading(*iter, endtag);
+				}
 			}
 		}
+
+		insert_if_exists(cCohort->possible_sets, grammar->sets_any);
+		cSWindow->appendCohort(cCohort);
 	}
 
-    insert_if_exists(cCohort->possible_sets, grammar->sets_any);
-    cSWindow->appendCohort(cCohort);
-  }
+	payload = cSWindow;
+}
+
+void BinaryApplicator::readCommand(void*& payload) {
+	auto cmd = readLE<uint8_t>(*ux_stdin);
+	payload = reinterpret_cast<void*>(static_cast<uintptr_t>(cmd));
+}
 
-  return cSWindow->flush_after;
+void BinaryApplicator::readText(void*& payload) {
+	readUTF8_LE(*ux_stdin, text);
+	payload = &text;
 }
 
-#define WRITE_U16_INTO(n, buffer) \
-  do { \
-    std::string tmp(2, 0);	       \
-    auto tmp_n = static_cast<uint16_t>(n); \
-    tmp.assign(reinterpret_cast<char*>(&tmp_n), 2);	\
-    (buffer) += tmp; \
-  } while (false)
-
-#define WRITE_U32_INTO(n, buffer) \
-  do { \
-    std::string tmp(4, 0);	       \
-    auto tmp_n = static_cast<uint32_t>(n); \
-    tmp.assign(reinterpret_cast<char*>(&tmp_n), 4);	\
-    (buffer) += tmp; \
-  } while (false)
-
-#define WRITE_TAG_INTO(tag, buffer) \
-  do { \
-    if (tag_index.find((tag)) == tag_index.end()) { \
-      tag_index[(tag)] = UI32(tags_to_write.size()); \
-      tags_to_write.push_back((tag)); \
-    } \
-    WRITE_U16_INTO(tag_index[(tag)], buffer); \
-  } while (false)
-
-#define WRITE_STR_INTO(s, buffer) \
-  do { \
-    std::string tmp((s).size() * 4, 0);		\
-    int32_t olen = 0; \
-    UErrorCode status = U_ZERO_ERROR; \
-    u_strToUTF8(&tmp[0], SI32((s).size() * 4 - 1), &olen, (s).data(), SI32((s).size()), &status); \
-    tmp.resize(olen); \
-    WRITE_U16_INTO(UI16(olen), (buffer)); \
-    (buffer) += tmp; \
-  } while (false)
+#define WRITE_U16_INTO(n, buffer)                       \
+	do {                                                \
+		std::string tmp(2, 0);                          \
+		auto tmp_n = static_cast<uint16_t>(n);          \
+		tmp.assign(reinterpret_cast<char*>(&tmp_n), 2); \
+		(buffer) += tmp;                                \
+	} while (false)
+
+#define WRITE_U32_INTO(n, buffer)                       \
+	do {                                                \
+		std::string tmp(4, 0);                          \
+		auto tmp_n = static_cast<uint32_t>(n);          \
+		tmp.assign(reinterpret_cast<char*>(&tmp_n), 4); \
+		(buffer) += tmp;                                \
+	} while (false)
+
+#define WRITE_TAG_INTO(tag, buffer)                        \
+	do {                                                   \
+		if (tag_index.find((tag)) == tag_index.end()) {    \
+			tag_index[(tag)] = UI32(tags_to_write.size()); \
+			tags_to_write.push_back((tag));                \
+		}                                                  \
+		WRITE_U16_INTO(tag_index[(tag)], buffer);          \
+	} while (false)
+
+#define WRITE_STR_INTO(s, buffer)                                                                     \
+	do {                                                                                              \
+		std::string tmp((s).size() * 4, 0);                                                           \
+		int32_t olen = 0;                                                                             \
+		UErrorCode status = U_ZERO_ERROR;                                                             \
+		u_strToUTF8(&tmp[0], SI32((s).size() * 4 - 1), &olen, (s).data(), SI32((s).size()), &status); \
+		tmp.resize(olen);                                                                             \
+		WRITE_U16_INTO(UI16(olen), (buffer));                                                         \
+		(buffer) += tmp;                                                                              \
+	} while (false)
 
 void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) {
-  if (window->number == 1) {
-    output.write("CGBF", 4);
-    std::string version;
-    WRITE_U32_INTO(CG3_BINARY_STREAM, version);
-    output.write(version.data(), 4);
-  }
-
-  TagVector tags_to_write;
-  std::map<Tag*, uint32_t> tag_index;
-
-  uint16_t var_count = 0;
-  std::string var_buffer;
-  for (auto var : window->variables_output) {
-	  ++var_count;
-	  Tag* key = grammar->single_tags[var];
-	  auto iter = window->variables_set.find(var);
-	  if (iter != window->variables_set.end()) {
-		  if (iter->second != grammar->tag_any) {
-			  var_buffer += static_cast<char>(BFV_SETVAR);
-			  WRITE_TAG_INTO(key, var_buffer);
-			  WRITE_TAG_INTO(grammar->single_tags[iter->second], var_buffer);
-		  }
-		  else {
-			  var_buffer += static_cast<char>(BFV_SETVAR_ANY);
-			  WRITE_TAG_INTO(key, var_buffer);
-			  WRITE_U16_INTO(0, var_buffer);
-		  }
-	  }
-	  else {
-		  var_buffer += static_cast<char>(BFV_REMVAR);
-		  WRITE_TAG_INTO(key, var_buffer);
-		  WRITE_U16_INTO(0, var_buffer);
-	  }
-  }
-
-  // Move text belonging to removed cohorts to prior not-removed cohorts, or the containing window
-  for (size_t i = 0; i < window->all_cohorts.size(); ++i) {
-	  auto cohort = window->all_cohorts[i];
-	  if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
-		  if (!cohort->text.empty()) {
-			  for (size_t j = i; j > 0; --j) {
-				  if (window->all_cohorts[j - 1]->local_number == 0 || (window->all_cohorts[j - 1]->type & CT_REMOVED)) {
-					  continue;
-				  }
-				  window->all_cohorts[j-1]->text += cohort->text;
-				  cohort->text.clear();
-			  }
-			  window->text += cohort->text;
-			  cohort->text.clear();
-		  }
-	  }
-  }
-
-  std::string cohort_buffer;
-  uint16_t cohort_count = 0;
-  for (auto& cohort : window->all_cohorts) {
-    if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
-      continue;
-    }
-    ++cohort_count;
-
-    uint16_t flags = 0;
-	if (cohort->type & CT_RELATED) {
-		flags |= BFC_RELATED;
+	if (!header_done) {
+		output.write("CGBF", 4);
+		writeLE(output, CG3_BINARY_STREAM);
+		header_done = true;
 	}
-    WRITE_U16_INTO(flags, cohort_buffer);
-
-    WRITE_TAG_INTO(cohort->wordform, cohort_buffer);
-    if (cohort->wread) {
-		std::string tag_buffer;
-		uint16_t tag_count = 0;
-		for (auto tter : cohort->wread->tags_list) {
-			if (tter == cohort->wordform->hash) {
-				continue;
-			}
-			WRITE_TAG_INTO(grammar->single_tags[tter], tag_buffer);
-			++tag_count;
-		}
-		WRITE_U16_INTO(tag_count, cohort_buffer);
-		cohort_buffer += tag_buffer;
-    }
-    else {
-		WRITE_U16_INTO(0, cohort_buffer);
-    }
-
-    WRITE_U32_INTO(cohort->global_number, cohort_buffer);
-	if (cohort->dep_parent == 0 || cohort->dep_parent == DEP_NO_PARENT) {
-		WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
-	}
-	else {
-		if (gWindow->cohort_map.find(cohort->dep_parent) != gWindow->cohort_map.end()) {
-			auto pr = gWindow->cohort_map[cohort->dep_parent];
-			if (pr->local_number == 0) {
-				WRITE_U32_INTO(0, cohort_buffer);
+
+	writeLE(output, UI8(BFP_WINDOW));
+
+	TagVector tags_to_write;
+	std::map<Tag*, uint32_t> tag_index;
+
+	uint16_t var_count = 0;
+	std::string var_buffer;
+	for (auto var : window->variables_output) {
+		++var_count;
+		Tag* key = grammar->single_tags[var];
+		auto iter = window->variables_set.find(var);
+		if (iter != window->variables_set.end()) {
+			if (iter->second != grammar->tag_any) {
+				var_buffer += static_cast<char>(BFV_SETVAR);
+				WRITE_TAG_INTO(key, var_buffer);
+				WRITE_TAG_INTO(grammar->single_tags[iter->second], var_buffer);
 			}
 			else {
-				WRITE_U32_INTO(pr->global_number, cohort_buffer);
+				var_buffer += static_cast<char>(BFV_SETVAR_ANY);
+				WRITE_TAG_INTO(key, var_buffer);
+				WRITE_U16_INTO(0, var_buffer);
 			}
 		}
 		else {
-			WRITE_U32_INTO(DEP_NO_PARENT, cohort_buffer);
+			var_buffer += static_cast<char>(BFV_REMVAR);
+			WRITE_TAG_INTO(key, var_buffer);
+			WRITE_U16_INTO(0, var_buffer);
 		}
 	}
 
-	std::string rel_buffer;
-	uint16_t rel_count = 0;
-	for (const auto& miter : cohort->relations) {
-		auto it = grammar->single_tags.find(miter.first);
-		if (it == grammar->single_tags.end()) {
-			it = grammar->single_tags.find(miter.first);
-		}
-		for (auto siter : miter.second) {
-			++rel_count;
-			WRITE_TAG_INTO(it->second, rel_buffer);
-			WRITE_U32_INTO(siter, rel_buffer);
+	// Move text belonging to removed cohorts to prior not-removed cohorts, or the containing window
+	for (size_t i = 0; i < window->all_cohorts.size(); ++i) {
+		auto cohort = window->all_cohorts[i];
+		if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
+			if (!cohort->text.empty()) {
+				for (size_t j = i; j > 0; --j) {
+					if (window->all_cohorts[j - 1]->local_number == 0 || (window->all_cohorts[j - 1]->type & CT_REMOVED)) {
+						continue;
+					}
+					window->all_cohorts[j - 1]->text += cohort->text;
+					cohort->text.clear();
+				}
+				window->text += cohort->text;
+				cohort->text.clear();
+			}
 		}
 	}
-	WRITE_U16_INTO(rel_count, cohort_buffer);
-	cohort_buffer += rel_buffer;
 
-    WRITE_STR_INTO(cohort->text, cohort_buffer);
-    WRITE_STR_INTO(cohort->wblank, cohort_buffer);
-
-    std::string reading_buffer;
-    uint16_t reading_count = 0;
-    std::sort(cohort->readings.begin(), cohort->readings.end(), Reading::cmp_number);
-    for (auto top_reading : cohort->readings) {
-		if (top_reading->noprint) {
+	std::string cohort_buffer;
+	uint16_t cohort_count = 0;
+	for (auto& cohort : window->all_cohorts) {
+		if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
 			continue;
 		}
-		auto reading = top_reading;
-		while (reading) {
-			++reading_count;
-			uint16_t flags = 0;
-			if (reading != top_reading) {
-				flags |= BFR_SUBREADING;
-			}
-			WRITE_U16_INTO(flags, reading_buffer);
-			WRITE_TAG_INTO(grammar->single_tags[reading->baseform], reading_buffer);
+		cohort->unignoreAll();
+		++cohort_count;
+
+		uint16_t flags = 0;
+		if (cohort->type & CT_RELATED) {
+			flags |= BFC_RELATED;
+		}
+		WRITE_U16_INTO(flags, cohort_buffer);
+
+		WRITE_TAG_INTO(cohort->wordform, cohort_buffer);
+		if (cohort->wread) {
 			std::string tag_buffer;
 			uint16_t tag_count = 0;
-			uint32SortedVector unique;
-			for (auto& tter : reading->tags_list) {
-				auto tag = grammar->single_tags[tter];
-				if (tter == reading->baseform || tter == reading->parent->wordform->hash) {
+			for (auto tter : cohort->wread->tags_list) {
+				if (tter == cohort->wordform->hash) {
 					continue;
 				}
-				if (tag->type & (T_DEPENDENCY | T_RELATION)) {
-					continue;
+				WRITE_TAG_INTO(grammar->single_tags[tter], tag_buffer);
+				++tag_count;
+			}
+			WRITE_U16_INTO(tag_count, cohort_buffer);
+			cohort_buffer += tag_buffer;
+		}
+		else {
+			WRITE_U16_INTO(0, cohort_buffer);
+		}
+
+		WRITE_U32_INTO(cohort->global_number, cohort_buffer);
+		if (cohort->dep_parent == 0 || cohort->dep_parent == DEP_NO_PARENT) {
+			WRITE_U32_INTO(cohort->dep_parent, cohort_buffer);
+		}
+		else {
+			if (gWindow->cohort_map.find(cohort->dep_parent) != gWindow->cohort_map.end()) {
+				auto pr = gWindow->cohort_map[cohort->dep_parent];
+				if (pr->local_number == 0) {
+					WRITE_U32_INTO(0, cohort_buffer);
+				}
+				else {
+					WRITE_U32_INTO(pr->global_number, cohort_buffer);
+				}
+			}
+			else {
+				WRITE_U32_INTO(DEP_NO_PARENT, cohort_buffer);
+			}
+		}
+
+		std::string rel_buffer;
+		uint16_t rel_count = 0;
+		for (const auto& miter : cohort->relations) {
+			auto it = grammar->single_tags.find(miter.first);
+			if (it == grammar->single_tags.end()) {
+				it = grammar->single_tags.find(miter.first);
+			}
+			for (auto siter : miter.second) {
+				++rel_count;
+				WRITE_TAG_INTO(it->second, rel_buffer);
+				WRITE_U32_INTO(siter, rel_buffer);
+			}
+		}
+		WRITE_U16_INTO(rel_count, cohort_buffer);
+		cohort_buffer += rel_buffer;
+
+		WRITE_STR_INTO(cohort->text, cohort_buffer);
+		WRITE_STR_INTO(cohort->wblank, cohort_buffer);
+
+		std::string reading_buffer;
+		uint16_t reading_count = 0;
+		std::sort(cohort->readings.begin(), cohort->readings.end(), Reading::cmp_number);
+		for (auto top_reading : cohort->readings) {
+			if (top_reading->noprint) {
+				continue;
+			}
+			auto reading = top_reading;
+			while (reading) {
+				++reading_count;
+				uint16_t flags = 0;
+				if (reading != top_reading) {
+					flags |= BFR_SUBREADING;
 				}
-				if (unique_tags) {
-					if (unique.find(tter) != unique.end()) {
+				WRITE_U16_INTO(flags, reading_buffer);
+				WRITE_TAG_INTO(grammar->single_tags[reading->baseform], reading_buffer);
+				std::string tag_buffer;
+				uint16_t tag_count = 0;
+				uint32SortedVector unique;
+				for (auto& tter : reading->tags_list) {
+					auto tag = grammar->single_tags[tter];
+					if (tter == reading->baseform || tter == reading->parent->wordform->hash) {
+						continue;
+					}
+					if (tag->type & (T_DEPENDENCY | T_RELATION)) {
 						continue;
 					}
-					unique.insert(tter);
+					if (unique_tags) {
+						if (unique.find(tter) != unique.end()) {
+							continue;
+						}
+						unique.insert(tter);
+					}
+					WRITE_TAG_INTO(tag, tag_buffer);
+					++tag_count;
 				}
-				WRITE_TAG_INTO(tag, tag_buffer);
-				++tag_count;
+				WRITE_U16_INTO(tag_count, reading_buffer);
+				reading_buffer += tag_buffer;
+				reading = reading->next;
 			}
-			WRITE_U16_INTO(tag_count, reading_buffer);
-			reading_buffer += tag_buffer;
-			reading = reading->next;
-		}
-    }
-    WRITE_U16_INTO(reading_count, cohort_buffer);
-    cohort_buffer += reading_buffer;
-  }
-
-  std::string header_buffer;
-
-  uint16_t flags = 0;
-  if (window->flush_after) {
-    flags |= BFW_FLUSH;
-  }
-  if (dep_has_spanned) {
-	  flags |= BFW_DEP_SPAN;
-  }
-  WRITE_U16_INTO(flags, header_buffer);
-
-  WRITE_U16_INTO(tags_to_write.size(), header_buffer);
-  for (auto& tag : tags_to_write) {
-    WRITE_STR_INTO(tag->tag, header_buffer);
-  }
-
-  WRITE_U16_INTO(var_count, header_buffer);
-  header_buffer += var_buffer;
-
-  WRITE_STR_INTO(window->text, header_buffer);
-  WRITE_STR_INTO(window->text_post, header_buffer);
-
-  WRITE_U16_INTO(cohort_count, header_buffer);
-
-  auto total_size = UI32(header_buffer.size() + cohort_buffer.size());
-  writeRaw(output, total_size);
-  output.write(header_buffer.data(), header_buffer.size());
-  output.write(cohort_buffer.data(), cohort_buffer.size());
-  output.flush();
+		}
+		WRITE_U16_INTO(reading_count, cohort_buffer);
+		cohort_buffer += reading_buffer;
+	}
+
+	std::string header_buffer;
+
+	uint16_t flags = 0;
+	if (dep_has_spanned) {
+		flags |= BFW_DEP_SPAN;
+	}
+	WRITE_U16_INTO(flags, header_buffer);
+
+	WRITE_U16_INTO(tags_to_write.size(), header_buffer);
+	for (auto& tag : tags_to_write) {
+		WRITE_STR_INTO(tag->tag, header_buffer);
+	}
+
+	WRITE_U16_INTO(var_count, header_buffer);
+	header_buffer += var_buffer;
+
+	WRITE_STR_INTO(window->text, header_buffer);
+	WRITE_STR_INTO(window->text_post, header_buffer);
+
+	WRITE_U16_INTO(cohort_count, header_buffer);
+
+	auto total_size = UI32(header_buffer.size() + cohort_buffer.size());
+	writeLE(output, total_size);
+	output.write(header_buffer.data(), header_buffer.size());
+	output.write(cohort_buffer.data(), cohort_buffer.size());
+
+	if (window->flush_after) {
+		printStreamCommand(STR_CMD_FLUSH, output);
+	}
+
+	output.flush();
+}
+
+void BinaryApplicator::printStreamCommand(UStringView cmd, std::ostream& output) {
+	if (!header_done) {
+		output.write("CGBF", 4);
+		writeLE(output, CG3_BINARY_STREAM);
+		header_done = true;
+	}
+
+	writeLE(output, UI8(BFP_COMMAND));
+	if (cmd == STR_CMD_FLUSH) {
+		writeLE(output, UI8(BFC_FLUSH));
+	}
+	else if (cmd == STR_CMD_EXIT) {
+		writeLE(output, UI8(BFC_EXIT));
+	}
+	else if (cmd == STR_CMD_IGNORE) {
+		writeLE(output, UI8(BFC_IGNORE));
+	}
+	else if (cmd == STR_CMD_RESUME) {
+		writeLE(output, UI8(BFC_RESUME));
+	}
 }
+
+void BinaryApplicator::printPlainTextLine(UStringView line, std::ostream& output) {
+	if (!header_done) {
+		output.write("CGBF", 4);
+		writeLE(output, CG3_BINARY_STREAM);
+		header_done = true;
+	}
+
+	writeLE(output, UI8(BFP_TEXT));
+	writeUTF8_LE(output, line);
+}
+
 }
diff --git a/src/BinaryApplicator.hpp b/src/BinaryApplicator.hpp
index edcace99..e6cb3f7a 100644
--- a/src/BinaryApplicator.hpp
+++ b/src/BinaryApplicator.hpp
@@ -18,8 +18,8 @@
 */
 
 #pragma once
-#ifndef GRAMMARAPPLICATORBINARY_H
-#define GRAMMARAPPLICATORBINARY_H
+#ifndef c6d28b7452ec699b_GRAMMARAPPLICATORBINARY_H
+#define c6d28b7452ec699b_GRAMMARAPPLICATORBINARY_H
 
 #include "GrammarApplicator.hpp"
 
@@ -27,8 +27,7 @@ namespace CG3 {
 
 enum BinaryFormatFlags {
 	// Window
-	BFW_FLUSH         = (1 << 0),
-	BFW_DEP_SPAN      = (1 << 1),
+	BFW_DEP_SPAN      = (1 << 0),
 	// Cohort
 	BFC_RELATED       = (1 << 0),
 	// Reading
@@ -40,17 +39,43 @@ enum BinaryFormatFlags {
 	BFV_REMVAR        = 3,
 };
 
+enum BinaryPacketType : uint8_t {
+	BFP_INVALID       = 0,
+	BFP_WINDOW        = 1,
+	BFP_COMMAND       = 2,
+	BFP_TEXT          = 3,
+};
+
+enum BinaryCommandType : uint8_t {
+	BFC_FLUSH         = 1,
+	BFC_EXIT          = 2,
+	BFC_IGNORE        = 3,
+	BFC_RESUME        = 4,
+};
+
+struct BinaryPacket {
+	BinaryPacketType type = BFP_INVALID;
+	void* payload = nullptr;
+};
+
 class BinaryApplicator : public virtual GrammarApplicator {
 public:
-  BinaryApplicator(std::ostream& ux_err);
+	BinaryApplicator(std::ostream& ux_err);
 
-  void runGrammarOnText(std::istream& input, std::ostream& output);
+	void runGrammarOnText(std::istream& input, std::ostream& output);
 
 protected:
-  void printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling = false) override;
+	void printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling = false) override;
+	void printStreamCommand(UStringView cmd, std::ostream& output) override;
+	void printPlainTextLine(UStringView line, std::ostream& output) override;
 
 private:
-	bool readWindow();
+	bool header_done = false;
+	UString text;
+	BinaryPacket readPacket();
+	void readWindow(void*& payload);
+	void readCommand(void*& payload);
+	void readText(void*& payload);
 };
 }
 
diff --git a/src/FormatConverter.cpp b/src/FormatConverter.cpp
index bd0e72d2..a38094e1 100644
--- a/src/FormatConverter.cpp
+++ b/src/FormatConverter.cpp
@@ -244,8 +244,10 @@ void FormatConverter::printStreamCommand(UStringView cmd, std::ostream& output)
 		JsonlApplicator::printStreamCommand(cmd, output);
 		break;
 	}
-	case CG3SF_BINARY:
+	case CG3SF_BINARY: {
+		BinaryApplicator::printStreamCommand(cmd, output);
 		break;
+	}
 	case CG3SF_CG:
 	case CG3SF_APERTIUM:
 	case CG3SF_FST:
@@ -264,8 +266,10 @@ void FormatConverter::printPlainTextLine(UStringView line, std::ostream& output)
 		JsonlApplicator::printPlainTextLine(line, output);
 		break;
 	}
-	case CG3SF_BINARY:
+	case CG3SF_BINARY: {
+		BinaryApplicator::printPlainTextLine(line, output);
 		break;
+	}
 	case CG3SF_CG:
 	case CG3SF_APERTIUM:
 	case CG3SF_FST:
diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp
index 1628956f..d9e84e1f 100644
--- a/src/GrammarApplicator.cpp
+++ b/src/GrammarApplicator.cpp
@@ -581,7 +581,7 @@ void GrammarApplicator::printSingleWindow(SingleWindow* window, std::ostream& ou
 	}
 
 	if (window->flush_after) {
-		printStreamCommand(UString(STR_CMD_FLUSH), output);
+		printStreamCommand(STR_CMD_FLUSH, output);
 	}
 	u_fflush(output);
 }
@@ -604,7 +604,7 @@ void GrammarApplicator::pipeOutReading(const Reading* reading, std::ostream& out
 	writeRaw(ss, flags);
 
 	if (reading->baseform) {
-		writeUTF8String(ss, grammar->single_tags.find(reading->baseform)->second->tag);
+		writeUTF8_Raw(ss, grammar->single_tags.find(reading->baseform)->second->tag);
 	}
 
 	uint32_t cs = 0;
@@ -628,7 +628,7 @@ void GrammarApplicator::pipeOutReading(const Reading* reading, std::ostream& out
 		if (tag->type & T_DEPENDENCY && has_dep) {
 			continue;
 		}
-		writeUTF8String(ss, tag->tag);
+		writeUTF8_Raw(ss, tag->tag);
 	}
 
 	const auto& str = ss.str();
@@ -655,7 +655,7 @@ void GrammarApplicator::pipeOutCohort(const Cohort* cohort, std::ostream& output
 		writeRaw(ss, cohort->dep_parent);
 	}
 
-	writeUTF8String(ss, cohort->wordform->tag);
+	writeUTF8_Raw(ss, cohort->wordform->tag);
 
 	uint32_t cs = UI32(cohort->readings.size());
 	writeRaw(ss, cs);
@@ -663,7 +663,7 @@ void GrammarApplicator::pipeOutCohort(const Cohort* cohort, std::ostream& output
 		pipeOutReading(rter1, ss);
 	}
 	if (!cohort->text.empty()) {
-		writeUTF8String(ss, cohort->text);
+		writeUTF8_Raw(ss, cohort->text);
 	}
 
 	const auto& str = ss.str();
@@ -718,7 +718,7 @@ void GrammarApplicator::pipeInReading(Reading* reading, Process& input, bool for
 	reading->deleted = (flags & (1 << 2)) != 0;
 
 	if (flags & (1 << 3)) {
-		UString str = readUTF8String(ss);
+		UString str = readUTF8_Raw(ss);
 		if (str != grammar->single_tags.find(reading->baseform)->second->tag) {
 			Tag* tag = addTag(str);
 			reading->baseform = tag->hash;
@@ -743,7 +743,7 @@ void GrammarApplicator::pipeInReading(Reading* reading, Process& input, bool for
 	}
 
 	for (size_t i = 0; i < cs; ++i) {
-		UString str = readUTF8String(ss);
+		UString str = readUTF8_Raw(ss);
 		Tag* tag = addTag(str);
 		reading->tags_list.push_back(tag->hash);
 		if (debug_level > 1) {
@@ -784,7 +784,7 @@ void GrammarApplicator::pipeInCohort(Cohort* cohort, Process& input) {
 	}
 
 	bool force_readings = false;
-	UString str = readUTF8String(input);
+	UString str = readUTF8_Raw(input);
 	if (str != cohort->wordform->tag) {
 		Tag* tag = addTag(str);
 		cohort->wordform = tag;
@@ -803,7 +803,7 @@ void GrammarApplicator::pipeInCohort(Cohort* cohort, Process& input) {
 	}
 
 	if (flags & (1 << 0)) {
-		cohort->text = readUTF8String(input);
+		cohort->text = readUTF8_Raw(input);
 		if (debug_level > 1) {
 			u_fprintf(ux_stderr, "DEBUG: cohort text %S\n", cohort->text.data());
 		}
diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index 3171fb07..f23650fb 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -154,11 +154,14 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 		variables_output.clear();
 	};
 
-	if (fmt_output == CG3SF_BINARY) {
-		cSWindow = gWindow->allocAppendSingleWindow();
-		initEmptySingleWindow(cSWindow);
-		lSWindow = cSWindow;
-	}
+	auto binary_maybe_window = [&]() {
+		if (fmt_output == CG3SF_BINARY) {
+			cSWindow = gWindow->allocAppendSingleWindow();
+			initEmptySingleWindow(cSWindow);
+			lSWindow = cSWindow;
+		}
+	};
+	binary_maybe_window();
 
 	while (!input.eof()) {
 		++lines;
@@ -502,7 +505,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 					}
 
 					if (!backSWindow) {
-						printStreamCommand(UString(STR_CMD_FLUSH), output);
+						printStreamCommand(STR_CMD_FLUSH, output);
 					}
 					line[0] = 0;
 					variables.clear();
@@ -517,7 +520,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 					}
 					is_cmd = true;
 					ignoreinput = true;
-					printStreamCommand(UString(STR_CMD_IGNORE), output);
+					printStreamCommand(STR_CMD_IGNORE, output);
 					line[0] = 0;
 				}
 				else if (&cleaned[0] == STR_CMD_RESUME) {
@@ -526,7 +529,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 					}
 					is_cmd = true;
 					ignoreinput = false;
-					printStreamCommand(UString(STR_CMD_RESUME), output);
+					printStreamCommand(STR_CMD_RESUME, output);
 					line[0] = 0;
 				}
 				else if (&cleaned[0] == STR_CMD_EXIT) {
@@ -534,7 +537,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 						u_fprintf(ux_stderr, "Info: EXIT encountered on line %u. Exiting...\n", numLines);
 					}
 					is_cmd = true;
-					printStreamCommand(UString(STR_CMD_EXIT), output);
+					printStreamCommand(STR_CMD_EXIT, output);
 					goto CGCMD_EXIT;
 				}
 				else if (u_strncmp(&cleaned[0], STR_CMD_SETVAR.data(), SI32(STR_CMD_SETVAR.size())) == 0) {
@@ -691,8 +694,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 		cSWindow = nullptr;
 	}
 	if (fmt_output == CG3SF_BINARY && !variables_output.empty()) {
-		cSWindow = gWindow->allocAppendSingleWindow();
-		initEmptySingleWindow(cSWindow);
+		binary_maybe_window();
 		adopt_variables();
 	}
 	while (!gWindow->next.empty()) {
diff --git a/src/JsonlApplicator.cpp b/src/JsonlApplicator.cpp
index 6d291aea..5f139bf2 100644
--- a/src/JsonlApplicator.cpp
+++ b/src/JsonlApplicator.cpp
@@ -778,7 +778,7 @@ void JsonlApplicator::printSingleWindow(SingleWindow* window, std::ostream& outp
 
 	// Print flush command if needed
 	if (window->flush_after) {
-		printStreamCommand(UString(STR_CMD_FLUSH), output);
+		printStreamCommand(STR_CMD_FLUSH, output);
 	}
 }
 
diff --git a/src/inlines.hpp b/src/inlines.hpp
index 48c53f91..a693e90f 100644
--- a/src/inlines.hpp
+++ b/src/inlines.hpp
@@ -497,7 +497,58 @@ inline void readRaw(S& stream, T& value) {
 	stream.read(reinterpret_cast<char*>(&value), sizeof(T));
 }
 
-inline void writeUTF8String(std::ostream& output, const UChar* str, size_t len = 0) {
+template<typename T>
+inline void writeBE(std::ostream& stream, T value) {
+	value = be::native_to_big(value);
+	writeRaw(stream, value);
+}
+
+template<>
+inline void writeBE(std::ostream& stream, double value) {
+	int exp = 0;
+	auto mant64 = UI64(SI64(DBL(std::numeric_limits<int64_t>::max()) * frexp(value, &exp)));
+	auto exp32 = UI32(exp);
+	writeBE(stream, mant64);
+	writeBE(stream, exp32);
+}
+
+template<typename S, typename T>
+inline void writeLE(S& stream, T value) {
+	value = be::native_to_little(value);
+	writeRaw(stream, value);
+}
+
+template<typename T>
+inline T readBE(std::istream& stream) {
+	T value;
+	readRaw(stream, value);
+	return be::big_to_native(value);
+}
+
+template<>
+inline double readBE(std::istream& stream) {
+	auto mant64 = readBE<uint64_t>(stream);
+	auto exp = static_cast<int>(readBE<int32_t>(stream));
+
+	auto value = DBL(SI64(mant64)) / DBL(std::numeric_limits<int64_t>::max());
+
+	return ldexp(value, exp);
+}
+
+template<typename S, typename T>
+inline void readLE(S& stream, T& value) {
+	readRaw(stream, value);
+	be::little_to_native_inplace(value);
+}
+
+template<typename T>
+inline T readLE(std::istream& stream) {
+	T value;
+	readRaw(stream, value);
+	return be::little_to_native(value);
+}
+
+inline void writeUTF8_Raw(std::ostream& output, const UChar* str, size_t len = 0) {
 	if (len == 0) {
 		len = u_strlen(str);
 	}
@@ -512,12 +563,35 @@ inline void writeUTF8String(std::ostream& output, const UChar* str, size_t len =
 	output.write(&buffer[0], cs);
 }
 
-inline void writeUTF8String(std::ostream& output, const UString& str) {
-	writeUTF8String(output, str.data(), str.size());
+inline void writeUTF8_Raw(std::ostream& output, const UString& str) {
+	writeUTF8_Raw(output, str.data(), str.size());
+}
+
+inline void writeUTF8_LE(std::ostream& output, const UChar* str, size_t len = 0) {
+	if (len == 0) {
+		len = u_strlen(str);
+	}
+
+	std::vector<char> buffer(len * 4);
+	int32_t olen = 0;
+	UErrorCode status = U_ZERO_ERROR;
+	u_strToUTF8(&buffer[0], SI32(len * 4 - 1), &olen, str, SI32(len), &status);
+
+	auto cs = UI16(olen);
+	writeLE(output, cs);
+	output.write(&buffer[0], cs);
+}
+
+inline void writeUTF8_LE(std::ostream& output, const UString& str) {
+	writeUTF8_LE(output, str.data(), str.size());
+}
+
+inline void writeUTF8_LE(std::ostream& output, const UStringView& str) {
+	writeUTF8_LE(output, str.data(), str.size());
 }
 
 template<typename S>
-inline UString readUTF8String(S& input) {
+inline UString readUTF8_Raw(S& input) {
 	uint16_t len = 0;
 	readRaw(input, len);
 
@@ -534,54 +608,30 @@ inline UString readUTF8String(S& input) {
 	return rv;
 }
 
-#ifdef _MSC_VER
-	// warning C4127: conditional expression is constant
-	#pragma warning (disable: 4127)
-#endif
+template<typename S, typename Str>
+inline void readUTF8_LE(S& input, Str& rv) {
+	uint16_t len = 0;
+	readLE(input, len);
 
-template<typename T>
-inline void writeBE(std::ostream& stream, T value) {
-	value = be::native_to_big(value);
-	stream.write(reinterpret_cast<const char*>(&value), sizeof(value));
-	if (!stream) {
-		throw std::runtime_error("Stream was in bad state in writeBE()");
-	}
-}
+	rv.clear();
+	rv.resize(len);
+	std::vector<char> buffer(len);
+	input.read(&buffer[0], len);
 
-template<>
-inline void writeBE(std::ostream& stream, double value) {
-	int exp = 0;
-	auto mant64 = UI64(SI64(DBL(std::numeric_limits<int64_t>::max()) * frexp(value, &exp)));
-	auto exp32 = UI32(exp);
-	writeBE(stream, mant64);
-	writeBE(stream, exp32);
-}
+	int32_t olen = 0;
+	UErrorCode status = U_ZERO_ERROR;
+	u_strFromUTF8(&rv[0], len, &olen, &buffer[0], len, &status);
 
-template<typename T>
-inline T readBE(std::istream& stream) {
-	if (!stream) {
-		throw std::runtime_error("Stream was in bad state in readBE()");
-	}
-	T tmp;
-	stream.read(reinterpret_cast<char*>(&tmp), sizeof(tmp));
-	return be::big_to_native(tmp);
+	rv.resize(olen);
 }
 
-template<>
-inline double readBE(std::istream& stream) {
-	auto mant64 = readBE<uint64_t>(stream);
-	auto exp = static_cast<int>(readBE<int32_t>(stream));
-
-	auto value = DBL(SI64(mant64)) / DBL(std::numeric_limits<int64_t>::max());
-
-	return ldexp(value, exp);
+template<typename S>
+inline UString readUTF8_LE(S& input) {
+	UString rv;
+	readUTF8_LE(input, rv);
+	return rv;
 }
 
-#ifdef _MSC_VER
-	// warning C4127: conditional expression is constant
-	#pragma warning (default: 4127)
-#endif
-
 template<typename Cont>
 inline void GAppSetOpts_ranged(const char* value, Cont& cont, bool fill = true) {
 	cont.clear();
diff --git a/test/runall.pl b/test/runall.pl
index e9c4b157..a14336d6 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -90,7 +90,6 @@ sub run_pl {
 	`echo "Include Static grammar.cg3 ;" > grammar.bsf.cg3`;
 	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" >output.bsf.txt`;
 	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" > expected.bsf.txt`;
-	`echo '<STREAMCMD:FLUSH>' >> expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 
 	if (-s "diff.bsf.txt") {

From 4acac69ee1286a87d71f0c3d3e312a48053c7d9e Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Fri, 29 Aug 2025 17:42:50 +0200
Subject: [PATCH 36/42] Ensure binary stream is little endian (still 67/69)

---
 src/BinaryApplicator.cpp | 163 ++++++++++++++++++++-------------------
 1 file changed, 82 insertions(+), 81 deletions(-)

diff --git a/src/BinaryApplicator.cpp b/src/BinaryApplicator.cpp
index ac0f19ed..db558e4d 100644
--- a/src/BinaryApplicator.cpp
+++ b/src/BinaryApplicator.cpp
@@ -134,31 +134,6 @@ void BinaryApplicator::runGrammarOnText(std::istream& input, std::ostream& outpu
 	flush(false);
 }
 
-#define READ_U16_INTO(dest)                                 \
-	do {                                                    \
-		(dest) = reinterpret_cast<uint16_t*>(&buf[pos])[0]; \
-		pos += 2;                                           \
-	} while (false)
-
-#define READ_U32_INTO(dest)                                 \
-	do {                                                    \
-		(dest) = reinterpret_cast<uint32_t*>(&buf[pos])[0]; \
-		pos += 4;                                           \
-	} while (false)
-
-#define READ_STR_INTO(dest)                                           \
-	do {                                                              \
-		uint16_t tl = reinterpret_cast<uint16_t*>(&buf[pos])[0];      \
-		pos += 2;                                                     \
-		(dest).clear();                                               \
-		(dest).resize(tl, 0);                                         \
-		int32_t olen = 0;                                             \
-		UErrorCode status = U_ZERO_ERROR;                             \
-		u_strFromUTF8(&(dest)[0], tl, &olen, &buf[pos], tl, &status); \
-		(dest).resize(olen);                                          \
-		pos += tl;                                                    \
-	} while (false)
-
 BinaryPacket BinaryApplicator::readPacket() {
 	BinaryPacket packet;
 	readLE(*ux_stdin, packet.type);
@@ -190,16 +165,49 @@ void BinaryApplicator::readWindow(void*& payload) {
 	ux_stdin->read(&buf[0], cs);
 	uint32_t pos = 0;
 
+	auto READ_U16_INTO = [&](uint16_t& dest) {
+		dest = *reinterpret_cast<uint16_t*>(&buf[pos]);
+		be::little_to_native_inplace(dest);
+		pos += sizeof(dest);
+	};
+
+	auto READ_U16 = [&]() {
+		uint16_t dest;
+		READ_U16_INTO(dest);
+		return dest;
+	};
+
+	auto READ_U32_INTO = [&](uint32_t& dest) {
+		dest = *reinterpret_cast<uint32_t*>(&buf[pos]);
+		be::little_to_native_inplace(dest);
+		pos += sizeof(dest);
+	};
+
+	auto READ_U32 = [&]() {
+		uint32_t dest;
+		READ_U32_INTO(dest);
+		return dest;
+	};
+
+	auto READ_STR_INTO = [&](UString& dest) {
+		auto tl = READ_U16();
+		dest.clear();
+		dest.resize(tl);
+		int32_t olen = 0;
+		UErrorCode status = U_ZERO_ERROR;
+		u_strFromUTF8(&(dest)[0], tl, &olen, &buf[pos], tl, &status);
+		dest.resize(olen);
+		pos += tl;
+	};
+
 	// TODO: flags
-	uint16_t flags;
-	READ_U16_INTO(flags);
+	auto flags = READ_U16();
 	if (flags & BFW_DEP_SPAN) {
 		dep_has_spanned = true;
 	}
 
 	TagVector window_tags;
-	uint16_t tag_count;
-	READ_U16_INTO(tag_count);
+	auto tag_count = READ_U16();
 	window_tags.reserve(tag_count);
 	for (uint16_t i = 0; i < tag_count; ++i) {
 		UString tg;
@@ -213,14 +221,12 @@ void BinaryApplicator::readWindow(void*& payload) {
 		}
 	}
 
-	uint16_t var_count;
-	READ_U16_INTO(var_count);
+	auto var_count = READ_U16();
 	for (uint16_t vn = 0; vn < var_count; ++vn) {
 		char mode = buf[pos];
 		++pos;
-		uint16_t tag1, tag2;
-		READ_U16_INTO(tag1);
-		READ_U16_INTO(tag2);
+		auto tag1 = READ_U16();
+		auto tag2 = READ_U16();
 		auto hash1 = window_tags[tag1]->hash;
 		if (mode == BFV_SETVAR) {
 			cSWindow->variables_set[hash1] = window_tags[tag2]->hash;
@@ -242,8 +248,7 @@ void BinaryApplicator::readWindow(void*& payload) {
 	READ_STR_INTO(cSWindow->text);
 	READ_STR_INTO(cSWindow->text_post);
 
-	uint16_t cohort_count;
-	READ_U16_INTO(cohort_count);
+	auto cohort_count = READ_U16();
 	uint16_t tag;
 	for (uint16_t cn = 0; cn < cohort_count; ++cn) {
 		Cohort* cCohort = alloc_cohort(cSWindow);
@@ -277,12 +282,10 @@ void BinaryApplicator::readWindow(void*& payload) {
 			has_dep = true;
 		}
 
-		uint16_t rel_count;
-		READ_U16_INTO(rel_count);
+		auto rel_count = READ_U16();
 		for (uint16_t rn = 0; rn < rel_count; ++rn) {
 			READ_U16_INTO(tag);
-			uint32_t head;
-			READ_U32_INTO(head);
+			auto head = READ_U32();
 			cCohort->relations_input[window_tags[tag]->hash].insert(head);
 		}
 		if (rel_count) {
@@ -294,10 +297,10 @@ void BinaryApplicator::readWindow(void*& payload) {
 		READ_STR_INTO(cCohort->text);
 		READ_STR_INTO(cCohort->wblank);
 
-		uint16_t reading_count;
-		READ_U16_INTO(reading_count);
-		if (!reading_count)
+		auto reading_count = READ_U16();
+		if (!reading_count) {
 			initEmptyCohort(*cCohort);
+		}
 		Reading* prev = nullptr;
 		for (uint16_t rn = 0; rn < reading_count; ++rn) {
 			Reading* cReading = alloc_reading(cCohort);
@@ -361,42 +364,6 @@ void BinaryApplicator::readText(void*& payload) {
 	payload = &text;
 }
 
-#define WRITE_U16_INTO(n, buffer)                       \
-	do {                                                \
-		std::string tmp(2, 0);                          \
-		auto tmp_n = static_cast<uint16_t>(n);          \
-		tmp.assign(reinterpret_cast<char*>(&tmp_n), 2); \
-		(buffer) += tmp;                                \
-	} while (false)
-
-#define WRITE_U32_INTO(n, buffer)                       \
-	do {                                                \
-		std::string tmp(4, 0);                          \
-		auto tmp_n = static_cast<uint32_t>(n);          \
-		tmp.assign(reinterpret_cast<char*>(&tmp_n), 4); \
-		(buffer) += tmp;                                \
-	} while (false)
-
-#define WRITE_TAG_INTO(tag, buffer)                        \
-	do {                                                   \
-		if (tag_index.find((tag)) == tag_index.end()) {    \
-			tag_index[(tag)] = UI32(tags_to_write.size()); \
-			tags_to_write.push_back((tag));                \
-		}                                                  \
-		WRITE_U16_INTO(tag_index[(tag)], buffer);          \
-	} while (false)
-
-#define WRITE_STR_INTO(s, buffer)                                                                     \
-	do {                                                                                              \
-		std::string tmp((s).size() * 4, 0);                                                           \
-		int32_t olen = 0;                                                                             \
-		UErrorCode status = U_ZERO_ERROR;                                                             \
-		u_strToUTF8(&tmp[0], SI32((s).size() * 4 - 1), &olen, (s).data(), SI32((s).size()), &status); \
-		tmp.resize(olen);                                                                             \
-		WRITE_U16_INTO(UI16(olen), (buffer));                                                         \
-		(buffer) += tmp;                                                                              \
-	} while (false)
-
 void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) {
 	if (!header_done) {
 		output.write("CGBF", 4);
@@ -407,7 +374,41 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 	writeLE(output, UI8(BFP_WINDOW));
 
 	TagVector tags_to_write;
-	std::map<Tag*, uint32_t> tag_index;
+	std::map<Tag*, uint16_t> tag_index;
+
+	auto WRITE_U16_INTO = [&](uint16_t n, std::string& buffer) {
+		be::native_to_little_inplace(n);
+		auto chr = reinterpret_cast<char*>(&n);
+		buffer += chr[0];
+		buffer += chr[1];
+	};
+
+	auto WRITE_U32_INTO = [&](uint32_t n, std::string& buffer) {
+		be::native_to_little_inplace(n);
+		auto chr = reinterpret_cast<char*>(&n);
+		buffer += chr[0];
+		buffer += chr[1];
+		buffer += chr[2];
+		buffer += chr[3];
+	};
+
+	auto WRITE_TAG_INTO = [&](Tag* tag, std::string& buffer) {
+		if (tag_index.find(tag) == tag_index.end()) {
+			tag_index[tag] = UI16(tags_to_write.size());
+			tags_to_write.push_back(tag);
+		}
+		WRITE_U16_INTO(tag_index[tag], buffer);
+	};
+
+	auto WRITE_STR_INTO = [&](const UString& s, std::string& buffer) {
+		std::string tmp(s.size() * 4, 0);
+		int32_t olen = 0;
+		UErrorCode status = U_ZERO_ERROR;
+		u_strToUTF8(&tmp[0], SI32(s.size() * 4 - 1), &olen, s.data(), SI32(s.size()), &status);
+		tmp.resize(olen);
+		WRITE_U16_INTO(UI16(olen), buffer);
+		buffer += tmp;
+	};
 
 	uint16_t var_count = 0;
 	std::string var_buffer;
@@ -576,7 +577,7 @@ void BinaryApplicator::printSingleWindow(SingleWindow* window, std::ostream& out
 	}
 	WRITE_U16_INTO(flags, header_buffer);
 
-	WRITE_U16_INTO(tags_to_write.size(), header_buffer);
+	WRITE_U16_INTO(UI16(tags_to_write.size()), header_buffer);
 	for (auto& tag : tags_to_write) {
 		WRITE_STR_INTO(tag->tag, header_buffer);
 	}

From 2450df02ed09b4b5804ff14e60a59116a5f32a63 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 5 Sep 2025 10:19:05 -0400
Subject: [PATCH 37/42] multiple packet types python

---
 python/cg3.py | 69 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 13 deletions(-)

diff --git a/python/cg3.py b/python/cg3.py
index 9c136ff5..064b9411 100644
--- a/python/cg3.py
+++ b/python/cg3.py
@@ -10,6 +10,7 @@ class Reading:
 	lemma: str = ''
 	tags: List[str] = field(default_factory=list)
 	subreading: Optional['Reading'] = None
+	deleted: bool = False
 
 @dataclass
 class Cohort:
@@ -29,9 +30,15 @@ class Window:
 	rem_vars: List[str] = field(default_factory=list)
 	text: str = ''
 	text_post: str = ''
-	flush_after: bool = False
 	dep_has_spanned: bool = False
 
+@dataclass
+class Packet:
+	type: str = ''
+	window: Optional[Window] = None
+	command: str = ''
+	text: str = ''
+
 def parse_binary_window(buf):
 	'''Given a bytestring `buf` containing a single window
 	(not including the length header), parse and return a Window()
@@ -56,8 +63,6 @@ def read_str():
 	window = Window()
 	window_flags = read_u16()
 	if window_flags & 1:
-		window.flush_after = True
-	if window_flags & 2:
 		window.dep_has_spanned = True
 	tag_count = read_u16()
 	tags = [read_str() for i in range(tag_count)]
@@ -109,13 +114,17 @@ def read_tags():
 				prev.subreading = reading
 			else:
 				cohort.readings.append(reading)
+			if reading_flags & 2:
+				reading.deleted = True
 			prev = reading
 		window.cohorts.append(cohort)
 	return window
 
-def parse_binary_stream(fin):
-	'''Given a file `fin`, yield a series of Window() objects.
-	raises ValueError if stream header is missing or invalid.'''
+def parse_binary_stream(fin, windows_only=False):
+	'''Given a file `fin`, yield a series of Packet() objects.
+	raises ValueError if stream header is missing or invalid.
+	If `windows_only` is True, packets containing commands or text
+	are skipped and Window() objects are returned instead.'''
 
 	header = fin.read(8)
 	label, version = struct.unpack('<4sI', header)
@@ -124,11 +133,45 @@ def parse_binary_stream(fin):
 	if version != 1:
 		raise ValueError('Unknown binary format version!')
 	while True:
-		spec = fin.read(4)
-		if len(spec) != 4:
-			break;
-		block_len = struct.unpack('<I', spec)[0]
-		block = fin.read(block_len)
-		if len(block) != block_len:
+		ptype = fin.read(1)
+		if len(ptype) != 1:
 			break
-		yield parse_binary_window(block)
+		if ptype[0] == 1:
+			spec = fin.read(4)
+			if len(spec) != 4:
+				break
+			block_len = struct.unpack('<I', spec)[0]
+			block = fin.read(block_len)
+			if len(block) != block_len:
+				break
+			window = parse_binary_window(block)
+			if windows_only:
+				yield window
+			else:
+				yield Packet(type='window', window=window)
+		elif ptype[0] == 2:
+			cmd = fin.read(1)
+			if len(cmd) != 1:
+				break
+			if windows_only:
+				continue
+			pack = Packet(type='command')
+			if cmd[0] == 1:
+				pack.command = 'FLUSH'
+			elif cmd[0] == 2:
+				pack.command = 'EXIT'
+			elif cmd[0] == 3:
+				pack.command = 'IGNORE'
+			elif cmd[0] == 4:
+				pack.command = 'RESUME'
+			else:
+				continue
+			yield pack
+		elif ptype[0] == 3:
+			lbuf = fin.read(2)
+			if len(lbuf) != 2:
+				break
+			ln = struct.unpack('<H', lbuf)[0]  # text length is 2 bytes, not 4
+			text = fin.read(ln).decode('utf-8')
+			if not windows_only:
+				yield Packet(type='text', text=text)

From 21d794062b1ab08991982dab7dab80cd94f19224 Mon Sep 17 00:00:00 2001
From: Daniel Swanson <awesomeevildudes@gmail.com>
Date: Fri, 5 Sep 2025 10:28:48 -0400
Subject: [PATCH 38/42] update docs

---
 manual/streamformats.xml | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/manual/streamformats.xml b/manual/streamformats.xml
index c988c762..b1d36a0d 100644
--- a/manual/streamformats.xml
+++ b/manual/streamformats.xml
@@ -195,10 +195,21 @@
     </para>
     <para>
       The stream begins with a header containing <code>CGBF</code> followed by a 4-byte version number (currently <code>1</code>).
-      After that, each window begins with 4 bytes specifying the length of the block and then the following structure:
+      After that, each packet begins with 1 byte indicating its contents.
+      <code>1</code> is a window, <code>2</code> is a command, and <code>3</code> is text.
+    </para>
+    <para>
+      Command packets have a second byte identifying the command: <code>1</code> for <code>FLUSH</code>, <code>2</code> for <code>EXIT</code>, <code>3</code> for <code>IGNORE</code>, and <code>4</code> for <code>RESUME</code>.
+      Commands which manipulate variables are represented in window packets.
+    </para>
+    <para>
+      Text packets consist of a 2-byte length followed by the contents in UTF-8.
+    </para>
+    <para>
+      Each window packet begins with 4 bytes specifying the length of the block and then the following structure:
       <screen>
         window flags [2]
-          &gt; 1 = flush_after
+          &gt; 1 = has multi-window dependencies
         tags [array of str]
         variables [array]
           mode
@@ -211,6 +222,7 @@
         text_post [str]
         cohorts [array]
           flags [2]
+            &gt; 1 = is target of a relation
           wordform [tag]
           static_tags [array of tag]
           dep_self [4]
@@ -223,6 +235,7 @@
           readings [array]
             flags [2]
               &gt; 1 = is subreading of predecessor
+              &gt; 2 = deleted
             baseform [tag]
             tags [array of tag]
       </screen>

From ea296543ad64dacf1fa89e29c763fefd8c546a7f Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Mon, 8 Sep 2025 18:47:06 +0200
Subject: [PATCH 39/42] Move mappings to the end; Tell cg-sort which prefix

---
 scripts/cg-stabilize-relations       |  2 +-
 src/GrammarApplicator.cpp            | 11 +++++++++++
 src/GrammarApplicator_reflow.cpp     |  1 +
 src/GrammarApplicator_runGrammar.cpp |  2 ++
 test/T_Dependency/prefix.txt         |  1 +
 test/T_MappingPrefix/expected.txt    |  7 +++----
 test/T_MappingPrefix/prefix.txt      |  1 +
 test/runall.pl                       | 13 +++++++++----
 8 files changed, 29 insertions(+), 9 deletions(-)
 create mode 100644 test/T_Dependency/prefix.txt
 create mode 100644 test/T_MappingPrefix/prefix.txt

diff --git a/scripts/cg-stabilize-relations b/scripts/cg-stabilize-relations
index 30f93e87..b23d9736 100755
--- a/scripts/cg-stabilize-relations
+++ b/scripts/cg-stabilize-relations
@@ -9,7 +9,7 @@ args = parser.parse_args()
 
 id_map = {}
 
-tag = re.compile(r'\b(ID:|R:\w+:)(\d+)\b')
+tag = re.compile(r'\b(ID:|R:[^:\s]+:)(\d+)\b')
 def repl(matchobj):
     global id_map
     n = matchobj.group(2)
diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp
index d9e84e1f..f3d309c0 100644
--- a/src/GrammarApplicator.cpp
+++ b/src/GrammarApplicator.cpp
@@ -374,6 +374,9 @@ void GrammarApplicator::printReading(const Reading* reading, std::ostream& outpu
 	}
 
 	uint32SortedVector unique;
+	static thread_local TagList mappings;
+	mappings.clear();
+
 	for (auto tter : reading->tags_list) {
 		if ((!show_end_tags && tter == endtag) || tter == begintag) {
 			continue;
@@ -394,6 +397,14 @@ void GrammarApplicator::printReading(const Reading* reading, std::ostream& outpu
 		if (tag->type & T_RELATION && has_relations) {
 			continue;
 		}
+		if (tag->type & T_MAPPING) {
+			// Move mappings to the end
+			mappings.push_back(tag);
+			continue;
+		}
+		u_fprintf(output, " %S", tag->tag.data());
+	}
+	for (auto tag : mappings) {
 		u_fprintf(output, " %S", tag->tag.data());
 	}
 
diff --git a/src/GrammarApplicator_reflow.cpp b/src/GrammarApplicator_reflow.cpp
index 5301c335..a7604770 100644
--- a/src/GrammarApplicator_reflow.cpp
+++ b/src/GrammarApplicator_reflow.cpp
@@ -491,6 +491,7 @@ uint32_t GrammarApplicator::addTagToReading(Reading& reading, Tag* tag, bool reh
 	}
 
 	if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
+		tag->type |= T_MAPPING;
 		if (reading.mapping && reading.mapping != tag) {
 			u_fprintf(ux_stderr, "Error: addTagToReading() cannot add a mapping tag to a reading which already is mapped!\n");
 			CG3Quit(1);
diff --git a/src/GrammarApplicator_runGrammar.cpp b/src/GrammarApplicator_runGrammar.cpp
index f23650fb..b8da9fd7 100644
--- a/src/GrammarApplicator_runGrammar.cpp
+++ b/src/GrammarApplicator_runGrammar.cpp
@@ -368,6 +368,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 				if (base && base[0]) {
 					Tag* tag = addTag(base);
 					if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
+						tag->type |= T_MAPPING;
 						all_mappings[cReading].push_back(tag);
 					}
 					else {
@@ -383,6 +384,7 @@ void GrammarApplicator::runGrammarOnText(std::istream& input, std::ostream& outp
 			if (base && base[0]) {
 				Tag* tag = addTag(base);
 				if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
+					tag->type |= T_MAPPING;
 					all_mappings[cReading].push_back(tag);
 				}
 				else {
diff --git a/test/T_Dependency/prefix.txt b/test/T_Dependency/prefix.txt
new file mode 100644
index 00000000..3038d22f
--- /dev/null
+++ b/test/T_Dependency/prefix.txt
@@ -0,0 +1 @@
+§
diff --git a/test/T_MappingPrefix/expected.txt b/test/T_MappingPrefix/expected.txt
index 94b32f8f..e1bb5351 100644
--- a/test/T_MappingPrefix/expected.txt
+++ b/test/T_MappingPrefix/expected.txt
@@ -1,8 +1,8 @@
 "<word>"
-	"word" @mapped $tag §tag £tag @tag ADD:4 ADD:5 ADD:6 ADD:7
+	"word" @mapped $tag §tag @tag £tag ADD:4 ADD:5 ADD:6 ADD:7
 	"word" £mapped
-	"word" §mapped $tag §tag £tag @tag ADD:4 ADD:5 ADD:6 ADD:7
-	"word" $mapped $tag §tag £tag @tag ADD:4 ADD:5 ADD:6 ADD:7
+	"word" §mapped $tag §tag @tag £tag ADD:4 ADD:5 ADD:6 ADD:7
+	"word" $mapped $tag §tag @tag £tag ADD:4 ADD:5 ADD:6 ADD:7
 "<word2>"
 	"word2" a £re-mapped UNMAP:9:normal MAP:12
 "<word2>"
@@ -16,4 +16,3 @@
 "<word3>"
 	"word3" a b c REMOVE:15
 ;	"word3" a b c £a £b £c REMOVE:15
-
diff --git a/test/T_MappingPrefix/prefix.txt b/test/T_MappingPrefix/prefix.txt
new file mode 100644
index 00000000..93660ce9
--- /dev/null
+++ b/test/T_MappingPrefix/prefix.txt
@@ -0,0 +1 @@
+£
diff --git a/test/runall.pl b/test/runall.pl
index a14336d6..ee27b5b4 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -40,6 +40,12 @@ sub run_pl {
 	my ($binary,$override,$args) = @_;
 	my $good = 1;
 
+	my $prefix = '@';
+	if (-s 'prefix.txt') {
+		$prefix = `cat prefix.txt`;
+		chomp($prefix);
+	}
+
 	# Normal run
 	`"$binary" $args $override -g grammar.cg3 -I input.txt -O output.txt >stdout.txt 2>stderr.txt`;
 	`diff -B expected.txt output.txt >diff.txt`;
@@ -85,11 +91,9 @@ sub run_pl {
 	}
 
 	# Normal run, but with binary I/O
-	my $conv = $binary;
-	$conv =~ s@vislcg3(\.exe)?$@cg-conv@g;
 	`echo "Include Static grammar.cg3 ;" > grammar.bsf.cg3`;
-	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" >output.bsf.txt`;
-	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m | "$bindir/../scripts/cg-stabilize-relations" > expected.bsf.txt`;
+	`cat input.txt | "$binary" $args --in-cg --out-binary -g grammar.bsf.cg3 2>stderr.bsf.conv1.txt | "$binary" $args $override -g grammar.cg3 --in-binary --out-binary 2>stderr.bsf.vislcg3.txt | "$binary" $args --in-binary --out-cg -g grammar.bsf.cg3 2>stderr.bsf.conv2.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m '$prefix' | "$bindir/../scripts/cg-stabilize-relations" >output.bsf.txt`;
+	`cat expected.txt | "$bindir/../scripts/cg-untrace" | "$bindir/../scripts/cg-sort" -m '$prefix' | "$bindir/../scripts/cg-stabilize-relations" > expected.bsf.txt`;
 	`diff -B expected.bsf.txt output.bsf.txt >diff.bsf.txt`;
 
 	if (-s "diff.bsf.txt") {
@@ -159,6 +163,7 @@ sub run_pl {
 	my $args = '';
 	if (-s 'args.txt') {
 		$args = `cat args.txt`;
+		chomp($args);
 	}
 	if (-x 'run.pl') {
 		`./run.pl "$binary" \Q$c\E $args`;

From 7aa00086d442dafb17e029b0e6d64c6c28ff556c Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Mon, 8 Sep 2025 19:08:52 +0200
Subject: [PATCH 40/42] Force PERL_UNICODE=SDA in the test runner

---
 test/runall.pl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/runall.pl b/test/runall.pl
index ee27b5b4..2870edce 100755
--- a/test/runall.pl
+++ b/test/runall.pl
@@ -7,6 +7,8 @@
 my $bindir = realpath $Bin;
 chdir $bindir or die("Error: Could not change directory to $bindir !");
 
+$ENV{PERL_UNICODE} = 'SDA';
+
 # Search paths for the binary
 my @binlist = (
 	"../build/src/vislcg3",

From 7838daee40ad3f51b10d69058c233d0ca2fb751b Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Mon, 8 Sep 2025 19:25:57 +0200
Subject: [PATCH 41/42] Version

---
 src/version.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/version.hpp b/src/version.hpp
index ced0348e..abc2f46d 100644
--- a/src/version.hpp
+++ b/src/version.hpp
@@ -26,8 +26,8 @@
 constexpr auto CG3_COPYRIGHT_STRING = "Copyright (C) 2007-2025 GrammarSoft ApS. Licensed under GPLv3+";
 
 constexpr uint32_t CG3_VERSION_MAJOR = 1;
-constexpr uint32_t CG3_VERSION_MINOR = 5;
-constexpr uint32_t CG3_VERSION_PATCH = 7;
+constexpr uint32_t CG3_VERSION_MINOR = 6;
+constexpr uint32_t CG3_VERSION_PATCH = 0;
 constexpr uint32_t CG3_REVISION = 13898;
 constexpr uint32_t CG3_FEATURE_REV = 13898;
 constexpr uint32_t CG3_TOO_OLD = 10373;

From f4f812f5611af249f15dd4f0a29abb6b7f68bab9 Mon Sep 17 00:00:00 2001
From: Tino Didriksen <mail@tinodidriksen.com>
Date: Mon, 8 Sep 2025 19:31:01 +0200
Subject: [PATCH 42/42] Install cg-stabilize-relations

---
 CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2eeaca5e..b4588a95 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -165,6 +165,7 @@ configure_file(scripts/cg3-autobin.pl.in scripts/cg3-autobin.pl @ONLY)
 install(PROGRAMS
 	"${CMAKE_CURRENT_BINARY_DIR}/scripts/cg3-autobin.pl"
 	"${CMAKE_CURRENT_SOURCE_DIR}/scripts/cg-sort"
+	"${CMAKE_CURRENT_SOURCE_DIR}/scripts/cg-stabilize-relations"
 	"${CMAKE_CURRENT_SOURCE_DIR}/scripts/cg-strictify"
 	"${CMAKE_CURRENT_SOURCE_DIR}/scripts/cg-untrace"
 	DESTINATION ${CMAKE_INSTALL_BINDIR})