From 75c979ef3deab5346aee9c21259f502108c1bb77 Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Wed, 8 Apr 2026 23:15:04 -0400 Subject: [PATCH 1/4] fix some more obvious thread-safety issues --- src/Cohort.cpp | 2 +- src/GrammarApplicator.cpp | 8 ++++---- src/Reading.cpp | 2 +- src/SingleWindow.cpp | 2 +- src/cg3.h | 2 ++ src/libcg3.cpp | 20 ++++++++++++++++++++ 6 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/Cohort.cpp b/src/Cohort.cpp index 93567de9b..b515dc6c4 100644 --- a/src/Cohort.cpp +++ b/src/Cohort.cpp @@ -29,7 +29,7 @@ namespace CG3 { -extern pool<Cohort> pool_cohorts; +extern thread_local pool<Cohort> pool_cohorts; Cohort* alloc_cohort(SingleWindow* p) { Cohort* c = pool_cohorts.get(); diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp index f3d309c04..00e4d31bc 100644 --- a/src/GrammarApplicator.cpp +++ b/src/GrammarApplicator.cpp @@ -32,9 +32,9 @@ namespace CG3 { // Order is important - we want SingleWindows to be destroyed first, then Cohorts, then Readings -pool<Reading> pool_readings; -pool<Cohort> pool_cohorts; -pool<SingleWindow> pool_swindows; +thread_local pool<Reading> pool_readings; +thread_local pool<Cohort> pool_cohorts; +thread_local pool<SingleWindow> pool_swindows; GrammarApplicator::GrammarApplicator(std::ostream& ux_err) : ux_stderr(&ux_err) @@ -48,7 +48,7 @@ GrammarApplicator::~GrammarApplicator() { delete grammar; } grammar = nullptr; - ux_stderr = nullptr; + //ux_stderr = nullptr; for (auto rx : text_delimiters) { uregex_close(rx); diff --git a/src/Reading.cpp b/src/Reading.cpp index b0946d6d5..a0dad372d 100644 --- a/src/Reading.cpp +++ b/src/Reading.cpp @@ -23,7 +23,7 @@ namespace CG3 { -extern pool<Reading> pool_readings; +extern thread_local pool<Reading> pool_readings; Reading* alloc_reading(Cohort* p) { Reading* r = pool_readings.get(); diff --git a/src/SingleWindow.cpp b/src/SingleWindow.cpp index f709c6312..1df3839a2 100644 --- a/src/SingleWindow.cpp +++ b/src/SingleWindow.cpp @@ -24,7 +24,7 @@ namespace CG3 { -extern pool<SingleWindow> pool_swindows; +extern thread_local pool<SingleWindow> pool_swindows; 
SingleWindow* alloc_swindow(Window* p) { SingleWindow* s = pool_swindows.get(); diff --git a/src/cg3.h b/src/cg3.h index 6c2145b27..fff90140c 100644 --- a/src/cg3.h +++ b/src/cg3.h @@ -113,10 +113,12 @@ Valid signatures: //*/ void cg3_applicator_setoption(cg3_applicator* applicator, cg3_option option, void* value); void cg3_applicator_free(cg3_applicator* applicator); +void cg3_applicator_free_binary(cg3_applicator* applicator); void cg3_run_grammar_on_text(cg3_applicator*, std_istream*, std_ostream*); void cg3_run_grammar_on_text_fns(cg3_applicator*, const char* input, const char* output); size_t cg3_run_grammar_on_buffer(cg3_applicator* applicator_, const char* input, size_t in_length, char* output, size_t out_length); +size_t cg3_run_grammar_on_buffer_binary(cg3_applicator* applicator_, const char* input, size_t in_length, char* output, size_t out_length); cg3_mwesplitapplicator* cg3_mwesplitapplicator_create(); #define cg3_mwesplitapplicator_free cg3_applicator_free diff --git a/src/libcg3.cpp b/src/libcg3.cpp index d4f85e3bb..9efabb678 100644 --- a/src/libcg3.cpp +++ b/src/libcg3.cpp @@ -283,6 +283,11 @@ void cg3_applicator_free(cg3_applicator* applicator_) { delete applicator; } +void cg3_applicator_free_binary(cg3_applicator* applicator_) { + GrammarApplicator* applicator = static_cast(applicator_); + delete applicator; +} + void cg3_run_grammar_on_text(cg3_applicator* applicator_, std_istream* is_, std_ostream* os_) { GrammarApplicator* applicator = static_cast<GrammarApplicator*>(applicator_); std::istream* is = static_cast<std::istream*>(is_); @@ -312,6 +317,21 @@ size_t cg3_run_grammar_on_buffer(cg3_applicator* applicator_, const char* input, return mx; } +size_t cg3_run_grammar_on_buffer_binary(cg3_applicator* applicator_, const char* input, size_t in_length, char* output, size_t out_length) { + BinaryApplicator* applicator = static_cast<BinaryApplicator*>(applicator_); + std::string istr(input, in_length); + std::istringstream is(istr); + std::ostringstream os; + applicator->runGrammarOnText(is, os); + // 
Ideally we would write directly to output without copying, + // but I couldn't figure out how to do that, and this is good + // enough for my purposes. -DGS 2026-02-16 + const auto& ostr = os.str(); + auto mx = (ostr.size() > out_length ? out_length : ostr.size()); + memcpy(output, ostr.data(), mx); + return mx; +} + cg3_sentence* cg3_sentence_new(cg3_applicator* applicator_) { GrammarApplicator* applicator = static_cast<GrammarApplicator*>(applicator_); SingleWindow* current = applicator->gWindow->allocSingleWindow(); From 7715b1305a66cf729debb8d6af82e4b270b13a27 Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Tue, 19 May 2026 12:18:49 -0400 Subject: [PATCH 2/4] AST globals --- src/AST.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/AST.hpp b/src/AST.hpp index 85090621d..b354b6e61 100644 --- a/src/AST.hpp +++ b/src/AST.hpp @@ -88,7 +88,7 @@ enum ASTType { AST_TextDelimiters, NUM_ASTTypes }; -const char* ASTType_str[NUM_ASTTypes] = {}; +thread_local const char* ASTType_str[NUM_ASTTypes] = {}; struct ASTHelper; struct ASTNode { @@ -106,10 +106,10 @@ struct ASTNode { {} }; -bool parse_ast = false; -ASTNode ast; -ASTNode* cur_ast = &ast; -ASTHelper* cur_ast_help = nullptr; +thread_local bool parse_ast = false; +thread_local ASTNode ast; +thread_local ASTNode* cur_ast = &ast; +thread_local ASTHelper* cur_ast_help = nullptr; const UChar* xml_encode(const UChar* b, const UChar* e) { static thread_local CG3::UString buf; From e09088f77666712db8bc94acc964af2c3a0e5739 Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Sat, 23 May 2026 16:41:11 -0400 Subject: [PATCH 3/4] static vars in methods also need thread_local --- src/GrammarApplicator.hpp | 2 +- src/flat_unordered_map.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GrammarApplicator.hpp b/src/GrammarApplicator.hpp index 9203443c9..f6a66af8f 100644 --- a/src/GrammarApplicator.hpp +++ b/src/GrammarApplicator.hpp @@ -371,7 +371,7 @@ class GrammarApplicator { Reading* 
get_sub_reading(Reading* tr, int sub_reading); void printDebugRule(const Rule& rule, bool target = true, bool cntx = true) { - static std::stringstream buf; + thread_local static std::stringstream buf; bool ttrace = false; swapper _st(true, trace, ttrace); diff --git a/src/flat_unordered_map.hpp b/src/flat_unordered_map.hpp index 65d130ccc..f4ef3133f 100644 --- a/src/flat_unordered_map.hpp +++ b/src/flat_unordered_map.hpp @@ -295,7 +295,7 @@ class flat_unordered_map { return; } - static container vals; + thread_local static container vals; vals.resize(0); vals.reserve(size_); for (auto& elem : elements) { From 73e13f97c38bf439c2b69ddffb7d5b84c402f4a3 Mon Sep 17 00:00:00 2001 From: Daniel Swanson Date: Sat, 23 May 2026 16:57:36 -0400 Subject: [PATCH 4/4] don't need to remove this line --- src/GrammarApplicator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GrammarApplicator.cpp b/src/GrammarApplicator.cpp index 00e4d31bc..1cc8130b6 100644 --- a/src/GrammarApplicator.cpp +++ b/src/GrammarApplicator.cpp @@ -48,7 +48,7 @@ GrammarApplicator::~GrammarApplicator() { delete grammar; } grammar = nullptr; - //ux_stderr = nullptr; + ux_stderr = nullptr; for (auto rx : text_delimiters) { uregex_close(rx);