From 683f524a828192aff498637086ed5b148a77378e Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 11:19:46 -0700 Subject: [PATCH 1/7] smart quotes for keys and values --- CHANGELOG.md | 8 +-- README.md | 2 +- ext/smarter_json/smarter_json.c | 9 +++ lib/smarter_json/parser.rb | 6 ++ lib/smarter_json/version.rb | 2 +- spec/parser_spec.rb | 107 +++++++++++++++++++++++++++++++- 6 files changed, 127 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c14e3fe..cef9aa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,9 @@ # SmarterJSON Change Log -> 🚧 Getting ready for the 1.0.0 release - sorry for the interface changes - thank you for your patience! 🚧 - > ⚠️ **New Interface (since 0.9.7):** > -> SmarterJSON **always return an `Array`** of documents: +> SmarterJSON **always returns an `Array`** of documents. > > `SmarterJSON.process` / `SmarterJSON.process_file` return: > @@ -16,9 +14,11 @@ > ⚠️ We discourage the use of `process(input).first` / `process(input)[0]` because it silently drops potential additional documents > Please use `process_one` if you are expecting only one JSON doc, e.g. in API payloads. -## 0.9.10 (unreleased) +## 1.0.0 (unreleased) +- **The public interface is now stable** β€” `process`, `process_one`, `process_file`, `generate`, and the documented options; semantic versioning from here on. - Unknown or wrongly-typed options now raise `ArgumentError` instead of being silently ignored, so a typo (e.g. `symbolize_names:` instead of `symbolize_keys:`) is caught immediately. - Input tagged `ASCII-8BIT` whose bytes are valid UTF-8 (e.g. a `Net::HTTP` `response.body`) is now read as UTF-8, so its string values compare equal to UTF-8 literals; ASCII-8BIT input that is not valid UTF-8 raises `SmarterJSON::EncodingError` (pass an explicit `encoding:` for legacy encodings). +- Object keys may now use smart/curly quotes too (e.g. JSON pasted from a word processor), not just string values. 
## 0.9.9 (2026-06-07) - Much faster pure-Ruby parsing (the path used without the C extension) β€” roughly 3Γ— on string-heavy data, ~2Γ— on number-heavy, ~1.7Γ— on object-heavy (on a YJIT-enabled Ruby). Parsed values are unchanged. diff --git a/README.md b/README.md index 9387069..e54cf96 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ Three things set it apart: - Trailing commas; unquoted keys (`{host: localhost}`); single-quoted, triple-quoted (`'''…'''`), and quoteless string values - Implicit root object β€” a config file that starts with `key: value`, no outer `{}` - `NaN`, `Infinity`, hex (`0xFF`), leading `+` / `.`, underscores in numbers (`1_000_000`) -- UTF-8 BOM, smart/curly quotes, Python literals (`True` / `False` / `None`), JavaScript `undefined` +- UTF-8 BOM, smart/curly quotes (in keys and values), Python literals (`True` / `False` / `None`), JavaScript `undefined` - Mixed CR / LF / CRLF line endings, and any Ruby-supported input encoding (via `encoding:`) - Duplicate keys (last value wins by default; configurable) diff --git a/ext/smarter_json/smarter_json.c b/ext/smarter_json/smarter_json.c index 3c4f3ac..4aa0023 100644 --- a/ext/smarter_json/smarter_json.c +++ b/ext/smarter_json/smarter_json.c @@ -262,6 +262,8 @@ static inline int fj_needs_ws_skip(int b) { /* forward declarations (mutual recursion) */ static VALUE fj_parse_value(fj_state *st); static VALUE fj_parse_member_value(fj_state *st); +static int fj_smart_quote_kind(fj_state *st); +static VALUE fj_parse_smart_string(fj_state *st, int kind); static void fj_append_utf8(VALUE buf, unsigned long cp) { char tmp[4]; @@ -842,6 +844,7 @@ static VALUE fj_parse_identifier_key(fj_state *st) { static VALUE fj_parse_object_key(fj_state *st) { int b = fj_byte(st); + int kind; /* Quoted key. The common case has no escapes: intern straight from the buffer * with no throwaway allocation. 
An escaped key (rare) falls through to the @@ -862,6 +865,12 @@ static VALUE fj_parse_object_key(fj_state *st) { return fj_parse_string(st, b); } + /* A key may open with a smart/curly quote too (a word-processor paste curls the + * keys, not just the values) — route to the same reader the value path uses. + * Mirrors the Ruby fallback's parse_object_key; Hash#[]= dedups the key on store. */ + kind = fj_smart_quote_kind(st); + if (kind) return fj_parse_smart_string(st, kind); + if (fj_is_key_start(b)) return fj_parse_identifier_key(st); fj_error(st, "expected a key"); diff --git a/lib/smarter_json/parser.rb b/lib/smarter_json/parser.rb index 834e28f..5aac9b1 100644 --- a/lib/smarter_json/parser.rb +++ b/lib/smarter_json/parser.rb @@ -1240,6 +1240,12 @@ def parse_object_key b = byte return parse_string(DQUOTE) if b == DQUOTE return parse_string(SQUOTE) if b == SQUOTE + + # A key may open with a smart/curly quote too (word-processor paste curls keys, + # not just values) — route to the same reader values already use. 
+ kind = smart_quote_kind(@pos) + return parse_smart_string(kind) if kind + raise error("expected a key") unless b && key_start_byte?(b) parse_identifier_key diff --git a/lib/smarter_json/version.rb b/lib/smarter_json/version.rb index bdc837e..3d9352e 100644 --- a/lib/smarter_json/version.rb +++ b/lib/smarter_json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SmarterJSON - VERSION = "0.9.9" + VERSION = "1.0.0" end diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 9dffcd3..ce693ce 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -726,8 +726,16 @@ end describe "smart / curly quotes" do + # Quote codepoints used in the inputs below, written as \u escapes because the + # glyphs are visually near-identical to straight quotes (and this file's save + # hook rewrites any literal glyph back to its \u escape \u2014 so the Unicode NAME, + # not a pasted glyph, is the reliable identifier): + # \u201C LEFT DOUBLE QUOTATION MARK (open curly double) + # \u201D RIGHT DOUBLE QUOTATION MARK (close curly double) + # \u2018 LEFT SINGLE QUOTATION MARK (open curly single) + # \u2019 RIGHT SINGLE QUOTATION MARK (close curly single, also the typographic apostrophe) + # \u00EF LATIN SMALL LETTER I WITH DIAERESIS (the i in the "naive" key) it "accepts curly double quotes as regular double quotes" do - # U+201C LEFT DOUBLE QUOTATION MARK, U+201D RIGHT DOUBLE QUOTATION MARK input = "{\"a\": \u201Chello\u201D}" expect(SmarterJSON.process(input, acceleration: acceleration)).to eq([{ "a" => "hello" }]) expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "a" => "hello" }) @@ -739,6 +747,103 @@ expect(SmarterJSON.process(input, acceleration: acceleration)).to eq([{ "a" => "hello" }]) expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "a" => "hello" }) end + + it "keeps curly quotes that appear INSIDE a straight-quoted string as content" do + # {"quote": "Hello \u201cworld\u201d"} \u2014 the curly quotes 
(U+201C/U+201D) sit inside a + # straight double-quoted value, so they are literal content, not delimiters. + # Same principle as a col_sep inside a quoted CSV field in SmarterCSV: once a + # string is open, only its own closer (here the straight ") ends it. + input = "{\"quote\": \"Hello \u201cworld\u201d\"}" + expected = { "quote" => "Hello \u201cworld\u201d" } + expect(SmarterJSON.process(input, acceleration: acceleration)).to eq([expected]) + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq(expected) + end + + # The word-processor paste curls the KEYS too, not just the values \u2014 these + # are RED until smart quotes are accepted in key position (Ruby + C). + describe "smart-quoted keys" do + it "accepts a smart double-quoted key" do + input = "{\u201cname\u201d: \"Tilo\"}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "name" => "Tilo" }) + end + + it "accepts a smart single-quoted key" do + input = "{\u2018name\u2019: \"Tilo\"}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "name" => "Tilo" }) + end + + it "accepts smart quotes on BOTH key and value" do + input = "{\u201cname\u201d: \u201cTilo\u201d}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "name" => "Tilo" }) + end + + it "accepts a smart double key with a smart single value (mixed styles)" do + input = "{\u201cname\u201d: \u2018Tilo\u2019}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "name" => "Tilo" }) + end + + it "accepts multiple smart-quoted keys in one object" do + input = "{\u201ca\u201d: 1, \u201cb\u201d: 2}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "a" => 1, "b" => 2 }) + end + + it "accepts a smart-quoted key in a nested object" do + input = "{\"outer\": {\u201cinner\u201d: 1}}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "outer" => { "inner" => 1 } }) + end + + it "accepts a 
smart-quoted key containing spaces and multibyte characters" do + input = "{\u201cna\u00efve key\u201d: 1}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "na\u00efve key" => 1 }) + end + + it "is lenient about smart-quote direction on a key" do + # opens with the RIGHT curly (U+201D), closes with the LEFT (U+201C) + input = "{\u201dname\u201c: 1}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "name" => 1 }) + end + + it "symbolizes a smart-quoted key when symbolize_keys: true" do + input = "{\u201cname\u201d: 1}" + expect(SmarterJSON.process_one(input, symbolize_keys: true, acceleration: acceleration)).to eq({ name: 1 }) + end + + it "accepts an empty smart-quoted key" do + input = "{\u201c\u201d: 1}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "" => 1 }) + end + + it "handles a smart-quoted key AND curly content inside a straight-quoted value" do + input = "{\u201cquote\u201d: \"Hello \u201cworld\u201d\"}" + expected = { "quote" => "Hello \u201cworld\u201d" } + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq(expected) + end + + it "raises on an unterminated smart-quoted key" do + expect { SmarterJSON.process_one("{\u201cname: 1}", acceleration: acceleration) } + .to raise_error(SmarterJSON::ParseError) + end + end + + # Regression guards: a smart quote that appears INSIDE an already-open string + # is content, not a delimiter (the SmarterCSV "separator inside a quoted field" + # principle). These are GREEN today and must stay green when keys change. 
+ describe "smart quotes stay content inside an already-open string" do + it "keeps a smart apostrophe inside a straight-quoted string" do + input = "{\"msg\": \"I don\u2019t know\"}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "msg" => "I don\u2019t know" }) + end + + it "keeps a smart apostrophe inside a smart-quoted string" do + input = "{\"msg\": \u201cI don\u2019t know\u201d}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "msg" => "I don\u2019t know" }) + end + + it "keeps straight double quotes inside a smart-quoted string" do + input = "{\"msg\": \u201cHe said \"hi\" loudly\u201d}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "msg" => "He said \"hi\" loudly" }) + end + end end describe "Python literals" do From 5cd1c5a26493e7995225e80257ca89a49d167c1c Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 11:50:19 -0700 Subject: [PATCH 2/7] towards version 1.0.0 --- CHANGELOG.md | 7 +++- README.md | 2 +- docs/basic_write_api.md | 2 +- docs/options.md | 9 ++--- ext/smarter_json/smarter_json.c | 23 ++++++++---- lib/smarter_json/generator.rb | 23 ++++++++++-- lib/smarter_json/parser.rb | 14 ++++++-- lib/smarter_json/version.rb | 2 +- spec/generator_spec.rb | 24 +++++++++++++ spec/parser_spec.rb | 62 +++++++++++++++++++++++++++++++++ 10 files changed, 148 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cef9aa0..1943026 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,11 +14,16 @@ > ⚠️ We discourage the use of `process(input).first` / `process(input)[0]` because it silently drops potential additional documents > Please use `process_one` if you are expecting only one JSON doc, e.g. in API payloads. 
-## 1.0.0 (unreleased) +## 1.0.0.pre1 (unreleased) + +RSpec tests: 1,034 + - **The public interface is now stable** β€” `process`, `process_one`, `process_file`, `generate`, and the documented options; semantic versioning from here on. - Unknown or wrongly-typed options now raise `ArgumentError` instead of being silently ignored, so a typo (e.g. `symbolize_names:` instead of `symbolize_keys:`) is caught immediately. - Input tagged `ASCII-8BIT` whose bytes are valid UTF-8 (e.g. a `Net::HTTP` `response.body`) is now read as UTF-8, so its string values compare equal to UTF-8 literals; ASCII-8BIT input that is not valid UTF-8 raises `SmarterJSON::EncodingError` (pass an explicit `encoding:` for legacy encodings). - Object keys may now use smart/curly quotes too (e.g. JSON pasted from a word processor), not just string values. +- `SmarterJSON.generate` accepts `allow_nan: true` to emit `NaN` / `Infinity` / `-Infinity` (JSON5-style) instead of raising, so non-finite numbers round-trip; the default still raises. +- A numeric literal that overflows `Float` range (e.g. `1e400`) now reports a `:number_overflow` warning via `on_warning` instead of silently becoming `Infinity`. ## 0.9.9 (2026-06-07) - Much faster pure-Ruby parsing (the path used without the C extension) β€” roughly 3Γ— on string-heavy data, ~2Γ— on number-heavy, ~1.7Γ— on object-heavy (on a YJIT-enabled Ruby). Parsed values are unchanged. 
diff --git a/README.md b/README.md index e54cf96..6c8e5e7 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ In short: **SmarterJSON's C path matches or beats Oj/strict on every file** (app | `decimal_precision` | `:auto` | `:auto` keeps high-precision decimals as `BigDecimal`; `:float` forces `Float`; `:bigdecimal` forces `BigDecimal` | | `acceleration` | `true` | `true` uses the C extension when compiled and loadable; `false` forces pure Ruby (identical results) | | `encoding` | `nil` | labels the input's encoding; `nil` keeps the input's own (no transcoding pass; see below) | -| `on_warning` | `nil` | a callable invoked once per lenient fix applied (`:empty_slot`, `:empty_value`, `:duplicate_key`), passed a `SmarterJSON::Warning`; the return value is never changed. See below. | +| `on_warning` | `nil` | a callable invoked once per lenient fix applied (`:empty_slot`, `:empty_value`, `:duplicate_key`, `:number_overflow`), passed a `SmarterJSON::Warning`; the return value is never changed. See below. | ## Examples diff --git a/docs/basic_write_api.md b/docs/basic_write_api.md index 66df12f..12de944 100644 --- a/docs/basic_write_api.md +++ b/docs/basic_write_api.md @@ -58,7 +58,7 @@ SmarterJSON.generate(Float::INFINITY) # raises SmarterJSON::GenerateError β€” SmarterJSON.generate(Float::NAN) # raises SmarterJSON::GenerateError β€” non-finite Float ``` -(`GenerateError` is a kind of `SmarterJSON::Error`, so `rescue SmarterJSON::Error` catches it. `Infinity` and `NaN` are accepted on the *read* side as a leniency, but they are not valid JSON to *write*.) +(`GenerateError` is a kind of `SmarterJSON::Error`, so `rescue SmarterJSON::Error` catches it. `Infinity` and `NaN` are accepted on the *read* side as a leniency; to *write* them, pass `allow_nan: true` and they're emitted as `NaN` / `Infinity` / `-Infinity` (JSON5-style, so SmarterJSON reads them back) β€” otherwise non-finite values raise, since they aren't valid strict JSON.) 
By default `generate` is strict: it only writes the types above and raises on anything else. To serialize `Time`, `Date`, or your own objects, pass `coerce: true` β€” an unsupported value is then converted by its own `as_json` (whose result is re-emitted, so escaping/`indent`/`sort_keys` still apply) or, failing that, `to_json` (spliced verbatim): diff --git a/docs/options.md b/docs/options.md index 81f0d57..c85f823 100644 --- a/docs/options.md +++ b/docs/options.md @@ -43,7 +43,7 @@ warns.map(&:type) # => [:empty_slot] warns.first.to_s # => "extra comma, collapsed an empty slot at line 1, col 4" ``` -The warning types are `:empty_slot` (a collapsed empty comma slot, e.g. `[1,,2]`), `:empty_value` (a key with no value, read as `null`, e.g. `{a:}`), and `:duplicate_key` (a repeated key that was dropped), plus wrapper-recovery warnings such as `:code_fence_stripped`, `:prefix_text_ignored`, `:suffix_text_ignored`, and `:wrapper_tag_stripped`. Clean input never invokes the handler. Warnings work on both the C and pure-Ruby paths, so `acceleration:` doesn't change them. +The warning types are `:empty_slot` (a collapsed empty comma slot, e.g. `[1,,2]`), `:empty_value` (a key with no value, read as `null`, e.g. `{a:}`), `:duplicate_key` (a repeated key that was dropped), and `:number_overflow` (a numeric literal too large for `Float`, e.g. `1e400`, collapsed to `Infinity`), plus wrapper-recovery warnings such as `:code_fence_stripped`, `:prefix_text_ignored`, `:suffix_text_ignored`, and `:wrapper_tag_stripped`. Clean input never invokes the handler. Warnings work on both the C and pure-Ruby paths, so `acceleration:` doesn't change them. 
### A note on `:encoding` @@ -59,12 +59,13 @@ These options are passed to [`SmarterJSON.generate`](./basic_write_api.md) as th | Option | Default | Explanation | |------------|---------|-----------------------------------------------------------------------------------------------------------------------------| +| `:allow_nan` | `false` | When `true`, non-finite `Float`/`BigDecimal` values emit the JSON5 barewords `NaN` / `Infinity` / `-Infinity` (which SmarterJSON reads back, so they round-trip). When `false` (the default), a non-finite number raises `SmarterJSON::GenerateError` β€” they aren't valid strict JSON. | +| `:ascii_only` | `false` | Escape every non-ASCII character as `\uXXXX` (astral characters as a UTF-16 surrogate pair). The default emits raw UTF-8. | +| `:coerce` | `false` | When `true`, a value that isn't natively supported is converted by its own `as_json` (the result is re-emitted, so the other options still apply) or, failing that, `to_json` (spliced verbatim). When `false` (the default), such a value raises `SmarterJSON::GenerateError`. | | `:format` | `:json` | `:json` writes standard JSON (Hash β†’ object, Array β†’ array, scalar β†’ scalar). `:ndjson` writes newline-delimited JSON: an Array becomes one element per line, any other value becomes a single line. | | `:indent` | `0` | Spaces per nesting level for pretty-printing. `0` (the default) is compact output. Empty objects/arrays stay inline. Not allowed with `:ndjson` (a record must be a single line). | -| `:sort_keys` | `false` | Emit object keys in sorted order (Symbol keys sorted by their string form). Useful for canonical, diff-friendly output. | -| `:ascii_only` | `false` | Escape every non-ASCII character as `\uXXXX` (astral characters as a UTF-16 surrogate pair). The default emits raw UTF-8. | | `:script_safe` | `false` | Escape the `/` in `` tag. 
| -| `:coerce` | `false` | When `true`, a value that isn't natively supported is converted by its own `as_json` (the result is re-emitted, so the other options still apply) or, failing that, `to_json` (spliced verbatim). When `false` (the default), such a value raises `SmarterJSON::GenerateError`. | +| `:sort_keys` | `false` | Emit object keys in sorted order (Symbol keys sorted by their string form). Useful for canonical, diff-friendly output. | Configuration is validated up front: an unknown option key, a known key with the wrong type or value (a non-Symbol `:format`, a negative/non-Integer `:indent`, a non-boolean flag), or combining `:indent` with `:ndjson`, raises `ArgumentError`. diff --git a/ext/smarter_json/smarter_json.c b/ext/smarter_json/smarter_json.c index 4aa0023..41243d5 100644 --- a/ext/smarter_json/smarter_json.c +++ b/ext/smarter_json/smarter_json.c @@ -40,6 +40,7 @@ static ID fj_call_id; /* cached :call (invoking the on_warning handler) */ static VALUE fj_sym_empty_slot; static VALUE fj_sym_empty_value; static VALUE fj_sym_duplicate_key; +static VALUE fj_sym_number_overflow; static ID fj_bigdecimal_id; /* cached BigDecimal() method id (set in Init) */ static ID fj_to_sym_id; /* cached :to_sym (symbolize_keys) */ static ID fj_key_p_id; /* cached :key? (non-default duplicate_key modes) */ @@ -581,7 +582,8 @@ static VALUE fj_float_strtod(const char *p, long n) { } /* e10 is the final base-10 exponent (already adjusted by the fraction length). 
*/ -static FJ_ALWAYS_INLINE VALUE fj_float_from_parts(uint64_t m10, int m10digits, int64_t e10, int neg, int overflow, const char *p, long n) { +static FJ_ALWAYS_INLINE VALUE fj_float_from_parts(fj_state *st, uint64_t m10, int m10digits, int64_t e10, int neg, int overflow, const char *p, long n) { + double d; /* Fast path by mantissa width (our scanner accumulates m10 exactly up to 18 digits, flagging overflow beyond): 1..18 digits -> Eisel-Lemire, correctly-rounded for any exact uint64 mantissa @@ -591,10 +593,16 @@ static FJ_ALWAYS_INLINE VALUE fj_float_from_parts(uint64_t m10, int m10digits, i >18 digits / overflow / extreme exponent -> strtod (round-to-odd). */ if (!overflow && m10digits >= 1 && m10digits <= 18 && (long)m10digits + e10 >= -307) { if (m10 == 0) return rb_float_new(neg ? -0.0 : 0.0); - return rb_float_new(fj_eisel_lemire_s2d(e10, m10, neg)); + d = fj_eisel_lemire_s2d(e10, m10, neg); + } else { + /* Fallback for >18 digits / extreme or subnormal exponents. */ + d = RFLOAT_VALUE(fj_float_strtod(p, n)); } - /* Fallback for >18 digits / extreme or subnormal exponents. */ - return fj_float_strtod(p, n); + /* A finite literal whose magnitude exceeds Float range (e.g. 1e400) becomes + Β±Infinity β€” a silent data change. Report it via :number_overflow (the value is + still returned). The Infinity/NaN keywords take separate paths and never get here. 
*/ + if (isinf(d)) fj_warn(st, fj_sym_number_overflow, "number literal out of Float range β€” collapsed to Infinity"); + return rb_float_new(d); } /* Scan an already-bounded quoteless token [p, p+n) exactly once: validate it as a @@ -679,7 +687,7 @@ static int fj_try_decimal(fj_state *st, const char *p, long n, VALUE *out) { (st->decimal_precision == 1 && m10digits > 16 && fj_sig_digits(p, n) > 16)) { *out = fj_to_bigdecimal_token(p, n); } else { - *out = fj_float_from_parts(m10, m10digits, e10, neg, overflow, p, n); + *out = fj_float_from_parts(st, m10, m10digits, e10, neg, overflow, p, n); } return 1; } @@ -791,7 +799,7 @@ static VALUE fj_parse_number(fj_state *st) { (st->decimal_precision == 1 && m10digits > 16 && fj_sig_digits(np, nlen) > 16)) { return fj_to_bigdecimal_token(np, nlen); } - return fj_float_from_parts(m10, m10digits, e10, neg, overflow, np, nlen); + return fj_float_from_parts(st, m10, m10digits, e10, neg, overflow, np, nlen); } static VALUE fj_parse_literal(fj_state *st, const char *word, VALUE value) { @@ -1206,7 +1214,7 @@ static int fj_try_member_number(fj_state *st, VALUE *out) { (st->decimal_precision == 1 && m10digits > 16 && fj_sig_digits(np, nlen) > 16)) { *out = fj_to_bigdecimal_token(np, nlen); } else { - *out = fj_float_from_parts(m10, m10digits, e10, neg, overflow, np, nlen); + *out = fj_float_from_parts(st, m10, m10digits, e10, neg, overflow, np, nlen); } return 1; } @@ -1634,6 +1642,7 @@ void Init_smarter_json(void) { fj_sym_empty_slot = ID2SYM(rb_intern("empty_slot")); fj_sym_empty_value = ID2SYM(rb_intern("empty_value")); fj_sym_duplicate_key = ID2SYM(rb_intern("duplicate_key")); + fj_sym_number_overflow = ID2SYM(rb_intern("number_overflow")); fj_sym_encoding = ID2SYM(rb_intern("encoding")); fj_sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys")); fj_sym_first_wins = ID2SYM(rb_intern("first_wins")); diff --git a/lib/smarter_json/generator.rb b/lib/smarter_json/generator.rb index bc9c79f..bf6a97a 100644 --- 
a/lib/smarter_json/generator.rb +++ b/lib/smarter_json/generator.rb @@ -36,7 +36,7 @@ class Generator # Strict configuration: an unknown writer option is a caller bug, so it raises # rather than being silently ignored. - KNOWN_OPTIONS = %i[format indent ascii_only script_safe sort_keys coerce].freeze + KNOWN_OPTIONS = %i[format indent ascii_only script_safe sort_keys coerce allow_nan].freeze def initialize(options = {}) unknown = options.keys - KNOWN_OPTIONS @@ -64,6 +64,7 @@ def initialize(options = {}) @script_safe = boolean_option(options, :script_safe) # escape 16 ? to_big_decimal(body) : body.to_f + else significant_digits(body) > 16 ? to_big_decimal(body) : float_or_warn(body) end end + # A finite numeric literal whose magnitude exceeds Float range (e.g. 1e400) becomes + # Β±Infinity β€” a silent data change. Report it via :number_overflow (the value is still + # returned; we warn rather than raise or invent). The Infinity/NaN *keywords* go through + # a separate path and never reach here, so they don't warn. + def float_or_warn(body) + f = body.to_f + warn(:number_overflow, "number literal out of Float range β€” collapsed to #{f}") if f.infinite? + f + end + # Count significant mantissa digits (leading zeros excluded, exponent ignored) to pick # Float vs BigDecimal in :auto mode. 
A single byte-scan β€” the old three-regex version # (strip exponent, strip non-digits, strip leading zeros, .length) ran on every float diff --git a/lib/smarter_json/version.rb b/lib/smarter_json/version.rb index 3d9352e..6478d98 100644 --- a/lib/smarter_json/version.rb +++ b/lib/smarter_json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SmarterJSON - VERSION = "1.0.0" + VERSION = "1.0.0.pre1" end diff --git a/spec/generator_spec.rb b/spec/generator_spec.rb index b8ef3ab..de4ce3b 100644 --- a/spec/generator_spec.rb +++ b/spec/generator_spec.rb @@ -292,6 +292,29 @@ def inner.as_json(*) end end + describe "allow_nan: emit non-finite numbers (opt-in)" do + it "raises GenerateError on a non-finite Float by default" do + expect { SmarterJSON.generate([Float::INFINITY]) }.to raise_error(SmarterJSON::GenerateError) + expect { SmarterJSON.generate([Float::NAN]) }.to raise_error(SmarterJSON::GenerateError) + end + + it "emits Infinity / -Infinity / NaN literals when allow_nan: true" do + expect(SmarterJSON.generate([Float::INFINITY], allow_nan: true)).to eq("[Infinity]") + expect(SmarterJSON.generate([-Float::INFINITY], allow_nan: true)).to eq("[-Infinity]") + expect(SmarterJSON.generate([Float::NAN], allow_nan: true)).to eq("[NaN]") + end + + it "emits a non-finite BigDecimal too" do + expect(SmarterJSON.generate([BigDecimal("Infinity")], allow_nan: true)).to eq("[Infinity]") + end + + it "round-trips through process (the read↔write asymmetry this fixes)" do + expect(SmarterJSON.process_one(SmarterJSON.generate([Float::INFINITY], allow_nan: true))).to eq([Float::INFINITY]) + nan = SmarterJSON.process_one(SmarterJSON.generate([Float::NAN], allow_nan: true)).first + expect(nan).to be_a(Float).and(be_nan) + end + end + describe "value validation (every writer option, valid and invalid)" do # One source of truth for the writer option/value matrix, mirroring the reader # matrix in options_spec.rb. 
format and the four flags are closed sets; indent @@ -303,6 +326,7 @@ def inner.as_json(*) script_safe: { valid: [true, false], invalid: [1, "x", nil] }, sort_keys: { valid: [true, false], invalid: ["x", 0, nil] }, coerce: { valid: [true, false], invalid: [0, "x", nil] }, + allow_nan: { valid: [true, false], invalid: ["yes", 1, nil] }, } it "the case table covers every known writer option (no option escapes the matrix)" do diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index ce693ce..2fa2583 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -825,6 +825,35 @@ end end + # Smart quotes must behave identically in value position \u2014 these mirror the key + # tests above, since both keys and values route through the same parse_smart_string. + describe "smart-quoted values (parity with keys)" do + it "accepts a smart-quoted value with spaces and multibyte characters" do + input = "{\"k\": \u201cna\u00efve value\u201d}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "k" => "na\u00efve value" }) + end + + it "is lenient about smart-quote direction on a value" do + input = "{\"k\": \u201dhi\u201c}" # opens U+201D, closes U+201C + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "k" => "hi" }) + end + + it "accepts an empty smart-quoted value" do + input = "{\"k\": \u201c\u201d}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "k" => "" }) + end + + it "accepts a smart single-quoted value" do + input = "{\"k\": \u2018hi\u2019}" + expect(SmarterJSON.process_one(input, acceleration: acceleration)).to eq({ "k" => "hi" }) + end + end + + it "does not support triple-quoted keys (by design \u2014 a multi-line key has no use case)" do + expect { SmarterJSON.process_one("{'''k''': 1}", acceleration: acceleration) } + .to raise_error(SmarterJSON::ParseError) + end + # Regression guards: a smart quote that appears INSIDE an already-open string # is content, not a delimiter (the 
SmarterCSV "separator inside a quoted field" # principle). These are GREEN today and must stay green when keys change. @@ -863,6 +892,39 @@ end end + describe "number overflow warning (:number_overflow)" do + it "warns when a finite literal overflows Float range to Infinity" do + types = [] + SmarterJSON.process("[1e400]", on_warning: ->(w) { types << w.type }, acceleration: acceleration) + expect(types).to eq([:number_overflow]) + end + + it "warns for negative overflow too" do + types = [] + SmarterJSON.process("[-1e400]", on_warning: ->(w) { types << w.type }, acceleration: acceleration) + expect(types).to eq([:number_overflow]) + end + + it "still returns Infinity — the change is reported, not silent" do + expect(SmarterJSON.process_one("[1e400]", acceleration: acceleration)).to eq([Float::INFINITY]) + end + + it "does NOT warn on a literal Infinity / -Infinity / NaN (intentional, not an overflow)" do + types = [] + SmarterJSON.process("[Infinity, -Infinity, NaN]", on_warning: ->(w) { types << w.type }, acceleration: acceleration) + expect(types).to eq([]) + end + + it "does NOT warn in :bigdecimal mode — the value is preserved, no overflow" do + types = [] + v = SmarterJSON.process_one("[1e400]", decimal_precision: :bigdecimal, + on_warning: ->(w) { types << w.type }, acceleration: acceleration) + expect(types).to eq([]) + expect(v.first).to be_a(BigDecimal) + expect(v.first.finite?).to be(true) + end + end + describe "JavaScript undefined" do it "parses undefined as nil" do expect(SmarterJSON.process("undefined", acceleration: acceleration)).to eq([nil]) From 2ba813b33fee5f1ec57c11afa19583fe63d012bf Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 13:30:02 -0700 Subject: [PATCH 3/7] performance --- ext/smarter_json/smarter_json.c | 6 ++++-- lib/smarter_json/parser.rb | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ext/smarter_json/smarter_json.c b/ext/smarter_json/smarter_json.c index 41243d5..5852635 100644 --- 
a/ext/smarter_json/smarter_json.c +++ b/ext/smarter_json/smarter_json.c @@ -600,8 +600,10 @@ static FJ_ALWAYS_INLINE VALUE fj_float_from_parts(fj_state *st, uint64_t m10, in } /* A finite literal whose magnitude exceeds Float range (e.g. 1e400) becomes ±Infinity — a silent data change. Report it via :number_overflow (the value is - still returned). The Infinity/NaN keywords take separate paths and never get here. */ - if (isinf(d)) fj_warn(st, fj_sym_number_overflow, "number literal out of Float range — collapsed to Infinity"); + still returned). The Infinity/NaN keywords take separate paths and never get here. + Gate isinf on a listening handler (matches the Ruby float_or_warn): no handler -> + no point detecting, and it keeps the test off the hot number path. */ + if (st->on_warning != Qnil && isinf(d)) fj_warn(st, fj_sym_number_overflow, "number literal out of Float range — collapsed to Infinity"); return rb_float_new(d); } diff --git a/lib/smarter_json/parser.rb b/lib/smarter_json/parser.rb index 100db96..df1fea2 100644 --- a/lib/smarter_json/parser.rb +++ b/lib/smarter_json/parser.rb @@ -1403,7 +1403,10 @@ def decimal_value(body) # a separate path and never reach here, so they don't warn. def float_or_warn(body) f = body.to_f - warn(:number_overflow, "number literal out of Float range — collapsed to #{f}") if f.infinite? + # Only test for overflow when an on_warning handler is listening: `f.infinite?` is a + # per-float method call we don't want on the hot number path otherwise, and with no + # handler the warning would go nowhere anyway. Overflow is vanishingly rare. + warn(:number_overflow, "number literal out of Float range — collapsed to #{f}") if @on_warning && f.infinite? 
f end From f323f44cde0098e0bf4b9ff4f08e6908e545a591 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 15:21:15 -0700 Subject: [PATCH 4/7] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c8e5e7..8235ad8 100644 --- a/README.md +++ b/README.md @@ -176,7 +176,7 @@ Where a like-for-like comparison exists, here is SmarterJSON's C path against ea | config.jsonc | **1.1× faster** | 1.2× slower | **3.6× faster** | | deeply_nested | **1.2× faster** | **can't parse** ‡ | **4.1× faster** | | github_events | ≈ tied | 1.1× slower | **2.7× faster** | -| string_array | **1.1× faster** | ≈ tied | **1.7× faster** | +| string_array | ≈ tied | ≈ tied | **1.6× faster** | | twitter | **1.3× faster** | 1.2× slower | **3.2× faster** | | usgs_earthquakes ≠ | **1.4× faster** | 1.1× slower | **3.4× faster** | | weather_berlin | **1.8× faster** | **1.1× faster** | **3.2× faster** | From eec834c2c097cac260e501668fa678a90659b890 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 16:02:30 -0700 Subject: [PATCH 5/7] making generator iterative --- CHANGELOG.md | 1 + README.md | 2 +- lib/smarter_json/generator.rb | 117 +++++++++++++++++----------------- spec/generator_spec.rb | 45 +++++++++++++ 4 files changed, 105 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1943026..1dcbd5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ RSpec tests: 1,034 - Object keys may now use smart/curly quotes too (e.g. JSON pasted from a word processor), not just string values. - `SmarterJSON.generate` accepts `allow_nan: true` to emit `NaN` / `Infinity` / `-Infinity` (JSON5-style) instead of raising, so non-finite numbers round-trip; the default still raises. - A numeric literal that overflows `Float` range (e.g. `1e400`) now reports a `:number_overflow` warning via `on_warning` instead of silently becoming `Infinity`.
+- `SmarterJSON.generate` is now iterative (like the parser), so serializing a deeply nested structure no longer risks `SystemStackError` — reading and writing are both depth-safe. ## 0.9.9 (2026-06-07) - Much faster pure-Ruby parsing (the path used without the C extension) — roughly 3× on string-heavy data, ~2× on number-heavy, ~1.7× on object-heavy (on a YJIT-enabled Ruby). Parsed values are unchanged. diff --git a/README.md b/README.md index 8235ad8..b6761f1 100644 --- a/README.md +++ b/README.md @@ -299,7 +299,7 @@ TEXT ## Nesting & untrusted input -Both the C extension and the pure-Ruby engine are **iterative, not recursive** — they track nesting on an explicit, heap-allocated stack rather than the call stack. So deeply nested input **cannot overflow the call stack or segfault**: nesting is bounded only by available memory, the same posture as Oj (which also ships no nesting limit; the stdlib `json` caps at 100). The `deeply_nested.json` benchmark (212 MB of nesting) is handled without issue. +Both the C extension and the pure-Ruby engine are **iterative, not recursive** — they track nesting on an explicit, heap-allocated stack rather than the call stack. So deeply nested input **cannot overflow the call stack or segfault**: nesting is bounded only by available memory, the same posture as Oj (which also ships no nesting limit; the stdlib `json` caps at 100). The `deeply_nested.json` benchmark (212 MB of nesting) is handled without issue. **`generate` is iterative too**, so serializing a deeply nested Ruby structure can't overflow the stack either — reading *and* writing are both depth-safe. The trade-off: there is currently **no fixed nesting or input-size limit**, so extremely large or adversarially-nested untrusted input is bounded by memory (it can exhaust RAM), not by a crash. If you process untrusted input and want a hard cap, that's a planned opt-in guard — for now, size-limit upstream.
diff --git a/lib/smarter_json/generator.rb b/lib/smarter_json/generator.rb index bf6a97a..121be59 100644 --- a/lib/smarter_json/generator.rb +++ b/lib/smarter_json/generator.rb @@ -97,7 +97,39 @@ def boolean_option(options, key) raise ArgumentError, "#{key} must be true or false (got #{value.inspect})" end - def emit(obj, buf, level = 0) + # Iterative serializer — an explicit frame stack (one frame per open container), + # mirroring the recursive structure but heap-allocated, so arbitrarily deep input + # cannot overflow the call stack (parity with the iterative parser). Output is + # byte-identical to the former recursive version. A frame is a small Array: + # [members, idx, is_hash, before_first, before_rest, colon, closer, level] + def emit(obj, buf) + stack = [] + push_value(obj, 0, buf, stack) + until stack.empty? + frame = stack.last + members = frame[0] + i = frame[1] + if i == members.length + buf << frame[6] # closer + stack.pop + next + end + frame[1] = i + 1 + buf << (i.zero? ? frame[3] : frame[4]) # opener-pad / separator-pad + if frame[2] # hash + k, v = members[i] + emit_string(k.is_a?(String) ? k : k.to_s, buf) # Symbol/other keys -> string + buf << frame[5] # colon + push_value(v, frame[7] + 1, buf, stack) + else + push_value(members[i], frame[7] + 1, buf, stack) + end + end + end + + # Emit one value at `level`: a scalar appends directly; a non-empty container writes + # its opener and pushes a frame for the driver above to walk (no recursion into it). + def push_value(obj, level, buf, stack) case obj when nil then buf << "null" when true then buf << "true" @@ -107,22 +139,30 @@ def emit(obj, buf, level = 0) when Integer then buf << obj.to_s when Float then emit_float(obj, buf) when BigDecimal then emit_bigdecimal(obj, buf) - when Array then emit_array(obj, buf, level) - when Hash then emit_hash(obj, buf, level) + when Array + return buf << "[]" if obj.empty? # empty stays inline, even in pretty mode + + buf << (@pretty ?
"[\n" : "[") + stack << container_frame(obj, false, level) + when Hash + return buf << "{}" if obj.empty? # empty stays inline, even in pretty mode + + pairs = @sort_keys ? obj.sort_by { |k, _| k.is_a?(String) ? k : k.to_s } : obj.to_a + buf << (@pretty ? "{\n" : "{") + stack << container_frame(pairs, true, level) else - return emit_coerced(obj, buf, level) if @coerce + return push_coerced(obj, level, buf, stack) if @coerce raise SmarterJSON::GenerateError, "SmarterJSON.generate cannot serialize #{obj.class}" end end - # coerce: true — let a value that isn't natively supported convert itself. - # Prefer as_json (its result is re-emitted through the normal pipeline, so the - # escaping/format options still apply); fall back to to_json (spliced as-is, so - # ascii_only / script_safe do not reach inside it). Raise if it defines neither. - def emit_coerced(obj, buf, level) + # coerce: true — prefer as_json (re-emitted through the normal pipeline, so the + # escaping/format options still apply); else to_json (spliced as-is, so ascii_only / + # script_safe do not reach inside it); else raise. + def push_coerced(obj, level, buf, stack) if obj.respond_to?(:as_json) - emit(obj.as_json, buf, level) + push_value(obj.as_json, level, buf, stack) elsif obj.respond_to?(:to_json) buf << obj.to_json else @@ -130,57 +170,16 @@ def emit_coerced(obj, buf, level) end end - def emit_array(arr, buf, level) - return buf << "[]" if arr.empty? # empty stays inline, even in pretty mode - + # Build a frame for an open container at `level`, precomputing its punctuation/indent + # once (as the recursive version computed `pad` once per container). + def container_frame(members, is_hash, level) + close_glyph = is_hash ? "}" : "]" if @pretty - pad = " " * (@indent * (level + 1)) - buf << "[\n" - arr.each_with_index do |v, i| - buf << ",\n" unless i.zero?
- buf << pad - emit(v, buf, level + 1) - end - buf << "\n" << (" " * (@indent * level)) << "]" + pad = " " * (@indent * (level + 1)) + padl = " " * (@indent * level) + [members, 0, is_hash, pad, ",\n#{pad}", ": ", "\n#{padl}#{close_glyph}", level] else - buf << "[" - arr.each_with_index do |v, i| - buf << "," unless i.zero? - emit(v, buf, level) - end - buf << "]" - end - end - - def emit_hash(hash, buf, level) - return buf << "{}" if hash.empty? # empty stays inline, even in pretty mode - - pairs = @sort_keys ? hash.sort_by { |k, _| k.is_a?(String) ? k : k.to_s } : hash - - if @pretty - pad = " " * (@indent * (level + 1)) - buf << "{\n" - first = true - pairs.each do |k, v| - buf << ",\n" unless first - first = false - buf << pad - emit_string(k.is_a?(String) ? k : k.to_s, buf) # Symbol/other keys -> string - buf << ": " - emit(v, buf, level + 1) - end - buf << "\n" << (" " * (@indent * level)) << "}" - else - buf << "{" - first = true - pairs.each do |k, v| - buf << "," unless first - first = false - emit_string(k.is_a?(String) ? k : k.to_s, buf) # Symbol/other keys -> string - buf << ":" - emit(v, buf, level) - end - buf << "}" + [members, 0, is_hash, "", ",", ":", close_glyph, level] end end diff --git a/spec/generator_spec.rb b/spec/generator_spec.rb index de4ce3b..96662bc 100644 --- a/spec/generator_spec.rb +++ b/spec/generator_spec.rb @@ -363,4 +363,49 @@ def inner.as_json(*) .to raise_error(ArgumentError, /unknown option.*pretty/i) end end + + # The parser is iterative (handles 212 MB of nesting); the generator must match — + # deeply nested Ruby structures must not blow the call stack with SystemStackError.
+ describe "deep nesting (iterative generator — no stack overflow)" do + it "generates a deeply nested array" do + arr = [] + cur = arr + 100_000.times do + nxt = [] + cur << nxt + cur = nxt + end + out = SmarterJSON.generate(arr) + expect(out.start_with?("[" * 100)).to be(true) + expect(out.end_with?("]" * 100)).to be(true) + end + + it "generates a deeply nested hash" do + h = {} + cur = h + 100_000.times do + nxt = {} + cur["k"] = nxt + cur = nxt + end + expect { SmarterJSON.generate(h) }.not_to raise_error + end + + it "round-trips deeply nested input (process then generate)" do + s = ("[" * 50_000) + ("]" * 50_000) + v = SmarterJSON.process_one(s) + expect { SmarterJSON.generate(v) }.not_to raise_error + end + + it "deep nesting with indent: 2 also works (pretty path)" do + arr = [] + cur = arr + 20_000.times do + nxt = [] + cur << nxt + cur = nxt + end + expect { SmarterJSON.generate(arr, indent: 2) }.not_to raise_error + end + end end From a5fbabc3492cc8db3bc4f0ff50ab1781498327ea Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 18:24:17 -0700 Subject: [PATCH 6/7] version 1.0.0 --- CHANGELOG.md | 2 +- lib/smarter_json/version.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dcbd5e..a3a8a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ > ⚠️ We discourage the use of `process(input).first` / `process(input)[0]` because it silently drops potential additional documents > Please use `process_one` if you are expecting only one JSON doc, e.g. in API payloads.
-## 1.0.0.pre1 (unreleased) +## 1.0.0 (2026-06-08) RSpec tests: 1,034 diff --git a/lib/smarter_json/version.rb b/lib/smarter_json/version.rb index 6478d98..3d9352e 100644 --- a/lib/smarter_json/version.rb +++ b/lib/smarter_json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SmarterJSON - VERSION = "1.0.0.pre1" + VERSION = "1.0.0" end From 7a9ac99755ff9da5b32acec252bd5b7a30257c79 Mon Sep 17 00:00:00 2001 From: Tilo Sloboda Date: Mon, 8 Jun 2026 18:31:25 -0700 Subject: [PATCH 7/7] update --- smarter_json.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smarter_json.gemspec b/smarter_json.gemspec index 8212cef..fbf4486 100644 --- a/smarter_json.gemspec +++ b/smarter_json.gemspec @@ -9,9 +9,9 @@ Gem::Specification.new do |spec| spec.version = SmarterJSON::VERSION spec.date = Time.now.utc.strftime('%Y-%m-%d') spec.license = 'MIT' - spec.summary = 'SmarterJSON: A lenient, robust, streaming JSON parser for Ruby supporting JSON, JSON5, NDJSON, and HJSON-style input.' + spec.summary = 'A lenient, fast JSON processor for Ruby — reads strict JSON, NDJSON, JSON5, HJSON, and the messy JSON humans and LLMs actually write.' spec.description = <<~DESC - SmarterJSON is a permissive JSON/JSON5 parser: comments, trailing commas, different quote styles, Python/JS keywords, and more, all parse to the same Ruby objects. Purposely no strict mode, always best-effort, blazing fast. Handles BOM, smart quotes, messy input. Compatible with config/data files and API responses alike. + A lenient, fast JSON processor for Ruby. It extracts strict JSON, NDJSON, JSON5, HJSON-style config, and the messy JSON-ish input humans and LLMs actually write — comments, trailing commas, single / unquoted / smart quotes, Python and JS keywords, a UTF-8 BOM, and more all parse to the same Ruby objects, with no modes or flags to set.
Where a traditional parser stops at the first deviation and throws away the whole document, SmarterJSON keeps going — it optimizes for getting your data out, not for policing the JSON spec. It reads multi-document NDJSON / JSONL in one call (and streams it with a block), and in benchmarks its C extension matches or beats Oj on nearly every file. SmarterJSON is opinionated: we want your JSON processing to be successful. DESC spec.homepage = "https://github.com/tilo/smarter_json"