From afcdc93eb0468ed84600437dfb845d83c7fc1e99 Mon Sep 17 00:00:00 2001 From: Andrew Nesbitt Date: Mon, 13 Apr 2026 08:59:11 +0100 Subject: [PATCH] Add input limits to prevent DoS from untrusted version strings Bounds input length and constraint count at parser entry points so malformed or hostile input fails fast instead of consuming unbounded memory or CPU. - Version::MAX_LENGTH (256) caps version strings at construction and cache lookup, bounding cache key memory at entries * MAX_LENGTH - Parser::MAX_INPUT_LENGTH (2048) caps range strings at parse/parse_native - Parser::MAX_CONSTRAINTS (64) caps |-separated and ||-separated constraint counts, preventing the O(n^2 log n) exclusion loop from being driven into multi-second runtime - Narrows bare rescue in maven union parser to ArgumentError so it no longer swallows Interrupt or NoMemoryError Tests in test/test_security.rb cover each limit plus regression checks that legitimate inputs near the limits still parse fast. --- lib/vers/constraint.rb | 6 ++ lib/vers/parser.rb | 37 ++++++++- lib/vers/version.rb | 13 ++++ test/test_security.rb | 172 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 225 insertions(+), 3 deletions(-) create mode 100644 test/test_security.rb diff --git a/lib/vers/constraint.rb b/lib/vers/constraint.rb index 1eb2724..1be7b5d 100644 --- a/lib/vers/constraint.rb +++ b/lib/vers/constraint.rb @@ -54,6 +54,12 @@ def initialize(operator, version) # Vers::Constraint.parse("!=2.0.0") # => # # def self.parse(constraint_string) + # Bound input length before cache lookup so oversized strings never + # become cache keys and never trigger eviction. + if constraint_string.length > Version::MAX_LENGTH + raise ArgumentError, "Constraint string too long (#{constraint_string.length} > #{Version::MAX_LENGTH})" + end + return @@constraint_cache[constraint_string] if @@constraint_cache.key?(constraint_string) if @@constraint_cache.size >= @@cache_size_limit diff --git a/lib/vers/parser.rb b/lib/vers/parser.rb index f625758..5de478c 100644 --- a/lib/vers/parser.rb +++ b/lib/vers/parser.rb @@ -32,6 +32,19 @@ class Parser @@parser_cache = {} @@cache_size_limit = 500 + # Maximum accepted length for a range string at parse/parse_native + # entry points. Range strings concatenate multiple constraints so this + # is set higher than Version::MAX_LENGTH while still bounding + # split/regex work to a few KB. + MAX_INPUT_LENGTH = 2048 + + # Maximum number of |-separated or ||-separated constraints in a + # single range. The exclusion loop in parse_constraints does + # O(n^2 log n) work as each != splits an interval and reconstructs the + # range; capping n keeps the worst case under a few thousand interval + # operations. + MAX_CONSTRAINTS = 64 + ## # Parses a vers URI string into a VersionRange # @@ -47,6 +60,8 @@ class Parser # parser.parse("vers:pypi/==1.2.3") # def parse(vers_string) + validate_input_length!(vers_string) + if vers_string == "*" return VersionRange.unbounded end @@ -75,6 +90,8 @@ def parse(vers_string) # parser.parse_native(">=1.0,<2.0", "pypi") # def parse_native(range_string, scheme) + validate_input_length!(range_string) + case scheme when "npm" parse_npm_range(range_string) @@ -151,6 +168,12 @@ def to_vers_string(version_range, scheme) private + def validate_input_length!(input) + return if input.nil? + return if input.length <= MAX_INPUT_LENGTH + raise ArgumentError, "Range string too long (#{input.length} > #{MAX_INPUT_LENGTH})" + end + def sort_key_for_constraint(constraint) version = constraint.sub(OPERATOR_PREFIX_REGEX, '') v = Version.cached_new(version) @@ -158,7 +181,12 @@ def sort_key_for_constraint(constraint) end def parse_constraints(constraints_string, scheme) - constraint_strings = constraints_string.split(/[|,]/) + # Limit constraint count to bound the O(n^2 log n) exclusion loop + # below: each != splits an interval and reconstructs the range. + constraint_strings = constraints_string.split(/[|,]/, MAX_CONSTRAINTS + 1) + if constraint_strings.length > MAX_CONSTRAINTS + raise ArgumentError, "Too many constraints (> #{MAX_CONSTRAINTS})" + end intervals = [] exclusions = [] interval_scheme = %w[maven nuget].include?(scheme) ? scheme : nil @@ -200,7 +228,10 @@ def parse_npm_range(range_string) # Handle || (OR) operator if range_string.include?('||') - or_parts = range_string.split('||').map(&:strip) + or_parts = range_string.split('||', MAX_CONSTRAINTS + 1).map(&:strip) + if or_parts.length > MAX_CONSTRAINTS + raise ArgumentError, "Too many || clauses (> #{MAX_CONSTRAINTS})" + end ranges = or_parts.map { |part| parse_npm_range(part) } return ranges.reduce { |acc, range| acc.union(range) } end @@ -498,7 +529,7 @@ def parse_maven_range(range_string) begin parsed_range = parse_maven_range(range_part) ranges << parsed_range - rescue + rescue ArgumentError # If parsing fails, skip this part end end diff --git a/lib/vers/version.rb b/lib/vers/version.rb index e9606b3..59eae41 100644 --- a/lib/vers/version.rb +++ b/lib/vers/version.rb @@ -22,15 +22,24 @@ class Version # Regex for parsing semantic version components including build metadata SEMANTIC_VERSION_REGEX = /\A(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:-([^+]+))?(?:\+(.+))?\z/ + # Maximum accepted length for a version string. Real-world version + # strings rarely exceed 100 characters; 256 leaves headroom for unusual + # prerelease tags while bounding regex/split work and cache key size. + MAX_LENGTH = 256 + attr_reader :major, :minor, :patch, :prerelease, :build ## # Creates a new Version object # # @param version_string [String] The version string to parse + # @raise [ArgumentError] if the version string exceeds MAX_LENGTH # def initialize(version_string) @original = version_string.to_s + if @original.length > MAX_LENGTH + raise ArgumentError, "Version string too long (#{@original.length} > #{MAX_LENGTH})" + end parse_version end @@ -41,6 +50,10 @@ def initialize(version_string) # @return [Version] Cached or new Version object # def self.cached_new(version_string) + # Skip caching for oversized keys to bound cache memory by entry + # count, not by attacker-controlled key length. + return new(version_string) if version_string.to_s.length > MAX_LENGTH + if @@version_cache.size >= @@cache_size_limit # Keep the most recent half instead of clearing everything keys = @@version_cache.keys diff --git a/test/test_security.rb b/test/test_security.rb new file mode 100644 index 0000000..3fa9c7c --- /dev/null +++ b/test/test_security.rb @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +require "test_helper" +require "timeout" + +# Tests covering denial-of-service vectors in version and range parsing. +# These exercise resource limits at the public API boundary where untrusted +# input enters the library. +class TestSecurity < Minitest::Test + # === Input length limits === + # + # Version strings, constraint strings, and range strings flow from user + # input straight into regex matching, String#split, and cache keys. + # Without a length cap, a multi-megabyte string is processed end-to-end + # and retained. + + def test_version_rejects_oversized_input + huge = "1." + ("1." * 5000) + "1" + assert_raises(ArgumentError) do + Vers::Version.new(huge) + end + end + + def test_version_rejects_oversized_prerelease + huge = "1.0.0-" + ("a" * 5000) + assert_raises(ArgumentError) do + Vers::Version.new(huge) + end + end + + def test_constraint_rejects_oversized_input + huge = ">=" + ("1" * 5000) + assert_raises(ArgumentError) do + Vers::Constraint.parse(huge) + end + end + + def test_parser_rejects_oversized_vers_uri + huge = "vers:npm/" + ("1" * 5000) + assert_raises(ArgumentError) do + Vers.parse(huge) + end + end + + def test_parser_rejects_oversized_native_range + huge = "^" + ("1" * 5000) + assert_raises(ArgumentError) do + Vers.parse_native(huge, "npm") + end + end + + def test_version_accepts_reasonable_long_input + # 200 chars is unusual but legitimate (long prerelease tags exist) + v = "1.0.0-" + ("a" * 194) + assert_equal 200, v.length + version = Vers::Version.new(v) + assert_equal 1, version.major + end + + # === Constraint count limits === + # + # parse_constraints splits on [|,] without limit. npm parsing splits on + # || without limit. Each constraint becomes an Interval object, and + # exclusions trigger a quadratic rebuild loop. + + def test_parser_rejects_too_many_constraints + many = (1..200).map { |i| ">=#{i}" }.join("|") + assert_raises(ArgumentError) do + Vers.parse("vers:npm/#{many}") + end + end + + def test_parser_rejects_too_many_npm_or_clauses + many = (1..200).map { |i| "^#{i}.0.0" }.join(" || ") + assert_raises(ArgumentError) do + Vers.parse_native(many, "npm") + end + end + + def test_parser_accepts_reasonable_constraint_count + # Real-world ranges rarely exceed a dozen constraints + some = (1..20).map { |i| "#{i}.0.0" }.join("|") + range = Vers.parse("vers:npm/#{some}") + assert range.contains?("5.0.0") + end + + # === Quadratic exclusion DoS === + # + # Each != exclusion splits the range into one more interval, then + # reconstructs the VersionRange (sort + merge). N exclusions on a range + # that grows from 1 to N intervals does O(N^2 log N) work. With the + # constraint count limit in place this stays bounded, but we also assert + # the bounded case completes quickly. + + def test_many_exclusions_complete_in_reasonable_time + # 64 exclusions sits under MAX_CONSTRAINTS and must finish fast + excl = (1..64).map { |i| "!=#{i}.0.0" }.join("|") + Timeout.timeout(1) do + range = Vers.parse("vers:npm/#{excl}") + refute range.contains?("32.0.0") + assert range.contains?("100.0.0") + end + end + + def test_pathological_exclusion_count_rejected + # Without a constraint count limit this input runs for tens of seconds + excl = (1..2000).map { |i| "!=#{i}.0.0" }.join("|") + assert_raises(ArgumentError) do + Vers.parse("vers:npm/#{excl}") + end + end + + # === Cache key memory exhaustion === + # + # Version.cached_new, Constraint.parse, and Parser caches use the raw + # input string as a hash key. The caches evict by entry count, not by + # byte size. With MAX_LENGTH enforced at construction, oversized input + # raises before reaching any cache. Cache key memory is therefore + # bounded by entry_count * MAX_LENGTH. These tests verify that the + # length cap holds at the cache entry path and that normal-sized keys + # still cache. + + def test_version_cached_new_rejects_oversized_input + huge = "1.0.0-" + ("a" * 5000) + assert_raises(ArgumentError) do + Vers::Version.cached_new(huge) + end + end + + def test_version_cache_still_caches_normal_keys + v1 = Vers::Version.cached_new("1.2.3-cachetest") + v2 = Vers::Version.cached_new("1.2.3-cachetest") + assert_same v1, v2, "normal version strings should hit the cache" + end + + def test_constraint_cache_still_caches_normal_keys + c1 = Vers::Constraint.parse(">=1.2.3-cachetest") + c2 = Vers::Constraint.parse(">=1.2.3-cachetest") + assert_same c1, c2, "normal constraint strings should hit the cache" + end + + # === Maven parser bare rescue === + # + # parser.rb:501 catches everything including Interrupt and NoMemoryError. + # We can't easily test "doesn't swallow Interrupt" without signal hackery, + # but we can assert the narrowed rescue still tolerates malformed segments. + + def test_maven_union_tolerates_malformed_segment + # Second segment is garbage; should be skipped, first segment parsed. + range = Vers.parse_native("[1.0,2.0],[~~~,~~~]", "maven") + assert range.contains?("1.5") + end + + # === Performance smoke tests === + # + # Inputs near the limits should still parse in well under a second. + + def test_long_valid_version_parses_quickly + Timeout.timeout(1) do + 100.times do |i| + Vers::Version.new("1.0.0-rc.#{i}.build.metadata.string") + end + end + end + + def test_long_valid_range_parses_quickly + constraints = (1..50).map { |i| ">=#{i}.0.0" }.join("|") + Timeout.timeout(1) do + 10.times { Vers.parse("vers:npm/#{constraints}") } + end + end +end