From 9ec81252992969b5aacad27d8e4d0e361da92f14 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:18:53 +0000 Subject: [PATCH 1/5] Initial plan From 1f0a82413b94579067d3a81ba80d4cf3e23a6ef3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:31:20 +0000 Subject: [PATCH 2/5] fix: optimize OR/Contains indexed predicates to avoid cross-collection scan fallback Agent-Logs-Url: https://github.com/EntglDb/BLite/sessions/cb144a09-ce2a-422f-864e-23e1bf3a2320 Co-authored-by: mrdevrobot <12503462+mrdevrobot@users.noreply.github.com> --- .../Collections/DocumentCollection.cs | 61 ++++-- src/BLite.Core/Query/IndexOptimizer.cs | 200 ++++++++++++++++-- .../CrossCollectionQueryIsolationTests.cs | 62 ++++++ tests/BLite.Tests/IndexOptimizationTests.cs | 59 +++++- 4 files changed, 333 insertions(+), 49 deletions(-) create mode 100644 tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs diff --git a/src/BLite.Core/Collections/DocumentCollection.cs b/src/BLite.Core/Collections/DocumentCollection.cs index a2e2b7e..722da1a 100644 --- a/src/BLite.Core/Collections/DocumentCollection.cs +++ b/src/BLite.Core/Collections/DocumentCollection.cs @@ -2469,19 +2469,29 @@ internal async Task CountByPredicateAsync( // When HasResiduePredicate=true (compound AND with a non-indexed clause), we must // fall through to FetchAsync so the residue predicate is applied per document. var indexOpt = Query.IndexOptimizer.TryOptimize(whereClause, GetIndexes(), ConverterRegistry); - if (indexOpt != null - && !indexOpt.IsVectorSearch - && !indexOpt.IsSpatialSearch - && !indexOpt.HasResiduePredicate) - { - var index = _indexManager.GetIndex(indexOpt.IndexName); - if (index != null) - { - // Use the per-bound inclusivity flags from OptimizationResult. 
- // These are set correctly for every operator (==, >=, >, <=, <) and - // propagated through AND-merges, so compound predicates like - // x.Price > 50 && x.Price < 90 get both boundaries exclusive. - return index.CountRange(indexOpt.MinValue, indexOpt.MaxValue, + if (indexOpt != null + && !indexOpt.IsVectorSearch + && !indexOpt.IsSpatialSearch + && !indexOpt.HasResiduePredicate) + { + var index = _indexManager.GetIndex(indexOpt.IndexName); + if (index != null) + { + if (indexOpt.InValues != null) + { + int inCount = 0; + foreach (var key in indexOpt.InValues) + { + inCount += index.CountRange(key, key, true, true, null); + } + return inCount; + } + + // Use the per-bound inclusivity flags from OptimizationResult. + // These are set correctly for every operator (==, >=, >, <=, <) and + // propagated through AND-merges, so compound predicates like + // x.Price > 50 && x.Price < 90 get both boundaries exclusive. + return index.CountRange(indexOpt.MinValue, indexOpt.MaxValue, indexOpt.StartInclusive, indexOpt.EndInclusive, null); } } @@ -2656,13 +2666,24 @@ Func GetCompiled() => await foreach (var item in spatialSeq) if (indexOpt.FilterCompleteness == Query.IndexOptimizer.FilterCompleteness.Exact || GetCompiled()(item)) { yield return item; if (++yielded >= fetchLimit) yield break; } } - else - { - await foreach (var item in QueryIndexAsync(indexOpt.IndexName, indexOpt.MinValue, indexOpt.MaxValue, true, 0, int.MaxValue, transaction, ct)) - if (indexOpt.FilterCompleteness == Query.IndexOptimizer.FilterCompleteness.Exact || GetCompiled()(item)) { yield return item; if (++yielded >= fetchLimit) yield break; } - } - yield break; - } + else + { + if (indexOpt.InValues != null) + { + foreach (var key in indexOpt.InValues) + { + await foreach (var item in QueryIndexAsync(indexOpt.IndexName, key, key, true, 0, int.MaxValue, transaction, ct)) + if (indexOpt.FilterCompleteness == Query.IndexOptimizer.FilterCompleteness.Exact || GetCompiled()(item)) { yield return item; if 
(++yielded >= fetchLimit) yield break; } + } + } + else + { + await foreach (var item in QueryIndexAsync(indexOpt.IndexName, indexOpt.MinValue, indexOpt.MaxValue, true, 0, int.MaxValue, transaction, ct)) + if (indexOpt.FilterCompleteness == Query.IndexOptimizer.FilterCompleteness.Exact || GetCompiled()(item)) { yield return item; if (++yielded >= fetchLimit) yield break; } + } + } + yield break; + } // ── Strategy 2: BSON-level predicate scan ───────────────────────── // Filters at raw-BSON level before deserializing — no compiled Func needed. diff --git a/src/BLite.Core/Query/IndexOptimizer.cs b/src/BLite.Core/Query/IndexOptimizer.cs index 5d5bac2..e99a28d 100644 --- a/src/BLite.Core/Query/IndexOptimizer.cs +++ b/src/BLite.Core/Query/IndexOptimizer.cs @@ -9,11 +9,12 @@ namespace BLite.Core.Query; internal static class IndexOptimizer { - public class OptimizationResult - { - public string IndexName { get; set; } = ""; - public object? MinValue { get; set; } - public object? MaxValue { get; set; } + public class OptimizationResult + { + public string IndexName { get; set; } = ""; + public object? MinValue { get; set; } + public object? MaxValue { get; set; } + public IReadOnlyList? InValues { get; set; } /// /// Describes how completely the index scan covers the WHERE expression. @@ -152,11 +153,11 @@ public enum FilterCompleteness } [RequiresDynamicCode("Index optimization may use Expression.Compile() to evaluate complex expressions.")] - private static OptimizationResult? OptimizeExpression(Expression expression, ParameterExpression parameter, IEnumerable indexes, ValueConverterRegistry? registry = null) - { - // ... (Existing AndAlso logic remains the same) ... - if (expression is BinaryExpression binary && binary.NodeType == ExpressionType.AndAlso) - { + private static OptimizationResult? OptimizeExpression(Expression expression, ParameterExpression parameter, IEnumerable indexes, ValueConverterRegistry? registry = null) + { + // ... 
(Existing AndAlso logic remains the same) ... + if (expression is BinaryExpression binary && binary.NodeType == ExpressionType.AndAlso) + { var left = OptimizeExpression(binary.Left, parameter, indexes, registry); var right = OptimizeExpression(binary.Right, parameter, indexes, registry); @@ -187,8 +188,46 @@ public enum FilterCompleteness // does not fully satisfy the WHERE; caller must post-filter. if (left != null) { left.FilterCompleteness = FilterCompleteness.PartialAnd; left.HasResiduePredicate = true; return left; } if (right != null) { right.FilterCompleteness = FilterCompleteness.PartialAnd; right.HasResiduePredicate = true; return right; } - return null; - } + return null; + } + + // Handle OR over exact matches on the same indexed field: + // x => x.Prop == a || x.Prop == b → multi-point index probes. + if (expression is BinaryExpression orBinary && orBinary.NodeType == ExpressionType.OrElse) + { + var left = OptimizeExpression(orBinary.Left, parameter, indexes, registry); + var right = OptimizeExpression(orBinary.Right, parameter, indexes, registry); + + if (left != null && right != null && + left.IndexName == right.IndexName && + left.FilterCompleteness == FilterCompleteness.Exact && + right.FilterCompleteness == FilterCompleteness.Exact && + TryGetPointValues(left, out var leftValues) && + TryGetPointValues(right, out var rightValues)) + { + var merged = new List(leftValues.Count + rightValues.Count); + var seen = new HashSet(); + foreach (var v in leftValues) + { + if (seen.Add(v)) merged.Add(v); + } + foreach (var v in rightValues) + { + if (seen.Add(v)) merged.Add(v); + } + + return new OptimizationResult + { + IndexName = left.IndexName, + InValues = merged, + IsRange = false, + FilterCompleteness = FilterCompleteness.Exact, + StartInclusive = true, + EndInclusive = true + }; + } + return null; + } // Handle bare bool member: e => e.IsActive (equivalent to e.IsActive == true) // Handle logical NOT over bool member: e => !e.IsActive (equivalent to 
e.IsActive == false) @@ -283,11 +322,29 @@ notExpr.Operand is MemberExpression notMember && } return result; } - } - - // Handle StartsWith - if (expression is MethodCallExpression call && call.Method.Name == "StartsWith" && call.Object is MemberExpression member) - { + } + + if (TryParseContainsInPredicate(expression, parameter, registry, out var inPropertyPath, out var inValues)) + { + CollectionIndexInfo? index = null; + foreach (var idx in indexes) { if (Matches(idx, inPropertyPath)) { index = idx; break; } } + if (index != null) + { + return new OptimizationResult + { + IndexName = index.Name, + InValues = inValues, + IsRange = false, + FilterCompleteness = FilterCompleteness.Exact, + StartInclusive = true, + EndInclusive = true + }; + } + } + + // Handle StartsWith + if (expression is MethodCallExpression call && call.Method.Name == "StartsWith" && call.Object is MemberExpression member) + { if (member.Expression == parameter && call.Arguments[0] is ConstantExpression constant && constant.Value is string prefix) { CollectionIndexInfo? index = null; @@ -372,8 +429,26 @@ notExpr.Operand is MemberExpression notMember && } } - return null; - } + return null; + } + + private static bool TryGetPointValues(OptimizationResult result, out IReadOnlyList values) + { + if (result.InValues != null) + { + values = result.InValues; + return true; + } + + if (!result.IsRange && Equals(result.MinValue, result.MaxValue)) + { + values = [result.MinValue]; + return true; + } + + values = Array.Empty(); + return false; + } private static string IncrementPrefix(string prefix) { @@ -462,7 +537,7 @@ static string Normalize(string s) => } [RequiresDynamicCode("Index optimization may use Expression.Compile() to evaluate complex expressions.")] - private static (string? propertyName, object? value, ExpressionType op) ParseSimplePredicate(Expression expression, ParameterExpression parameter, ValueConverterRegistry? registry = null) + private static (string? propertyName, object? 
value, ExpressionType op) ParseSimplePredicate(Expression expression, ParameterExpression parameter, ValueConverterRegistry? registry = null) { if (expression is BinaryExpression binary) { @@ -559,8 +634,89 @@ private static (string? propertyName, object? value, ExpressionType op) ParseSim } } - return (null, null, ExpressionType.Default); - } + return (null, null, ExpressionType.Default); + } + + [RequiresDynamicCode("Index optimization may use Expression.Compile() to evaluate complex expressions.")] + private static bool TryParseContainsInPredicate( + Expression expression, + ParameterExpression parameter, + ValueConverterRegistry? registry, + out string propertyPath, + out IReadOnlyList values) + { + propertyPath = null!; + values = Array.Empty(); + + if (expression is not MethodCallExpression call || call.Method.Name != "Contains") + return false; + + static Expression UnwrapConvert(Expression e) + { + while (e is UnaryExpression { NodeType: ExpressionType.Convert or ExpressionType.ConvertChecked } u) + e = u.Operand; + return e; + } + + Expression? collectionExpr = null; + Expression? memberExpr = null; + + if (call.Object != null && call.Arguments.Count == 1) + { + // Instance Contains: list.Contains(x.Prop) + collectionExpr = call.Object; + memberExpr = call.Arguments[0]; + } + else if (call.Object == null && call.Arguments.Count == 2) + { + // Enumerable.Contains(list, x.Prop) + collectionExpr = call.Arguments[0]; + memberExpr = call.Arguments[1]; + } + else + { + return false; + } + + var unwrappedMember = UnwrapConvert(memberExpr); + if (unwrappedMember is not MemberExpression member) + return false; + + propertyPath = ExtractMemberPath(member, parameter)!; + if (propertyPath == null) + return false; + + if (collectionExpr is MethodCallExpression { Method.Name: "op_Implicit", Object: null } implicitCall && + implicitCall.Arguments.Count == 1) + { + collectionExpr = implicitCall.Arguments[0]; + } + + object? 
enumerableObj; + try + { + enumerableObj = EvaluateExpression(collectionExpr); + } + catch + { + return false; + } + + if (enumerableObj is not System.Collections.IEnumerable enumerable || enumerableObj is string) + return false; + + var list = new List(); + foreach (var raw in enumerable) + { + var converted = TryApplyConverter(propertyPath, raw, registry); + if (!IsIndexableValue(converted)) + return false; + list.Add(converted ?? DBNull.Value); + } + + values = list; + return true; + } private static object? TryApplyConverter(string propertyPath, object? value, ValueConverterRegistry? registry) { diff --git a/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs b/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs new file mode 100644 index 0000000..547c250 --- /dev/null +++ b/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs @@ -0,0 +1,62 @@ +using BLite.Shared; + +namespace BLite.Tests; + +public class CrossCollectionQueryIsolationTests : IDisposable +{ + private readonly string _dbPath; + private readonly TestDbContext _db; + + public CrossCollectionQueryIsolationTests() + { + _dbPath = Path.Combine(Path.GetTempPath(), $"cross_collection_isolation_{Guid.NewGuid()}.db"); + _db = new TestDbContext(_dbPath); + } + + [Fact] + public async Task OrElse_OnIndexedField_DoesNotReturnCrossCollectionRows() + { + await _db.IntEntities.EnsureIndexAsync(x => x.Name!, "idx_intentities_name", false); + + await _db.IntEntities.InsertAsync(new IntEntity { Id = 1, Name = "A" }); + await _db.IntEntities.InsertAsync(new IntEntity { Id = 2, Name = "B" }); + + await _db.People.InsertAsync(new Person { Id = 101, Name = "A", Age = 20 }); + await _db.People.InsertAsync(new Person { Id = 102, Name = "B", Age = 30 }); + await _db.SaveChangesAsync(); + + var count = _db.IntEntities.AsQueryable() + .Where(x => x.Name == "A" || x.Name == "B") + .Count(); + + Assert.Equal(2, count); + } + + [Fact] + public async Task Contains_OnIndexedField_DoesNotReturnCrossCollectionRows() + 
{ + await _db.IntEntities.EnsureIndexAsync(x => x.Name!, "idx_intentities_name", false); + + await _db.IntEntities.InsertAsync(new IntEntity { Id = 1, Name = "A" }); + await _db.IntEntities.InsertAsync(new IntEntity { Id = 2, Name = "B" }); + + await _db.People.InsertAsync(new Person { Id = 101, Name = "A", Age = 20 }); + await _db.People.InsertAsync(new Person { Id = 102, Name = "B", Age = 30 }); + await _db.SaveChangesAsync(); + + var names = new[] { "A", "B" }; + var count = _db.IntEntities.AsQueryable() + .Where(x => names.Contains(x.Name)) + .Count(); + + Assert.Equal(2, count); + } + + public void Dispose() + { + _db.Dispose(); + if (File.Exists(_dbPath)) File.Delete(_dbPath); + var walPath = Path.ChangeExtension(_dbPath, ".wal"); + if (File.Exists(walPath)) File.Delete(walPath); + } +} diff --git a/tests/BLite.Tests/IndexOptimizationTests.cs b/tests/BLite.Tests/IndexOptimizationTests.cs index 5bce2c6..3341c52 100644 --- a/tests/BLite.Tests/IndexOptimizationTests.cs +++ b/tests/BLite.Tests/IndexOptimizationTests.cs @@ -290,8 +290,8 @@ public void Optimizer_InclusiveGreaterThanOrEqual_ReturnsExact() } [Fact] - public void Optimizer_PartialAnd_OneNonIndexedField_ReturnsPartialAnd() - { + public void Optimizer_PartialAnd_OneNonIndexedField_ReturnsPartialAnd() + { var indexes = new List { new CollectionIndexInfo { Name = "idx_age", PropertyPaths = ["Age"] } @@ -304,8 +304,53 @@ public void Optimizer_PartialAnd_OneNonIndexedField_ReturnsPartialAnd() var result = IndexOptimizer.TryOptimize(model, indexes); Assert.NotNull(result); - Assert.Equal("idx_age", result.IndexName); - Assert.Equal(IndexOptimizer.FilterCompleteness.PartialAnd, result.FilterCompleteness); - } - } -} + Assert.Equal("idx_age", result.IndexName); + Assert.Equal(IndexOptimizer.FilterCompleteness.PartialAnd, result.FilterCompleteness); + } + + [Fact] + public void Optimizer_OrElse_Equality_SameIndexedField_ReturnsInValues() + { + var indexes = new List + { + new CollectionIndexInfo { Name = 
"idx_name", PropertyPaths = ["Name"] } + }; + + Expression> predicate = x => x.Name == "Alice" || x.Name == "Bob"; + var model = new QueryModel { WhereClause = predicate }; + + var result = IndexOptimizer.TryOptimize(model, indexes); + + Assert.NotNull(result); + Assert.Equal("idx_name", result.IndexName); + Assert.NotNull(result.InValues); + Assert.Equal(2, result.InValues!.Count); + Assert.Contains("Alice", result.InValues); + Assert.Contains("Bob", result.InValues); + Assert.Equal(IndexOptimizer.FilterCompleteness.Exact, result.FilterCompleteness); + } + + [Fact] + public void Optimizer_Contains_OnIndexedField_ReturnsInValues() + { + var indexes = new List + { + new CollectionIndexInfo { Name = "idx_name", PropertyPaths = ["Name"] } + }; + + var names = new[] { "Alice", "Bob" }; + Expression> predicate = x => names.Contains(x.Name); + var model = new QueryModel { WhereClause = predicate }; + + var result = IndexOptimizer.TryOptimize(model, indexes); + + Assert.NotNull(result); + Assert.Equal("idx_name", result.IndexName); + Assert.NotNull(result.InValues); + Assert.Equal(2, result.InValues!.Count); + Assert.Contains("Alice", result.InValues); + Assert.Contains("Bob", result.InValues); + Assert.Equal(IndexOptimizer.FilterCompleteness.Exact, result.FilterCompleteness); + } + } +} From 90ef8f5feed3869ba48b7c44a7fd52d673d00110 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 21 Apr 2026 21:35:48 +0000 Subject: [PATCH 3/5] test: refine OR/Contains optimizer tests and nullability handling Agent-Logs-Url: https://github.com/EntglDb/BLite/sessions/cb144a09-ce2a-422f-864e-23e1bf3a2320 Co-authored-by: mrdevrobot <12503462+mrdevrobot@users.noreply.github.com> --- src/BLite.Core/Query/IndexOptimizer.cs | 7 ++++--- tests/BLite.Tests/IndexOptimizationTests.cs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/BLite.Core/Query/IndexOptimizer.cs b/src/BLite.Core/Query/IndexOptimizer.cs index 
e99a28d..8c22099 100644 --- a/src/BLite.Core/Query/IndexOptimizer.cs +++ b/src/BLite.Core/Query/IndexOptimizer.cs @@ -645,7 +645,7 @@ private static bool TryParseContainsInPredicate( out string propertyPath, out IReadOnlyList values) { - propertyPath = null!; + propertyPath = string.Empty; values = Array.Empty(); if (expression is not MethodCallExpression call || call.Method.Name != "Contains") @@ -682,9 +682,10 @@ static Expression UnwrapConvert(Expression e) if (unwrappedMember is not MemberExpression member) return false; - propertyPath = ExtractMemberPath(member, parameter)!; - if (propertyPath == null) + var extractedPath = ExtractMemberPath(member, parameter); + if (extractedPath == null) return false; + propertyPath = extractedPath; if (collectionExpr is MethodCallExpression { Method.Name: "op_Implicit", Object: null } implicitCall && implicitCall.Arguments.Count == 1) diff --git a/tests/BLite.Tests/IndexOptimizationTests.cs b/tests/BLite.Tests/IndexOptimizationTests.cs index 3341c52..c3ab363 100644 --- a/tests/BLite.Tests/IndexOptimizationTests.cs +++ b/tests/BLite.Tests/IndexOptimizationTests.cs @@ -309,7 +309,7 @@ public void Optimizer_PartialAnd_OneNonIndexedField_ReturnsPartialAnd() } [Fact] - public void Optimizer_OrElse_Equality_SameIndexedField_ReturnsInValues() + public void Optimizer_Or_Equality_SameIndexedField_ReturnsInValues() { var indexes = new List { From 5092a7f3846ed9b0cce9cc274f04a6dbe74600a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Apr 2026 07:21:53 +0000 Subject: [PATCH 4/5] fix: address PR review follow-ups for IN key optimization Agent-Logs-Url: https://github.com/EntglDb/BLite/sessions/38548de0-4db8-4009-bf02-5519c136a64c Co-authored-by: mrdevrobot <12503462+mrdevrobot@users.noreply.github.com> --- .../Collections/DocumentCollection.cs | 41 +++++-- src/BLite.Core/Query/IndexOptimizer.cs | 67 ++++++++++-- .../CrossCollectionQueryIsolationTests.cs | 17 +++ 
tests/BLite.Tests/IndexOptimizationTests.cs | 101 ++++++++++++++++-- 4 files changed, 203 insertions(+), 23 deletions(-) diff --git a/src/BLite.Core/Collections/DocumentCollection.cs b/src/BLite.Core/Collections/DocumentCollection.cs index 722da1a..de515fc 100644 --- a/src/BLite.Core/Collections/DocumentCollection.cs +++ b/src/BLite.Core/Collections/DocumentCollection.cs @@ -2457,9 +2457,9 @@ private async Task BsonAggregateFieldAsync(BsonAggregator agg, /// [RequiresDynamicCode("Count-by-predicate uses index optimization and Expression.Compile() which require dynamic code generation.")] [RequiresUnreferencedCode("Count-by-predicate uses reflection to resolve members at runtime. Ensure all entity types are preserved.")] - internal async Task CountByPredicateAsync( - System.Linq.Expressions.LambdaExpression whereClause, - CancellationToken ct = default) + internal async Task CountByPredicateAsync( + System.Linq.Expressions.LambdaExpression whereClause, + CancellationToken ct = default) { // Strategy 1: Index key-only scan — no data-page reads at all. 
// Applicable whenever the predicate targets an indexed field AND the index fully @@ -2480,8 +2480,10 @@ internal async Task CountByPredicateAsync( if (indexOpt.InValues != null) { int inCount = 0; + var seen = new HashSet(s_inProbeKeyComparer); foreach (var key in indexOpt.InValues) { + if (!seen.Add(key)) continue; inCount += index.CountRange(key, key, true, true, null); } return inCount; @@ -2670,8 +2672,10 @@ Func GetCompiled() => { if (indexOpt.InValues != null) { + var seen = new HashSet(s_inProbeKeyComparer); foreach (var key in indexOpt.InValues) { + if (!seen.Add(key)) continue; await foreach (var item in QueryIndexAsync(indexOpt.IndexName, key, key, true, 0, int.MaxValue, transaction, ct)) if (indexOpt.FilterCompleteness == Query.IndexOptimizer.FilterCompleteness.Exact || GetCompiled()(item)) { yield return item; if (++yielded >= fetchLimit) yield break; } } @@ -2711,9 +2715,34 @@ Func GetCompiled() => if (++yielded >= fetchLimit) yield break; } } - } - - #endregion + } + + private static readonly IEqualityComparer s_inProbeKeyComparer = new InProbeKeyComparer(); + + private sealed class InProbeKeyComparer : IEqualityComparer + { + bool IEqualityComparer.Equals(object? x, object? y) + { + if (ReferenceEquals(x, y)) return true; + if (x is null || y is null) return false; + if (x is byte[] xb && y is byte[] yb) return xb.AsSpan().SequenceEqual(yb); + return x.Equals(y); + } + + int IEqualityComparer.GetHashCode(object? obj) + { + if (obj is null) return 0; + if (obj is byte[] bytes) + { + var hc = new HashCode(); + foreach (var b in bytes) hc.Add(b); + return hc.ToHashCode(); + } + return obj.GetHashCode(); + } + } + + #endregion /// /// Serializes an entity with adaptive buffer sizing (Stepped Retry). 
diff --git a/src/BLite.Core/Query/IndexOptimizer.cs b/src/BLite.Core/Query/IndexOptimizer.cs index 8c22099..c58dfa0 100644 --- a/src/BLite.Core/Query/IndexOptimizer.cs +++ b/src/BLite.Core/Query/IndexOptimizer.cs @@ -206,7 +206,7 @@ public enum FilterCompleteness TryGetPointValues(right, out var rightValues)) { var merged = new List(leftValues.Count + rightValues.Count); - var seen = new HashSet(); + var seen = new HashSet(s_inValueComparer); foreach (var v in leftValues) { if (seen.Add(v)) merged.Add(v); @@ -667,9 +667,26 @@ static Expression UnwrapConvert(Expression e) collectionExpr = call.Object; memberExpr = call.Arguments[0]; } - else if (call.Object == null && call.Arguments.Count == 2) + else if (call.Object == null && (call.Arguments.Count == 2 || call.Arguments.Count == 3)) { - // Enumerable.Contains(list, x.Prop) + // Enumerable.Contains(list, x.Prop) or MemoryExtensions.Contains(span, x.Prop, comparer) + // where the comparer argument is typically null. + if (call.Arguments.Count == 3) + { + object? comparer; + try + { + comparer = EvaluateExpression(call.Arguments[2]); + } + catch + { + return false; + } + + if (comparer != null) + return false; + } + collectionExpr = call.Arguments[0]; memberExpr = call.Arguments[1]; } @@ -707,17 +724,47 @@ static Expression UnwrapConvert(Expression e) return false; var list = new List(); + var seen = new HashSet(s_inValueComparer); foreach (var raw in enumerable) { var converted = TryApplyConverter(propertyPath, raw, registry); if (!IsIndexableValue(converted)) return false; - list.Add(converted ?? DBNull.Value); + var normalized = converted ?? DBNull.Value; + if (seen.Add(normalized)) + { + list.Add(normalized); + } } values = list; return true; } + + private static readonly IEqualityComparer s_inValueComparer = new InValueComparer(); + + private sealed class InValueComparer : IEqualityComparer + { + bool IEqualityComparer.Equals(object? x, object? 
y) + { + if (ReferenceEquals(x, y)) return true; + if (x is null || y is null) return false; + if (x is byte[] xb && y is byte[] yb) return xb.AsSpan().SequenceEqual(yb); + return x.Equals(y); + } + + int IEqualityComparer.GetHashCode(object? obj) + { + if (obj is null) return 0; + if (obj is byte[] bytes) + { + var hc = new HashCode(); + foreach (var b in bytes) hc.Add(b); + return hc.ToHashCode(); + } + return obj.GetHashCode(); + } + } private static object? TryApplyConverter(string propertyPath, object? value, ValueConverterRegistry? registry) { @@ -727,12 +774,12 @@ static Expression UnwrapConvert(Expression e) return registry.TryConvert(topProp, value, out var pv) ? pv : value; } - private static readonly HashSet _knownBsonPrimitives = - [ - typeof(int), typeof(long), typeof(double), typeof(decimal), - typeof(bool), typeof(string), typeof(DateTime), typeof(DateTimeOffset), - typeof(ObjectId) - ]; + private static readonly HashSet _knownBsonPrimitives = + [ + typeof(int), typeof(long), typeof(double), typeof(decimal), + typeof(bool), typeof(string), typeof(DateTime), typeof(DateTimeOffset), + typeof(ObjectId), typeof(Guid), typeof(byte[]) + ]; /// /// Returns true when can be used directly as an index diff --git a/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs b/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs index 547c250..fcc5f01 100644 --- a/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs +++ b/tests/BLite.Tests/CrossCollectionQueryIsolationTests.cs @@ -52,6 +52,23 @@ public async Task Contains_OnIndexedField_DoesNotReturnCrossCollectionRows() Assert.Equal(2, count); } + [Fact] + public async Task Contains_WithDuplicateValues_DoesNotDoubleCount() + { + await _db.IntEntities.EnsureIndexAsync(x => x.Name!, "idx_intentities_name", false); + + await _db.IntEntities.InsertAsync(new IntEntity { Id = 1, Name = "A" }); + await _db.IntEntities.InsertAsync(new IntEntity { Id = 2, Name = "B" }); + await _db.SaveChangesAsync(); + + var names = 
new[] { "A", "A", "B", "B" }; + var count = _db.IntEntities.AsQueryable() + .Where(x => names.Contains(x.Name)) + .Count(); + + Assert.Equal(2, count); + } + public void Dispose() { _db.Dispose(); diff --git a/tests/BLite.Tests/IndexOptimizationTests.cs b/tests/BLite.Tests/IndexOptimizationTests.cs index c3ab363..499c539 100644 --- a/tests/BLite.Tests/IndexOptimizationTests.cs +++ b/tests/BLite.Tests/IndexOptimizationTests.cs @@ -6,13 +6,15 @@ namespace BLite.Tests { public class IndexOptimizationTests { - public class TestEntity - { - public int Id { get; set; } - public string Name { get; set; } = ""; - public int Age { get; set; } - public bool IsActive { get; set; } - } + public class TestEntity + { + public int Id { get; set; } + public string Name { get; set; } = ""; + public int Age { get; set; } + public bool IsActive { get; set; } + public Guid ExternalId { get; set; } + public byte[] Payload { get; set; } = []; + } [Fact] public void Optimizer_Identifies_Equality() @@ -352,5 +354,90 @@ public void Optimizer_Contains_OnIndexedField_ReturnsInValues() Assert.Contains("Bob", result.InValues); Assert.Equal(IndexOptimizer.FilterCompleteness.Exact, result.FilterCompleteness); } + + [Fact] + public void Optimizer_Contains_WithDuplicateValues_DeDupesInValues() + { + var indexes = new List + { + new CollectionIndexInfo { Name = "idx_name", PropertyPaths = ["Name"] } + }; + + var names = new[] { "Alice", "Alice", "Bob", "Bob" }; + Expression> predicate = x => names.Contains(x.Name); + var model = new QueryModel { WhereClause = predicate }; + + var result = IndexOptimizer.TryOptimize(model, indexes); + + Assert.NotNull(result); + Assert.NotNull(result.InValues); + Assert.Equal(2, result.InValues!.Count); + Assert.Equal(["Alice", "Bob"], result.InValues); + } + + [Fact] + public void Optimizer_Contains_GuidField_ReturnsInValues() + { + var indexes = new List + { + new CollectionIndexInfo { Name = "idx_external_id", PropertyPaths = ["ExternalId"] } + }; + + var a = 
Guid.NewGuid(); + var b = Guid.NewGuid(); + var ids = new[] { a, b }; + Expression> predicate = x => ids.Contains(x.ExternalId); + var model = new QueryModel { WhereClause = predicate }; + + var result = IndexOptimizer.TryOptimize(model, indexes); + + Assert.NotNull(result); + Assert.NotNull(result.InValues); + Assert.Equal([a, b], result.InValues); + } + + [Fact] + public void Optimizer_Contains_ByteArrayField_ReturnsInValues() + { + var indexes = new List + { + new CollectionIndexInfo { Name = "idx_payload", PropertyPaths = ["Payload"] } + }; + + var p1 = new byte[] { 1, 2, 3 }; + var p2 = new byte[] { 4, 5, 6 }; + var payloads = new[] { p1, p2, p1 }; + Expression> predicate = x => payloads.Contains(x.Payload); + var model = new QueryModel { WhereClause = predicate }; + + var result = IndexOptimizer.TryOptimize(model, indexes); + + Assert.NotNull(result); + Assert.NotNull(result.InValues); + Assert.Equal(2, result.InValues!.Count); + Assert.Same(p1, result.InValues[0]); + Assert.Same(p2, result.InValues[1]); + } + + [Fact] + public void Optimizer_Contains_ByteArrayField_DeDupesByValue() + { + var indexes = new List + { + new CollectionIndexInfo { Name = "idx_payload", PropertyPaths = ["Payload"] } + }; + + var p1 = new byte[] { 1, 2, 3 }; + var p1Copy = new byte[] { 1, 2, 3 }; + var payloads = new[] { p1, p1Copy }; + Expression> predicate = x => payloads.Contains(x.Payload); + var model = new QueryModel { WhereClause = predicate }; + + var result = IndexOptimizer.TryOptimize(model, indexes); + + Assert.NotNull(result); + Assert.NotNull(result.InValues); + Assert.Single(result.InValues); + } } } From b9992002b088e0aa00a4db3038e0b1a389c5fea9 Mon Sep 17 00:00:00 2001 From: MrDevRobot <12503462+mrdevrobot@users.noreply.github.com> Date: Thu, 23 Apr 2026 10:04:37 +0200 Subject: [PATCH 5/5] Update DocumentCollectionTests.cs --- tests/BLite.Tests/DocumentCollectionTests.cs | 81 ++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git 
a/tests/BLite.Tests/DocumentCollectionTests.cs b/tests/BLite.Tests/DocumentCollectionTests.cs index 1080240..4df8b77 100644 --- a/tests/BLite.Tests/DocumentCollectionTests.cs +++ b/tests/BLite.Tests/DocumentCollectionTests.cs @@ -211,6 +211,87 @@ public async Task Insert_With_SpecifiedId_RetainsId() Assert.Equal("SpecifiedID", found.Name); } + /// + /// Regression scenario: two separate DocumentCollection instances that share the same + /// physical "users" collection (same StorageEngine, same on-disk pages) are created + /// after a page is partially filled. Each rebuilds its own in-memory FreeSpaceIndex + /// from disk; both therefore see page P as having ~6 120 B free (stale after process 1 + /// writes). + /// + /// Page = 16 384 B, header = 24 B, SlotEntry = 8 B, BSON overhead ≈ 45 B per User. The figures below describe the scenario as originally analysed; the constants in the test body (preFillNameLen = 10000, insertNameLen = 400) do not match them, and the body inserts through db.Users and db.ComplexUsers: + /// pre-fill : name = 10 187 chars → doc = 10 232 B → needs 10 240 B → leaves 6 120 B free + /// insert doc: name = 3 500 chars → doc = 3 545 B → needs 3 553 B + /// after col1 writes: 6 120 − 3 553 = 2 567 B remain — too little for col2 (3 553 > 2 567) + /// + /// DESIRED : both inserts succeed (engine handles the stale FSI gracefully by retrying on + /// a newly allocated page). + /// CURRENT : the second InsertAsync throws InvalidOperationException("Not enough space …"). + /// + [Fact] + public async Task TwoCollectionInstances_StaleFSI_SecondInsertShouldSucceed() + { + const int preFillNameLen = 10000; + const int insertNameLen = 400; + + var dbPath = Path.Combine(Path.GetTempPath(), $"test_fsi_stale_{Guid.NewGuid():N}.db"); + try + { + using var db = new TestDbContext(dbPath); + + // ── Step 1: fill the "users" data page to ≈10 KB ────────────────────── + await db.Users.InsertAsync(new User { Name = new string('X', preFillNameLen), Age = 0 }); + await db.SaveChangesAsync(); + + // ── Step 3 (process 1): bulk-insert 15 documents (insertNameLen-char names) via col1, concurrently with 15 single inserts via col2 ── + // Original single-document analysis: col1 FSI: P = 6120 ≥ 3553 → uses page P → commits. + // col1 FSI updated: P = 2567. 
col2 FSI is STILL stale: P = 6120. + var col1 = db.Users; + List docs1 = new List(); + + for (int i = 0; i < 15; i++) + { + docs1.Add(new User { Name = new string('A', insertNameLen), Age = i }); + } + + var tasks = new List(); + + var col2 = db.ComplexUsers; + List docs2 = new List(); + for (int i = 0; i < 15; i++) + { + docs2.Add(new ComplexUser { Name = new string('B', insertNameLen) }); + } + + tasks.Add(Task.Run(async () => + { + await Task.Delay(1); + var id1 = await col1.InsertBulkAsync(docs1); + return id1; + })); + + foreach(var doc in docs2) + { + tasks.Add(Task.Run(async () => + { + var id2 = await col2.InsertAsync(doc); + return id2; + })); + } + + await Task.WhenAll(tasks).ContinueWith(t => + { + Assert.False(t.IsFaulted, $"Second insert failed with exception: {t.Exception}"); + }); + + } + finally + { + if (File.Exists(dbPath)) File.Delete(dbPath); + var wal = Path.ChangeExtension(dbPath, ".wal"); + if (File.Exists(wal)) File.Delete(wal); + } + } + public void Dispose() { _db?.Dispose();