From 67ae8dbaafc3852158c260cf220d7a99290241d9 Mon Sep 17 00:00:00 2001 From: Abhi Ojha Date: Mon, 30 Mar 2026 15:04:47 +0000 Subject: [PATCH] Deduplicate rows in table reporter with identical displayed fields The table reporter creates one row per AllocationRecord but only displays the top stack frame. Records with different full call stacks mapping to the same top frame appeared as duplicate rows. Aggregate records sharing the same (tid, allocator, top_frame) key, summing size and n_allocations. Also add --split-threads option to the table command, matching the flamegraph reporter. Signed-off-by: Abhi Ojha --- news/857.bugfix.rst | 3 + src/memray/commands/table.py | 6 + src/memray/reporters/table.py | 21 ++- tests/integration/test_main.py | 43 +++-- tests/unit/test_table_reporter.py | 282 ++++++++++++++++++++++++++++++ 5 files changed, 330 insertions(+), 25 deletions(-) create mode 100644 news/857.bugfix.rst diff --git a/news/857.bugfix.rst b/news/857.bugfix.rst new file mode 100644 index 0000000000..592a81caa7 --- /dev/null +++ b/news/857.bugfix.rst @@ -0,0 +1,3 @@ +Deduplicate rows in the table reporter that had identical displayed fields but +different underlying call stacks. Also add a ``--split-threads`` option to the +table reporter, matching the flamegraph reporter's existing option. diff --git a/src/memray/commands/table.py b/src/memray/commands/table.py index bd9549503e..f2964a7222 100644 --- a/src/memray/commands/table.py +++ b/src/memray/commands/table.py @@ -15,6 +15,12 @@ def __init__(self) -> None: def prepare_parser(self, parser: argparse.ArgumentParser) -> None: super().prepare_parser(parser) + parser.add_argument( + "--split-threads", + help="Do not merge allocations across threads", + action="store_true", + default=False, + ) parser.add_argument( "--no-web", help="Use local assets instead of fetching from CDN", diff --git a/src/memray/reporters/table.py b/src/memray/reporters/table.py index be603d77a9..55cdf6d20d 100644 --- a/src/memray/reporters/table.py +++ b/src/memray/reporters/table.py @@ -33,7 +33,10 @@ def from_snapshot( native_traces: bool, **kwargs: Any, ) -> "TableReporter": - result = [] + # Aggregate records that share the same displayed fields, since the + # table only shows the top stack frame and records with different full + # call stacks can map to the same displayed row. + aggregated: Dict[tuple, Dict[str, Any]] = {} for record in allocations: stack_trace = ( list(record.hybrid_stack_trace(max_stacks=1)) @@ -46,17 +49,21 @@ def from_snapshot( stack = f"{function} at {file}:{line}" allocator = AllocatorType(record.allocator) - result.append( - { - "tid": format_thread_name(record), + tid = format_thread_name(record) + key = (tid, allocator.name.lower(), stack) + if key in aggregated: + aggregated[key]["size"] += record.size + aggregated[key]["n_allocations"] += record.n_allocations + else: + aggregated[key] = { + "tid": tid, "size": record.size, "allocator": allocator.name.lower(), "n_allocations": record.n_allocations, "stack_trace": html.escape(stack), } - ) - return cls(result, memory_records=memory_records) + return cls(list(aggregated.values()), memory_records=memory_records) def render( self, @@ -67,8 +74,6 @@ def render( inverted: bool, no_web: bool = False, ) -> None: - if not merge_threads: - raise NotImplementedError("TableReporter only supports merged threads.") if inverted: raise NotImplementedError( "TableReporter does not support inverted argument" diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py index 8d569fa68e..ab5a172db7 100644 --- a/tests/integration/test_main.py +++ b/tests/integration/test_main.py @@ -1246,23 +1246,32 @@ def test_reads_from_correct_file(self, tmp_path, simple_test_file): assert output_file.exists() assert str(source_file) in output_file.read_text() - def test_no_split_threads(self, tmp_path): - # GIVEN/WHEN/THEN - with pytest.raises(subprocess.CalledProcessError): - subprocess.run( - [ - sys.executable, - "-m", - "memray", - "table", - "--split-threads", - "somefile", - ], - cwd=str(tmp_path), - check=True, - capture_output=True, - text=True, - ) + def test_split_threads(self, tmp_path, simple_test_file): + # GIVEN + results_file, source_file = generate_sample_results( + tmp_path, simple_test_file, native=True + ) + + # WHEN + subprocess.run( + [ + sys.executable, + "-m", + "memray", + "table", + "--split-threads", + str(results_file), + ], + cwd=str(tmp_path), + check=True, + capture_output=True, + text=True, + ) + + # THEN + output_file = tmp_path / "memray-table-result.html" + assert output_file.exists() + assert str(source_file) in output_file.read_text() class TestReporterSubCommands: diff --git a/tests/unit/test_table_reporter.py b/tests/unit/test_table_reporter.py index bfa1c28c03..a7bf28564b 100644 --- a/tests/unit/test_table_reporter.py +++ b/tests/unit/test_table_reporter.py @@ -125,6 +125,288 @@ def test_multiple_allocations(self): }, ] + def test_aggregates_records_with_same_top_frame(self): + """Records with different full stacks but same top frame should be aggregated.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=1, + address=0x1000000, + size=1024, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=1, + _stack=[ + ("me", "fun.py", 12), + ("caller_a", "a.py", 1), + ], + ), + MockAllocationRecord( + tid=1, + address=0x1100000, + size=2048, + allocator=AllocatorType.MALLOC, + stack_id=2, + n_allocations=3, + _stack=[ + ("me", "fun.py", 12), + ("caller_b", "b.py", 5), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=False + ) + + # THEN + assert table.data == [ + { + "tid": "0x1", + "size": 1024 + 2048, + "allocator": "malloc", + "n_allocations": 4, + "stack_trace": "me at fun.py:12", + } + ] + + def test_does_not_aggregate_different_allocators(self): + """Records with the same top frame but different allocators stay separate.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=1, + address=0x1000000, + size=1024, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=1, + _stack=[ + ("me", "fun.py", 12), + ], + ), + MockAllocationRecord( + tid=1, + address=0x1100000, + size=2048, + allocator=AllocatorType.CALLOC, + stack_id=2, + n_allocations=1, + _stack=[ + ("me", "fun.py", 12), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=False + ) + + # THEN + assert len(table.data) == 2 + assert table.data[0]["allocator"] == "malloc" + assert table.data[1]["allocator"] == "calloc" + + def test_does_not_aggregate_different_threads(self): + """Records from different threads with same top frame stay separate.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=1, + address=0x1000000, + size=1024, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=1, + _stack=[ + ("me", "fun.py", 12), + ], + ), + MockAllocationRecord( + tid=2, + address=0x1100000, + size=2048, + allocator=AllocatorType.MALLOC, + stack_id=2, + n_allocations=1, + _stack=[ + ("me", "fun.py", 12), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=False + ) + + # THEN + assert len(table.data) == 2 + assert table.data[0]["tid"] == "0x1" + assert table.data[1]["tid"] == "0x2" + + def test_aggregates_native_records_with_same_top_frame(self): + """Native trace records with same top frame should also be aggregated.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=1, + address=0x1000000, + size=512, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=2, + _hybrid_stack=[ + ("alloc", "mem.c", 42), + ("caller_a", "a.c", 10), + ], + ), + MockAllocationRecord( + tid=1, + address=0x1100000, + size=256, + allocator=AllocatorType.MALLOC, + stack_id=2, + n_allocations=5, + _hybrid_stack=[ + ("alloc", "mem.c", 42), + ("caller_b", "b.c", 20), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=True + ) + + # THEN + assert table.data == [ + { + "tid": "0x1", + "size": 768, + "allocator": "malloc", + "n_allocations": 7, + "stack_trace": "alloc at mem.c:42", + } + ] + + def test_aggregates_merged_thread_records(self): + """When merge_threads=True, records arrive with tid=-1 and aggregate.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=-1, + address=0x1000000, + size=1024, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=1, + _stack=[ + ("me", "fun.py", 12), + ("caller_a", "a.py", 1), + ], + ), + MockAllocationRecord( + tid=-1, + address=0x1100000, + size=2048, + allocator=AllocatorType.MALLOC, + stack_id=2, + n_allocations=3, + _stack=[ + ("me", "fun.py", 12), + ("caller_b", "b.py", 5), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=False + ) + + # THEN + assert table.data == [ + { + "tid": "merged thread", + "size": 1024 + 2048, + "allocator": "malloc", + "n_allocations": 4, + "stack_trace": "me at fun.py:12", + } + ] + + def test_html_special_chars_in_stack_trace(self): + """Stack traces with HTML special chars are escaped in output.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=1, + address=0x1000000, + size=512, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=1, + _hybrid_stack=[ + ("std::vector::push_back", "vector.h", 100), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=True + ) + + # THEN + assert len(table.data) == 1 + assert "<" in table.data[0]["stack_trace"] + assert ">" in table.data[0]["stack_trace"] + assert "" not in table.data[0]["stack_trace"] + + def test_html_special_chars_do_not_break_aggregation(self): + """Records with HTML chars in function names still aggregate correctly.""" + # GIVEN + peak_allocations = [ + MockAllocationRecord( + tid=1, + address=0x1000000, + size=512, + allocator=AllocatorType.MALLOC, + stack_id=1, + n_allocations=1, + _hybrid_stack=[ + ("std::vector::push_back", "vector.h", 100), + ("caller_a", "a.cpp", 10), + ], + ), + MockAllocationRecord( + tid=1, + address=0x1100000, + size=256, + allocator=AllocatorType.MALLOC, + stack_id=2, + n_allocations=2, + _hybrid_stack=[ + ("std::vector::push_back", "vector.h", 100), + ("caller_b", "b.cpp", 20), + ], + ), + ] + + # WHEN + table = TableReporter.from_snapshot( + peak_allocations, memory_records=[], native_traces=True + ) + + # THEN + assert len(table.data) == 1 + assert table.data[0]["size"] == 768 + assert table.data[0]["n_allocations"] == 3 + def test_empty_stack_trace(self): # GIVEN peak_allocations = [