diff --git a/news/857.bugfix.rst b/news/857.bugfix.rst
new file mode 100644
index 0000000000..592a81caa7
--- /dev/null
+++ b/news/857.bugfix.rst
@@ -0,0 +1,3 @@
+Deduplicate rows in the table reporter that had identical displayed fields but
+different underlying call stacks. Also add a ``--split-threads`` option to the
+table reporter, matching the flamegraph reporter's existing option.
diff --git a/src/memray/commands/table.py b/src/memray/commands/table.py
index bd9549503e..f2964a7222 100644
--- a/src/memray/commands/table.py
+++ b/src/memray/commands/table.py
@@ -15,6 +15,12 @@ def __init__(self) -> None:
 
     def prepare_parser(self, parser: argparse.ArgumentParser) -> None:
         super().prepare_parser(parser)
+        parser.add_argument(
+            "--split-threads",
+            help="Do not merge allocations across threads",
+            action="store_true",
+            default=False,
+        )
         parser.add_argument(
             "--no-web",
             help="Use local assets instead of fetching from CDN",
diff --git a/src/memray/reporters/table.py b/src/memray/reporters/table.py
index be603d77a9..55cdf6d20d 100644
--- a/src/memray/reporters/table.py
+++ b/src/memray/reporters/table.py
@@ -33,7 +33,10 @@ def from_snapshot(
         native_traces: bool,
         **kwargs: Any,
     ) -> "TableReporter":
-        result = []
+        # Aggregate records that share the same displayed fields, since the
+        # table only shows the top stack frame and records with different full
+        # call stacks can map to the same displayed row.
+        aggregated: Dict[tuple, Dict[str, Any]] = {}
         for record in allocations:
             stack_trace = (
                 list(record.hybrid_stack_trace(max_stacks=1))
@@ -46,17 +49,21 @@ def from_snapshot(
                 stack = f"{function} at {file}:{line}"
 
             allocator = AllocatorType(record.allocator)
-            result.append(
-                {
-                    "tid": format_thread_name(record),
+            tid = format_thread_name(record)
+            key = (tid, allocator.name.lower(), stack)
+            if key in aggregated:
+                aggregated[key]["size"] += record.size
+                aggregated[key]["n_allocations"] += record.n_allocations
+            else:
+                aggregated[key] = {
+                    "tid": tid,
                     "size": record.size,
                     "allocator": allocator.name.lower(),
                     "n_allocations": record.n_allocations,
                     "stack_trace": html.escape(stack),
                 }
-            )
 
-        return cls(result, memory_records=memory_records)
+        return cls(list(aggregated.values()), memory_records=memory_records)
 
     def render(
         self,
@@ -67,8 +74,6 @@ def render(
         inverted: bool,
         no_web: bool = False,
     ) -> None:
-        if not merge_threads:
-            raise NotImplementedError("TableReporter only supports merged threads.")
         if inverted:
             raise NotImplementedError(
                 "TableReporter does not support inverted argument"
diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py
index 8d569fa68e..ab5a172db7 100644
--- a/tests/integration/test_main.py
+++ b/tests/integration/test_main.py
@@ -1246,23 +1246,32 @@ def test_reads_from_correct_file(self, tmp_path, simple_test_file):
         assert output_file.exists()
         assert str(source_file) in output_file.read_text()
 
-    def test_no_split_threads(self, tmp_path):
-        # GIVEN/WHEN/THEN
-        with pytest.raises(subprocess.CalledProcessError):
-            subprocess.run(
-                [
-                    sys.executable,
-                    "-m",
-                    "memray",
-                    "table",
-                    "--split-threads",
-                    "somefile",
-                ],
-                cwd=str(tmp_path),
-                check=True,
-                capture_output=True,
-                text=True,
-            )
+    def test_split_threads(self, tmp_path, simple_test_file):
+        # GIVEN
+        results_file, source_file = generate_sample_results(
+            tmp_path, simple_test_file, native=True
+        )
+
+        # WHEN
+        subprocess.run(
+            [
+                sys.executable,
+                "-m",
+                "memray",
+                "table",
+                "--split-threads",
+                str(results_file),
+            ],
+            cwd=str(tmp_path),
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+
+        # THEN
+        output_file = tmp_path / "memray-table-result.html"
+        assert output_file.exists()
+        assert str(source_file) in output_file.read_text()
 
 
 class TestReporterSubCommands:
diff --git a/tests/unit/test_table_reporter.py b/tests/unit/test_table_reporter.py
index bfa1c28c03..a7bf28564b 100644
--- a/tests/unit/test_table_reporter.py
+++ b/tests/unit/test_table_reporter.py
@@ -125,6 +125,288 @@ def test_multiple_allocations(self):
             },
         ]
 
+    def test_aggregates_records_with_same_top_frame(self):
+        """Records with different full stacks but same top frame should be aggregated."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=1,
+                address=0x1000000,
+                size=1024,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=1,
+                _stack=[
+                    ("me", "fun.py", 12),
+                    ("caller_a", "a.py", 1),
+                ],
+            ),
+            MockAllocationRecord(
+                tid=1,
+                address=0x1100000,
+                size=2048,
+                allocator=AllocatorType.MALLOC,
+                stack_id=2,
+                n_allocations=3,
+                _stack=[
+                    ("me", "fun.py", 12),
+                    ("caller_b", "b.py", 5),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=False
+        )
+
+        # THEN
+        assert table.data == [
+            {
+                "tid": "0x1",
+                "size": 1024 + 2048,
+                "allocator": "malloc",
+                "n_allocations": 4,
+                "stack_trace": "me at fun.py:12",
+            }
+        ]
+
+    def test_does_not_aggregate_different_allocators(self):
+        """Records with the same top frame but different allocators stay separate."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=1,
+                address=0x1000000,
+                size=1024,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=1,
+                _stack=[
+                    ("me", "fun.py", 12),
+                ],
+            ),
+            MockAllocationRecord(
+                tid=1,
+                address=0x1100000,
+                size=2048,
+                allocator=AllocatorType.CALLOC,
+                stack_id=2,
+                n_allocations=1,
+                _stack=[
+                    ("me", "fun.py", 12),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=False
+        )
+
+        # THEN
+        assert len(table.data) == 2
+        assert table.data[0]["allocator"] == "malloc"
+        assert table.data[1]["allocator"] == "calloc"
+
+    def test_does_not_aggregate_different_threads(self):
+        """Records from different threads with same top frame stay separate."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=1,
+                address=0x1000000,
+                size=1024,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=1,
+                _stack=[
+                    ("me", "fun.py", 12),
+                ],
+            ),
+            MockAllocationRecord(
+                tid=2,
+                address=0x1100000,
+                size=2048,
+                allocator=AllocatorType.MALLOC,
+                stack_id=2,
+                n_allocations=1,
+                _stack=[
+                    ("me", "fun.py", 12),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=False
+        )
+
+        # THEN
+        assert len(table.data) == 2
+        assert table.data[0]["tid"] == "0x1"
+        assert table.data[1]["tid"] == "0x2"
+
+    def test_aggregates_native_records_with_same_top_frame(self):
+        """Native trace records with same top frame should also be aggregated."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=1,
+                address=0x1000000,
+                size=512,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=2,
+                _hybrid_stack=[
+                    ("alloc", "mem.c", 42),
+                    ("caller_a", "a.c", 10),
+                ],
+            ),
+            MockAllocationRecord(
+                tid=1,
+                address=0x1100000,
+                size=256,
+                allocator=AllocatorType.MALLOC,
+                stack_id=2,
+                n_allocations=5,
+                _hybrid_stack=[
+                    ("alloc", "mem.c", 42),
+                    ("caller_b", "b.c", 20),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=True
+        )
+
+        # THEN
+        assert table.data == [
+            {
+                "tid": "0x1",
+                "size": 768,
+                "allocator": "malloc",
+                "n_allocations": 7,
+                "stack_trace": "alloc at mem.c:42",
+            }
+        ]
+
+    def test_aggregates_merged_thread_records(self):
+        """When merge_threads=True, records arrive with tid=-1 and aggregate."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=-1,
+                address=0x1000000,
+                size=1024,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=1,
+                _stack=[
+                    ("me", "fun.py", 12),
+                    ("caller_a", "a.py", 1),
+                ],
+            ),
+            MockAllocationRecord(
+                tid=-1,
+                address=0x1100000,
+                size=2048,
+                allocator=AllocatorType.MALLOC,
+                stack_id=2,
+                n_allocations=3,
+                _stack=[
+                    ("me", "fun.py", 12),
+                    ("caller_b", "b.py", 5),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=False
+        )
+
+        # THEN
+        assert table.data == [
+            {
+                "tid": "merged thread",
+                "size": 1024 + 2048,
+                "allocator": "malloc",
+                "n_allocations": 4,
+                "stack_trace": "me at fun.py:12",
+            }
+        ]
+
+    def test_html_special_chars_in_stack_trace(self):
+        """Stack traces with HTML special chars are escaped in output."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=1,
+                address=0x1000000,
+                size=512,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=1,
+                _hybrid_stack=[
+                    ("std::vector<int>::push_back", "vector.h", 100),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=True
+        )
+
+        # THEN
+        assert len(table.data) == 1
+        assert "&lt;" in table.data[0]["stack_trace"]
+        assert "&gt;" in table.data[0]["stack_trace"]
+        assert "<int>" not in table.data[0]["stack_trace"]
+
+    def test_html_special_chars_do_not_break_aggregation(self):
+        """Records with HTML chars in function names still aggregate correctly."""
+        # GIVEN
+        peak_allocations = [
+            MockAllocationRecord(
+                tid=1,
+                address=0x1000000,
+                size=512,
+                allocator=AllocatorType.MALLOC,
+                stack_id=1,
+                n_allocations=1,
+                _hybrid_stack=[
+                    ("std::vector<int>::push_back", "vector.h", 100),
+                    ("caller_a", "a.cpp", 10),
+                ],
+            ),
+            MockAllocationRecord(
+                tid=1,
+                address=0x1100000,
+                size=256,
+                allocator=AllocatorType.MALLOC,
+                stack_id=2,
+                n_allocations=2,
+                _hybrid_stack=[
+                    ("std::vector<int>::push_back", "vector.h", 100),
+                    ("caller_b", "b.cpp", 20),
+                ],
+            ),
+        ]
+
+        # WHEN
+        table = TableReporter.from_snapshot(
+            peak_allocations, memory_records=[], native_traces=True
+        )
+
+        # THEN
+        assert len(table.data) == 1
+        assert table.data[0]["size"] == 768
+        assert table.data[0]["n_allocations"] == 3
+
     def test_empty_stack_trace(self):
         # GIVEN
         peak_allocations = [