Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/auto-tag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ jobs:
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
echo "tag=v$VERSION" >> "$GITHUB_OUTPUT"

- name: Check runtime version matches package version
  run: |
    python - <<'PY'
    import pathlib
    import re
    import tomllib

    # Fail the workflow when the version declared in pyproject.toml and the
    # __version__ string in the package's __init__.py have drifted apart.
    pyproject = tomllib.loads(pathlib.Path("pyproject.toml").read_text(encoding="utf-8"))
    project_version = pyproject["project"]["version"]
    init_text = pathlib.Path("src/hotmem/__init__.py").read_text(encoding="utf-8")

    # re.search returns None when nothing matches; report that explicitly
    # instead of crashing with an AttributeError on None.group().
    match = re.search(r'__version__ = "([^"]+)"', init_text)
    if match is None:
        raise SystemExit("could not find __version__ in src/hotmem/__init__.py")
    init_version = match.group(1)

    if init_version != project_version:
        raise SystemExit(
            f"hotmem.__version__ {init_version} does not match pyproject version {project_version}"
        )
    PY

- name: Check if tag exists
id: check
run: |
Expand Down
20 changes: 20 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,33 @@ jobs:
run: uv build

- name: Verify package metadata
env:
RELEASE_TAG: ${{ github.event_name == 'workflow_dispatch' && inputs.tag || github.ref_name }}
run: |
uvx twine check dist/*
python - <<'PY'
import os
import pathlib
import re
import tarfile
import tomllib

readme = pathlib.Path("README.md").read_text()
pyproject = tomllib.loads(pathlib.Path("pyproject.toml").read_text())
project_version = pyproject["project"]["version"]
init_text = pathlib.Path("src/hotmem/__init__.py").read_text()
init_version = re.search(r'__version__ = "([^"]+)"', init_text).group(1)
release_tag = os.environ["RELEASE_TAG"]

if release_tag != f"v{project_version}":
raise SystemExit(
f"release tag {release_tag} does not match pyproject version {project_version}"
)
if init_version != project_version:
raise SystemExit(
f"hotmem.__version__ {init_version} does not match pyproject version {project_version}"
)

sdist = next(pathlib.Path("dist").glob("*.tar.gz"))

with tarfile.open(sdist) as tar:
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ All notable changes to HotMem will be documented in this file.

Format follows [Keep a Changelog](https://keepachangelog.com/).

## [0.1.6] - 2026-06-09

### Added
- Batched JSONL hydration with SQLite-native duplicate skipping.
- Snapshot embedding export via `embedding_b64` for faster compatible rehydration.
- Hydration trace counters for parsed rows, loaded rows, duplicate skips, bytes read, and embedding reuse.

### Fixed
- Package version metadata now matches the runtime `hotmem.__version__`.

## [0.1.0] - 2025-05-02

### Added
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "hotmem"
version = "0.1.5"
version = "0.1.6"
description = "A local-first memory sidecar for agent applications"
readme = "README.md"
requires-python = ">=3.11"
Expand Down Expand Up @@ -57,4 +57,3 @@ select = ["E", "F", "I", "UP", "B", "SIM"]

[tool.pytest.ini_options]
testpaths = ["tests"]

2 changes: 1 addition & 1 deletion src/hotmem/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""HotMem — A local-first memory sidecar for agent applications."""

__version__ = "0.1.0"
__version__ = "0.1.6"
94 changes: 89 additions & 5 deletions src/hotmem/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
Interface:
MemoryDB(db_path: str | Path)
.insert(id, identifier, fact_text, embedding_blob, ...) -> None
.insert_many_ignore(records) -> int
.count() -> int
.all_rows() -> list[Row]
.all_rows(include_embedding=False) -> list[Row]
.exists(content_hash: str) -> bool
.close() -> None

Expand All @@ -23,6 +24,8 @@
import re
import sqlite3
import struct
from collections.abc import Iterable
from dataclasses import dataclass
from pathlib import Path
from typing import Any

Expand Down Expand Up @@ -75,6 +78,24 @@
_FTS_TOKEN_RE = re.compile(r"[\w]+")


@dataclass(frozen=True)
class MemoryRecord:
    """Immutable value object holding one row destined for the memories table.

    Field order mirrors the column order used by the bulk insert statement
    in ``insert_many_ignore``. Only the first four fields are required;
    the rest fall back to the defaults below.
    """

    # --- required ---
    id: str
    identifier: str
    fact_text: str
    # Raw embedding blob; presumably packed float32 values -- TODO confirm.
    embedding: bytes

    # --- optional ---
    embedding_dim: int = EMBEDDING_DIM
    embedding_model: str = ""
    source: str = ""
    importance: float = 0.5
    # Metadata serialized as a JSON string; defaults to an empty object.
    metadata_json: str = "{}"
    # Empty string means "no hash"; the unique content_hash index created
    # in ``_migrate`` excludes empty values.
    content_hash: str = ""
    ttl_seconds: int | None = None
    # When None, ``insert_many_ignore`` lets SQLite fill in the current time.
    created_at: str | None = None


def _cosine_similarity(blob_a: bytes | None, blob_b: bytes | None) -> float | None:
"""SQLite UDF: cosine similarity between two packed float32 blobs."""
if blob_a is None or blob_b is None:
Expand Down Expand Up @@ -124,6 +145,19 @@ def _migrate(self) -> None:
self._conn.commit()
_trace.info("migrate", "added ttl_seconds column")

try:
self._conn.execute(
"""CREATE UNIQUE INDEX IF NOT EXISTS idx_memories_content_hash_unique
ON memories(content_hash)
WHERE content_hash != ''"""
)
self._conn.commit()
except sqlite3.IntegrityError:
_trace.warn(
"migrate",
"skipped unique content_hash index because duplicate hashes exist",
)

def insert(
self,
id: str,
Expand Down Expand Up @@ -167,6 +201,43 @@ def insert(
self._conn.commit()
_trace.debug("insert", f"stored memory {id[:8]}…", detail={"identifier": identifier})

def insert_many_ignore(self, records: Iterable[MemoryRecord]) -> int:
    """Insert many memory rows atomically, skipping rows that conflict.

    Rows are written with ``INSERT OR IGNORE``, so a record that collides
    with an existing row (duplicate id or duplicate content hash) is
    skipped instead of raising. A record whose ``created_at`` is ``None``
    gets the current time from SQLite.

    The whole batch is one transaction: it is committed when every row was
    processed, and rolled back if the driver raises part-way through, so a
    failed call never leaves a partial batch pending on the connection.

    Args:
        records: Memory rows to store. The iterable is consumed once.

    Returns:
        Number of rows actually inserted (0 when ``records`` is empty).
    """
    rows = [
        (
            record.id,
            record.identifier,
            record.fact_text,
            record.embedding,
            record.embedding_dim,
            record.embedding_model,
            record.source,
            record.importance,
            record.metadata_json,
            record.content_hash,
            record.ttl_seconds,
            record.created_at,
        )
        for record in records
    ]
    if not rows:
        return 0

    # The connection context manager commits on success and rolls back on
    # any exception, which keeps the "one transaction" promise even when
    # executemany fails in the middle of the batch.
    with self._conn:
        cursor = self._conn.executemany(
            """INSERT OR IGNORE INTO memories
               (id, identifier, fact_text, embedding, embedding_dim, embedding_model,
                source, importance, metadata_json, content_hash, ttl_seconds, created_at)
               VALUES (
                   ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                   COALESCE(?, strftime('%Y-%m-%dT%H:%M:%SZ', 'now'))
               )""",
            rows,
        )

    # rowcount is -1 when the driver cannot determine the number of rows.
    inserted = cursor.rowcount if cursor.rowcount != -1 else 0
    _trace.debug("insert_many", f"stored {inserted} memories", detail={"attempted": len(rows)})
    return inserted

def search_with_cosine(self, query_embedding: bytes) -> list[dict[str, Any]]:
"""Return all memories with their cosine similarity to the query embedding."""
rows = self._conn.execute(
Expand Down Expand Up @@ -206,15 +277,28 @@ def count(self) -> int:
row = self._conn.execute("SELECT COUNT(*) FROM memories").fetchone()
return row[0]

def all_rows(self) -> list[dict[str, Any]]:
def all_rows(self, *, include_embedding: bool = False) -> list[dict[str, Any]]:
"""Return all memory rows as dicts (for snapshot export)."""
rows = self._conn.execute(
query = (
"""SELECT id, identifier, fact_text, embedding_dim, embedding_model,
source, importance, metadata_json, content_hash, ttl_seconds, created_at
source, importance, metadata_json, content_hash, ttl_seconds, created_at,
embedding
FROM memories"""
).fetchall()
if include_embedding
else """SELECT id, identifier, fact_text, embedding_dim, embedding_model,
source, importance, metadata_json, content_hash, ttl_seconds, created_at
FROM memories"""
)
rows = self._conn.execute(query).fetchall()
return [dict(r) for r in rows]

def content_hashes(self) -> set[str]:
    """Collect every non-empty content hash stored in the memories table."""
    cursor = self._conn.execute(
        "SELECT content_hash FROM memories WHERE content_hash != ''"
    )
    hashes: set[str] = set()
    for record in cursor.fetchall():
        hashes.add(record["content_hash"])
    return hashes

def exists(self, content_hash: str) -> bool:
"""Check if a memory with this content hash already exists."""
row = self._conn.execute(
Expand Down
Loading
Loading