mempalace/Dockerfile.gpu at develop · Acharnite/mempalace · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# syntax=docker/dockerfile:1.7

# MemPalace — GPU (NVIDIA CUDA) image.
#
# Multi-stage build using uv (the project ships a uv.lock, so we install from
# the frozen lockfile for reproducible images). The `gpu` extra pulls
# onnxruntime-gpu, which needs CUDA + cuDNN shared libraries at runtime, so
# this variant builds on an nvidia/cuda base instead of python:slim.
#
# The builder stage is dropped from the final image, so build tools
# (compilers, apt cache) never reach production — only the uv-managed
# interpreter and the resolved virtualenv do.
#
# Build:
#   docker build -f Dockerfile.gpu -t mempalace:gpu .
#
# Run (requires the NVIDIA Container Toolkit on the host):
#   docker run -i --rm --gpus all \
#     -e MEMPALACE_EMBEDDING_DEVICE=cuda \
#     -v mempalace-data:/data mempalace:gpu
#
# NOTE: onnxruntime-gpu ties itself to a CUDA major version. If embeddings
# fail to load on the GPU, align CUDA_IMAGE below with the CUDA release that
# the resolved onnxruntime-gpu wheel targets (see its release notes), then
# rebuild.

# Base image used by every FROM in this file. Declared ahead of the first
# FROM so both stages can reference it; override with
# `--build-arg CUDA_IMAGE=...`.
ARG CUDA_IMAGE=nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04

# --- builder ----------------------------------------------------------------
FROM ${CUDA_IMAGE} AS builder

# Take the uv and uvx binaries from the published uv image.
COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /uvx /bin/

# Compiler toolchain for wheels that have to be built from source. It is
# installed only in this stage, and the apt package lists are removed in the
# same layer that fetched them.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential && \
    rm -rf /var/lib/apt/lists/*

# Build-time knobs: the interpreter version handed to uv, and the
# comma-separated list of project extras to install.
ARG PYTHON_VERSION=3.12
ARG EXTRAS="extract,spellcheck,gpu"

# uv settings for the install steps of this stage. UV_PYTHON_INSTALL_DIR
# fixes where the uv-managed interpreter lands, which is the path the
# runtime stage copies from.
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python \
    UV_PYTHON_PREFERENCE=only-managed \
    UV_LINK_MODE=copy \
    UV_COMPILE_BYTECODE=1

WORKDIR /app

# Layer 1: third-party dependencies, without the project source.
# pyproject.toml, uv.lock and README.md are bind-mounted for the duration of
# this step instead of being copied, so the source tree never becomes part
# of this layer and editing source code leaves the dependency cache intact.
# The uv-managed interpreter is installed in the same step, at a fixed path
# the runtime stage can copy as-is.
#
# EXTRAS is split on commas and each name is appended to the positional
# parameters as `--extra <name>`, which are then handed to `uv sync`.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=README.md,target=README.md \
    set -e; \
    uv python install ${PYTHON_VERSION}; \
    set --; \
    for extra in $(echo "${EXTRAS}" | tr ',' ' '); do \
        set -- "$@" --extra "${extra}"; \
    done; \
    uv sync --frozen --no-install-project --no-dev --python ${PYTHON_VERSION} "$@"

# Layer 2: the project itself. --no-editable installs mempalace into the
# venv's site-packages (instead of a .pth file pointing at /app), so the
# runtime stage can copy only /app/.venv and drop the source tree.
#
# --python is pinned to PYTHON_VERSION, exactly as in the dependency layer.
# Without it uv picks the interpreter on its own once the source tree is
# present (for example from a .python-version file brought in by COPY),
# which can silently ignore the build arg and rebuild the venv that the
# dependency layer already populated.
#
# EXTRAS is split on commas and each name is appended to the positional
# parameters as `--extra <name>`; passing "$@" keeps every argument intact
# instead of relying on a second round of word splitting.
COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    set -e; \
    set --; \
    for extra in $(echo "${EXTRAS}" | tr ',' ' '); do \
        set -- "$@" --extra "${extra}"; \
    done; \
    uv sync --frozen --no-dev --no-editable --python "${PYTHON_VERSION}" "$@"

# --- runtime ----------------------------------------------------------------
FROM ${CUDA_IMAGE} AS runtime

# OCI image metadata.
LABEL org.opencontainers.image.source="https://github.com/MemPalace/mempalace" \
      org.opencontainers.image.title="MemPalace (GPU)" \
      org.opencontainers.image.description="Local-first AI memory with CUDA-accelerated embeddings." \
      org.opencontainers.image.licenses="MIT"

# The only OS package added at runtime is ca-certificates, which the lazy
# HuggingFace model download on first use needs. No compilers are installed
# in this stage, and the apt package lists are removed in the same layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates && \
    rm -rf /var/lib/apt/lists/*

# Runtime environment: HOME points at the data directory, the venv's bin
# directory is put first on PATH, Python output is unbuffered and no .pyc
# files are written, and embeddings default to the CUDA device.
ENV MEMPALACE_EMBEDDING_DEVICE=cuda \
    HOME=/data \
    PATH="/app/.venv/bin:${PATH}" \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Unprivileged account (uid/gid 1000) whose home directory is /data, created
# here so the data directory exists and is owned by that user.
RUN groupadd -g 1000 mempalace && \
    useradd -u 1000 -g 1000 -d /data -m mempalace

WORKDIR /app

# Bring the uv-managed interpreter and the resolved venv across the stage
# boundary. /opt/uv/python must be copied alongside .venv: the venv's
# shebangs and binary launcher reference it.
COPY --from=builder /opt/uv/python /opt/uv/python
COPY --from=builder --chown=mempalace:mempalace /app/.venv /app/.venv

# Install the entrypoint script already executable. Setting the mode with
# --chmod avoids a follow-up `RUN chmod`, which would add another layer
# holding a second copy of the file just to change its permission bits.
COPY --chown=mempalace:mempalace --chmod=755 docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh

# Drop root for everything that runs in the container. VOLUME is declared
# only after /data has been created and handed to the mempalace user.
USER mempalace
VOLUME ["/data"]

# Exec-form entrypoint; `mcp` is the default argument and can be replaced on
# the `docker run` command line.
# NOTE(review): the script is looked up via PATH, which assumes
# /usr/local/bin is on the base image's PATH - confirm if CUDA_IMAGE changes.
ENTRYPOINT ["docker-entrypoint.sh"]
CMD ["mcp"]