forked from MemPalace/mempalace
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.gpu
More file actions
112 lines (93 loc) · 4.33 KB
/
Dockerfile.gpu
File metadata and controls
112 lines (93 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# syntax=docker/dockerfile:1.7
# MemPalace — GPU (NVIDIA CUDA) image.
#
# Multi-stage build using uv (the project ships a uv.lock, so we install from
# the frozen lockfile for reproducible images). The `gpu` extra pulls
# onnxruntime-gpu, which needs CUDA + cuDNN shared libraries at runtime, so
# this variant builds on an nvidia/cuda base instead of python:slim.
#
# The builder stage is dropped from the final image, so build tools
# (compilers, apt cache) never reach production — only the uv-managed
# interpreter and the resolved virtualenv do.
#
# Build:
#   docker build -f Dockerfile.gpu -t mempalace:gpu .
#
# Run (requires the NVIDIA Container Toolkit on the host):
#   docker run -i --rm --gpus all \
#     -e MEMPALACE_EMBEDDING_DEVICE=cuda \
#     -v mempalace-data:/data mempalace:gpu
#
# NOTE: onnxruntime-gpu ties itself to a CUDA major version. If embeddings
# fail to load on the GPU, align CUDA_IMAGE below with the CUDA release that
# the resolved onnxruntime-gpu wheel targets (see its release notes), then
# rebuild.
ARG CUDA_IMAGE=nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04

# ============================== builder stage ===============================
# Throwaway stage that resolves the Python environment with uv. Nothing from
# it reaches the final image except what the runtime stage copies explicitly.
FROM ${CUDA_IMAGE} AS builder

# uv and uvx are taken as ready-made binaries from the ghcr.io/astral-sh/uv
# image rather than installed through a package manager.
COPY --from=ghcr.io/astral-sh/uv:0.5 /uv /uvx /bin/

# Compiler toolchain for any dependency that has to be built from source.
# The apt package index is deleted in the same layer so it is never stored.
RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential && \
    rm -rf /var/lib/apt/lists/*
# Build-time knobs (override with --build-arg):
#   PYTHON_VERSION  interpreter version requested from uv
#   EXTRAS          comma-separated list of project extras to install
ARG PYTHON_VERSION=3.12
ARG EXTRAS="extract,spellcheck,gpu"

# uv settings for this stage (see uv's environment-variable reference):
#   UV_COMPILE_BYTECODE    byte-compile packages when they are installed
#   UV_LINK_MODE           copy files out of the uv cache instead of linking
#   UV_PYTHON_INSTALL_DIR  fixed directory for uv-managed interpreters; the
#                          runtime stage copies this same path
#   UV_PYTHON_PREFERENCE   use uv-managed interpreters only
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_PYTHON_INSTALL_DIR=/opt/uv/python \
    UV_PYTHON_PREFERENCE=only-managed
WORKDIR /app

# Dependency layer: third-party packages only, no project source. The three
# files uv needs (pyproject.toml, uv.lock, README.md) are bind-mounted for the
# duration of this step, so the project tree stays out of the layer and
# editing source code does not invalidate the cached dependencies. The
# uv-managed interpreter is installed in the same step, under
# UV_PYTHON_INSTALL_DIR, where the runtime stage picks it up.
#
# The loop turns the comma-separated EXTRAS value ("a,b,c") into
# "--extra a --extra b --extra c" for uv sync.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=README.md,target=README.md \
    set -e; \
    uv python install ${PYTHON_VERSION}; \
    extra_args=""; \
    for name in $(echo "${EXTRAS}" | tr ',' ' '); do \
        extra_args="${extra_args} --extra ${name}"; \
    done; \
    uv sync --frozen --no-install-project --no-dev --python ${PYTHON_VERSION} ${extra_args}
# Layer 2: the project itself. --no-editable installs mempalace into the
# venv's site-packages (instead of a .pth file pointing at /app), so the
# runtime stage can copy only /app/.venv and drop the source tree.
COPY . /app

# --python is passed here exactly as in the dependency layer. COPY has just
# brought the whole build context into /app; without an explicit request uv
# could honour an interpreter pin found in that tree (for example a
# .python-version file) and rebuild the venv on a different Python than the
# one the dependencies were installed for, discarding the cached layer's work.
#
# The loop turns the comma-separated EXTRAS value ("a,b,c") into
# "--extra a --extra b --extra c".
RUN --mount=type=cache,target=/root/.cache/uv \
    set -e; \
    flags=""; \
    for e in $(echo "${EXTRAS}" | tr ',' ' '); do flags="${flags} --extra ${e}"; done; \
    uv sync --frozen --no-dev --no-editable --python ${PYTHON_VERSION} ${flags}
# ============================== runtime stage ===============================
# Starts again from the same CUDA base image; the builder's filesystem is not
# inherited.
FROM ${CUDA_IMAGE} AS runtime

LABEL org.opencontainers.image.title="MemPalace (GPU)" \
      org.opencontainers.image.description="Local-first AI memory with CUDA-accelerated embeddings." \
      org.opencontainers.image.source="https://github.com/MemPalace/mempalace" \
      org.opencontainers.image.licenses="MIT"

# The only OS package added is ca-certificates, required by the HuggingFace
# model download that happens lazily on first use. No compilers are installed
# in this stage.
RUN apt-get update && \
    apt-get install -y --no-install-recommends ca-certificates && \
    rm -rf /var/lib/apt/lists/*
# Runtime environment:
#   HOME                        the data directory (declared as a volume below)
#   PATH                        venv executables take precedence
#   PYTHONUNBUFFERED            unbuffered stdout/stderr
#   PYTHONDONTWRITEBYTECODE     do not write .pyc files at run time
#   MEMPALACE_EMBEDDING_DEVICE  default embedding device for this image
ENV HOME=/data \
    PATH="/app/.venv/bin:${PATH}" \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    MEMPALACE_EMBEDDING_DEVICE=cuda

# Unprivileged account with fixed uid/gid 1000. Its home directory is /data,
# which useradd creates (--create-home), so the data directory is owned by
# this user.
RUN groupadd --gid 1000 mempalace && \
    useradd --create-home --home-dir /data --uid 1000 --gid 1000 mempalace
WORKDIR /app

# Bring the uv-managed interpreter and the resolved venv across the stage
# boundary. /opt/uv/python must be copied alongside .venv: the venv's
# shebangs and binary launcher reference it.
COPY --from=builder /opt/uv/python /opt/uv/python
COPY --from=builder --chown=mempalace:mempalace /app/.venv /app/.venv

# --chmod sets the executable bits as part of the copy itself, so no separate
# "RUN chmod +x" layer is needed. The resulting mode is a fixed 0755 whatever
# the file's permissions are in the build context.
COPY --chown=mempalace:mempalace --chmod=0755 docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh

# Drop privileges only after every step that needs root has run, and declare
# the volume after /data has been created and populated.
USER mempalace
VOLUME ["/data"]

# Exec form: the entrypoint script is started directly, without a wrapping
# shell. It is looked up on PATH (presumably /usr/local/bin is on the base
# image's PATH; confirm if CUDA_IMAGE is overridden). "mcp" is the default
# argument and can be replaced on the `docker run` command line.
ENTRYPOINT ["docker-entrypoint.sh"]
CMD ["mcp"]