-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
93 lines (72 loc) · 3.32 KB
/
Copy pathDockerfile
File metadata and controls
93 lines (72 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# COMPASS Platform — Docker build for Railway (frontend + API)
# Stage 0: Build frontend static files (Node.js discarded after this stage).
# The final image takes only /ui/dist from this stage.
FROM node:20-slim AS frontend
WORKDIR /ui
# Copy the npm manifests on their own so the dependency-install layer below is
# rebuilt only when package.json or package-lock.json changes.
COPY compass-ui/package.json compass-ui/package-lock.json ./
# Install exactly what package-lock.json pins; skip the audit and funding output.
RUN npm ci --no-audit --no-fund
# Bring in the rest of the UI sources, then run the project's "build" script.
COPY compass-ui/ ./
# Later stages expect the build output under /ui/dist.
RUN npm run build
# Stage 1: Build Python packages that need compilers.
# The toolchain (gcc, g++, libffi-dev, git) exists only in this stage; the
# runtime stage copies site-packages, /usr/local/bin and /tmp/RNA-FM out of it.
FROM python:3.11-slim AS builder

WORKDIR /build

# update + install + list cleanup share one layer so no stale apt index is cached.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc g++ libffi-dev git && \
    rm -rf /var/lib/apt/lists/*

COPY pyproject.toml README.md ./
COPY compass/ ./compass/

# Install CPU-only PyTorch first (small ~200MB vs ~2GB for CUDA)
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# disc covers scikit-learn + lightgbm; skip ml extra (umap-learn/numba not needed at runtime)
RUN pip install --no-cache-dir -e ".[primers,api,viz,disc]"

# The requirement MUST be quoted: unquoted, /bin/sh treats `>=2.0` as a stdout
# redirection, so pip would install an unconstrained xgboost and write its
# output to a file literally named "=2.0".
RUN pip install --no-cache-dir "xgboost>=2.0"

# RNA-FM package (structural embeddings for crRNA folding/accessibility).
# Installed editable from /tmp/RNA-FM, so that directory has to exist at the
# same path in the runtime image; only the .git metadata is dropped.
# NOTE(review): README_backup.md is created empty before the install —
# presumably RNA-FM's setup script reads it; confirm against upstream.
RUN cd /tmp && git clone --depth 1 https://github.com/ml4bio/RNA-FM.git && \
    cd RNA-FM && touch README_backup.md && pip install --no-cache-dir -e . && \
    cd / && rm -rf /tmp/RNA-FM/.git

# RNA-FM weights: downloaded at first pipeline run from HuggingFace CDN (~30s)
# NOTE(review): this lands in /app/scripts of the *builder* stage. No
# COPY --from=builder in this file covers /app, so the script only reaches the
# final image if the runtime stage copies it separately.
COPY scripts/download_rnafm.py /app/scripts/download_rnafm.py
# Stage 2: Lean runtime (no compilers)
FROM python:3.11-slim

WORKDIR /app

# Runtime system packages: bowtie2 (used for the index build below) and libgomp1.
# NOTE(review): libgomp1 is presumably the OpenMP runtime required by the
# lightgbm/xgboost wheels — confirm before removing.
RUN apt-get update && apt-get install -y --no-install-recommends \
    bowtie2 libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# Copy installed packages from builder (includes RNA-FM, xgboost, etc.)
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy RNA-FM source from builder (editable install needs it at the same path)
COPY --from=builder /tmp/RNA-FM /tmp/RNA-FM

# Application code
COPY pyproject.toml README.md ./
COPY compass/ ./compass/
COPY api/ ./api/
COPY configs/ ./configs/
COPY data/ ./data/

# Frontend static files (built in Stage 0, ~5MB)
COPY --from=frontend /ui/dist ./compass-ui/dist/

# Compass-ML model package (architecture + checkpoints + features)
COPY compass-net/ ./compass-net/

# Reference genome download script
COPY scripts/download_references.py ./scripts/download_references.py

# RNA-FM weight download script. The builder stage stores it under
# /app/scripts, but no COPY --from=builder brings /app across, so it is taken
# from the build context here to make it available at /app/scripts at runtime.
COPY scripts/download_rnafm.py ./scripts/download_rnafm.py

# Editable install of the project from /app. --no-deps: the dependencies were
# already installed in the builder and arrived with site-packages above.
RUN pip install --no-cache-dir --no-deps -e .

# Download reference genomes for all supported organisms
RUN python scripts/download_references.py

# Build Bowtie2 indices for all organisms (one index per FASTA, written next
# to it under data/references/).
RUN bowtie2-build data/references/H37Rv.fasta data/references/H37Rv && \
    bowtie2-build data/references/ecoli_K12.fasta data/references/ecoli_K12 && \
    bowtie2-build data/references/saureus_NCTC8325.fasta data/references/saureus_NCTC8325 && \
    bowtie2-build data/references/ngono_FA1090.fasta data/references/ngono_FA1090

# Pre-create the output directories under /app.
RUN mkdir -p results/api results/panels results/validation
# Memory optimisation for constrained Railway containers
ENV MALLOC_TRIM_THRESHOLD_=0 \
    PYTORCH_NO_CUDA_MEMORY_CACHING=1

# PyTorch CPU threading — 4 threads optimal for Railway shared 8 vCPU
ENV OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4 \
    TORCH_NUM_THREADS=4

# Railway sets $PORT dynamically via env var; 8000 is the default otherwise.
ENV PORT=8000
EXPOSE 8000

# Exec-form CMD with an explicit shell so $PORT is expanded at container start.
# `exec` replaces that shell with uvicorn, making uvicorn PID 1 so it receives
# SIGTERM directly and can shut down gracefully. The previous shell form ran
# uvicorn underneath two nested shells, which did not pass the signal on.
# ${PORT:-8000} also covers the case where PORT is set but empty.
CMD ["sh", "-c", "exec uvicorn api.main:app --host 0.0.0.0 --port \"${PORT:-8000}\""]