Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,21 @@ FROM python:3.13-bookworm

WORKDIR /home/airflow

# Install Java 17 plus the basic tooling used by later build steps.
#
# Debian names the JDK directory after the package architecture
# (java-17-openjdk-amd64, java-17-openjdk-arm64, ...). To keep JAVA_HOME
# identical on x86_64 and Apple Silicon/ARM64 builds, a fixed-name symlink is
# created that points at the directory for the architecture being built.
# The architecture is taken from dpkg rather than from a shell glob, so the
# link target is always exactly one existing directory, and the final `test`
# fails the build immediately if the JDK is not where it is expected.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        make \
        openjdk-17-jdk \
        procps \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && ln -s "/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)" /usr/lib/jvm/java-17-generic \
    && test -x /usr/lib/jvm/java-17-generic/bin/java

# JAVA_HOME points at the architecture-independent symlink created above.
ENV JAVA_HOME=/usr/lib/jvm/java-17-generic
# Separate instruction: PATH must see the JAVA_HOME value set just before it.
ENV PATH=$JAVA_HOME/bin:$PATH

# Install Spark
# SPARK_VERSION selects the release tarball downloaded by the RUN step that follows.
ENV SPARK_VERSION=4.0.1
# NOTE(review): presumably the directory the tarball is unpacked into; the rest
# of the install step is not visible here, so confirm.
ENV SPARK_HOME=/opt/spark
RUN wget -q https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz && \
Expand Down Expand Up @@ -46,7 +47,9 @@ ENV AIRFLOW_VERSION=3.1.3
# Python minor version used to select the matching Airflow constraints file.
ENV PYTHON_VERSION=3.13
# URL of the "no providers" constraints file for this Airflow/Python version pair.
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-no-providers-${PYTHON_VERSION}.txt"

# Create virtual environment and explicitly add it to the global PATH
# so its executables are found first by later instructions and at runtime.
RUN uv venv /home/airflow/.venv
ENV PATH="/home/airflow/.venv/bin:$PATH"
# Install Airflow pinned to AIRFLOW_VERSION, resolved against the constraints file above.
RUN uv pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
# NOTE(review): this pin repeats the value of SPARK_VERSION (4.0.1); keep the
# two in sync when upgrading Spark.
RUN uv pip install pyspark==4.0.1 'pyspark[sql]==4.0.1'
# NOTE(review): ruff is unpinned, so a rebuild may pick up a newer release.
RUN uv pip install ruff
Expand All @@ -58,7 +61,7 @@ RUN uv pip install plotly
# Copy IPython startup scripts into the default profile of the root user
COPY ./ipython_scripts/startup/ /root/.ipython/profile_default/startup/

# Create the warehouse and spark-events folders in a single layer.
# NOTE(review): presumably the Spark SQL warehouse and event-log locations;
# confirm against the Spark configuration used by this image.
RUN mkdir -p \
        /home/airflow/warehouse \
        /home/airflow/spark-events

Expand Down