Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/mirror.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ on:
- registry.k8s.io/git-sync/git-sync
- registry-1.docker.io/library/golang
- registry-1.docker.io/dxflrs/garage
- ghcr.io/astral-sh/uv
image-index-manifest-tag:
description: |
The image index manifest tag, like 1.0.14 or v1.0.14
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ All notable changes to this project will be documented in this file.
- superset: Add `6.1.0` ([#1514]).
- zookeeper: Add `3.9.5` ([#1515]).
- hadoop: Add `3.5.0` and `3.4.3` ([#1511]).
- airflow: Add `3.2.1` ([#1519]).

### Changed

Expand Down Expand Up @@ -64,6 +65,7 @@ All notable changes to this project will be documented in this file.
[#1514]: https://github.com/stackabletech/docker-images/pull/1514
[#1515]: https://github.com/stackabletech/docker-images/pull/1515
[#1518]: https://github.com/stackabletech/docker-images/pull/1518
[#1519]: https://github.com/stackabletech/docker-images/pull/1519
[#1520]: https://github.com/stackabletech/docker-images/pull/1520
[#1521]: https://github.com/stackabletech/docker-images/pull/1521
[#1525]: https://github.com/stackabletech/docker-images/pull/1525
Expand Down
77 changes: 40 additions & 37 deletions airflow/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,29 @@
# - SecretsUsedInArgOrEnv : OPA_AUTH_MANAGER is a false positive and breaks the build.

# Build arguments consumed by the image tags in the FROM lines below.
ARG GIT_SYNC_VERSION
ARG UV_VERSION

# For updated versions check https://github.com/kubernetes/git-sync/releases
# which should contain an image location (e.g. registry.k8s.io/git-sync/git-sync:v3.6.8)
FROM oci.stackable.tech/sdp/git-sync/git-sync:${GIT_SYNC_VERSION} AS gitsync-image

# Source of the `uv` and `uvx` binaries; other stages copy them into /bin/
# with `COPY --from=uv-image`.
FROM oci.stackable.tech/stackable/astral-sh/uv:${UV_VERSION} AS uv-image

FROM local-image/shared/statsd-exporter AS statsd_exporter-builder

FROM local-image/vector AS opa-auth-manager-builder

ARG OPA_AUTH_MANAGER
ARG PYTHON_VERSION
ARG UV_VERSION
ARG STACKABLE_USER_UID

COPY airflow/opa-auth-manager/${OPA_AUTH_MANAGER} /tmp/opa-auth-manager

WORKDIR /tmp/opa-auth-manager

RUN <<EOF
microdnf update
microdnf install python${PYTHON_VERSION}-pip
microdnf clean all
COPY --from=uv-image --chown=${STACKABLE_USER_UID}:0 /uv /uvx /bin/

pip${PYTHON_VERSION} install --no-cache-dir uv==${UV_VERSION}
RUN <<EOF

# This folder is required by the tests to set up an sqlite database
mkdir /root/airflow
Expand All @@ -46,7 +46,6 @@ ARG STACKABLE_USER_UID
ARG NODEJS_VERSION
ARG S3FS_VERSION
ARG CYCLONEDX_BOM_VERSION
ARG UV_VERSION

# Airflow "extras" packages are listed here: https://airflow.apache.org/docs/apache-airflow/stable/extra-packages-ref.html
# They evolve over time and thus belong to the version-specific arguments.
Expand All @@ -62,6 +61,8 @@ ARG AIRFLOW_EXTRAS_EXTERNAL_SERVICES=""
ARG AIRFLOW_EXTRAS_LOCALLY_INSTALLED_SOFTWARE=""
ARG AIRFLOW_EXTRAS_OTHER=""

COPY --from=uv-image --chown=${STACKABLE_USER_UID}:0 /uv /uvx /bin/

RUN microdnf module enable -y nodejs:${NODEJS_VERSION} && \
microdnf update && \
microdnf install \
Expand All @@ -78,10 +79,6 @@ RUN microdnf module enable -y nodejs:${NODEJS_VERSION} && \
libpq-devel \
openldap-devel \
openssl-devel \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-devel \
python${PYTHON_VERSION}-pip \
python${PYTHON_VERSION}-wheel \
# The airflow odbc provider can compile without the development files (headers and libraries) (see https://github.com/stackabletech/docker-images/pull/683)
unixODBC \
# Needed for Airflow UI assets
Expand All @@ -103,23 +100,29 @@ COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/patches/${PRODUCT_VERSION

WORKDIR /stackable

RUN <<EOF
# Set a cache directory so we can mount the cache for layers that use it.
ENV UV_CACHE_DIR=/var/cache/uv
# Don't use links because they won't work after `COPY --from` in the final stage.
ENV UV_LINK_MODE=copy
# Store the uv-managed Python inside /stackable/ so it gets included in the
# COPY --from=airflow-build-image step. Without this, the venv symlinks would
# point to /usr/local/share/uv/python/... which is not copied to the final image.
ENV UV_PYTHON_INSTALL_DIR=/stackable/python

# This installs the Python runtime
RUN uv venv --python "${PYTHON_VERSION}" --system-site-packages /stackable/app
Comment thread
sbernauer marked this conversation as resolved.

# Instead of activating the environment each time
ENV VIRTUAL_ENV=/stackable/app

RUN --mount=type=cache,target=/var/cache/uv <<EOF

# Compose comma-delimited AIRFLOW_EXTRAS
AIRFLOW_EXTRAS="$AIRFLOW_EXTRAS_CORE,$AIRFLOW_EXTRAS_META,$AIRFLOW_EXTRAS_PROVIDER_APACHE,$AIRFLOW_EXTRAS_EXTERNAL_SERVICES,$AIRFLOW_EXTRAS_LOCALLY_INSTALLED_SOFTWARE,$AIRFLOW_EXTRAS_OTHER"

# Removing duplicates
AIRFLOW_EXTRAS=$(echo "$AIRFLOW_EXTRAS" | tr ',' '\n' | awk 'NF > 0 {if (!seen[$0]++) print $0}' | tr '\n' ',' | sed 's/,$//')

python${PYTHON_VERSION} -m venv --system-site-packages /stackable/app

source /stackable/app/bin/activate

# Upgrade pip to the latest version
# Also install uv to get support for build constraints
pip install --no-cache-dir --upgrade pip
pip install --no-cache-dir uv==${UV_VERSION}

# Pin virtualenv due to a breaking change in 21.0.0 which is pulled
# in by hatch.
# See https://github.com/pypa/hatch/issues/2193
Expand All @@ -134,6 +137,7 @@ if [ -d "./airflow-core" ]; then
cd airflow-core/src/airflow/ui

# build front-end assets
# TODO: Consider making the pnpm version an ARG
npm install -g pnpm@10.18.2
pnpm install --frozen-lockfile
pnpm run build
Expand All @@ -143,13 +147,14 @@ if [ -d "./airflow-core" ]; then
cd ../../..
/root/.local/bin/hatch build -t wheel
# First install the full apache-airflow package to get all dependencies including database drivers
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
uv pip install --python ${VIRTUAL_ENV}/bin/python apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
# Then install the locally built core wheel to override the core package
uv pip install --no-cache-dir dist/apache_airflow_core-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
uv pip install --python ${VIRTUAL_ENV}/bin/python dist/apache_airflow_core-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
else
# Airflow 2.x
# build front-end assets
cd airflow/www
# TODO: Consider making the yarn version an ARG
npm install -g yarn@1.22.22
yarn install --frozen-lockfile
yarn run build
Expand All @@ -158,25 +163,22 @@ else
cd ../..
/root/.local/bin/hatch build -t wheel
# First install the full apache-airflow package to get all dependencies including database drivers
uv pip install --no-cache-dir apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
uv pip install --python ${VIRTUAL_ENV}/bin/python apache-airflow[${AIRFLOW_EXTRAS}]==${PRODUCT_VERSION} --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
# Then install the locally built wheel to override with patched version
uv pip install --no-cache-dir dist/apache_airflow-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
uv pip install --python ${VIRTUAL_ENV}/bin/python dist/apache_airflow-${PRODUCT_VERSION}-py3-none-any.whl[${AIRFLOW_EXTRAS}] --constraint /tmp/constraints.txt --build-constraints /tmp/build-constraints.txt
fi

# Needed for pandas S3 integration to e.g. write and read csv and parquet files to/from S3
uv pip install --no-cache-dir s3fs==${S3FS_VERSION} cyclonedx-bom==${CYCLONEDX_BOM_VERSION}
# TODO: s3fs may already be installed via the s3fs extra and constraints. Check if this explicit install is still needed.
uv pip install --python ${VIRTUAL_ENV}/bin/python s3fs==${S3FS_VERSION} cyclonedx-bom==${CYCLONEDX_BOM_VERSION}
# Needed for OIDC
uv pip install --no-cache-dir Flask_OIDC==2.2.0 Flask-OpenID==1.3.1
uv pip install --python ${VIRTUAL_ENV}/bin/python Flask_OIDC==2.2.0 Flask-OpenID==1.3.1

uv pip install --no-cache-dir /tmp/opa_auth_manager-0.1.0-py3-none-any.whl
uv pip install --python ${VIRTUAL_ENV}/bin/python /tmp/opa_auth_manager-0.1.0-py3-none-any.whl

# Create the SBOM for Airflow
# Important: All `pip install` commands must be above this line, otherwise the SBOM will be incomplete
if [ "$PRODUCT_VERSION" == "2.9.3" ] || [ "$PRODUCT_VERSION" == "3.0.6" ]; then
cyclonedx-py environment --schema-version 1.5 --outfile /tmp/sbom.json
else
cyclonedx-py environment --spec-version 1.5 --output-file /tmp/sbom.json
fi
${VIRTUAL_ENV}/bin/cyclonedx-py environment --spec-version 1.5 --output-file /tmp/sbom.json
uv pip uninstall cyclonedx-bom

# Break circular dependencies by removing the apache-airflow dependency from the providers
Expand Down Expand Up @@ -218,8 +220,9 @@ LABEL name="Apache Airflow" \
description="This image is deployed by the Stackable Operator for Apache Airflow."

ENV HOME=/stackable
ENV AIRFLOW_USER_HOME_DIR=/stackable
ENV PATH=$PATH:/bin:$HOME/app/bin
ENV VIRTUAL_ENV=$HOME/app
ENV AIRFLOW_USER_HOME_DIR=$HOME
ENV PATH=$HOME/app/bin:$PATH:/bin
ENV AIRFLOW_HOME=$HOME/airflow

COPY --from=airflow-build-image --chown=${STACKABLE_USER_UID}:0 /stackable/ ${HOME}/
Expand All @@ -233,14 +236,15 @@ COPY --from=gitsync-image --chown=${STACKABLE_USER_UID}:0 /git-sync ${HOME}/git-

COPY airflow/licenses /licenses

COPY --from=uv-image --chown=${STACKABLE_USER_UID}:0 /uv /uvx /bin/

# Update image and install needed packages
RUN <<EOF
microdnf update

# git: Needed for the gitsync functionality
# openldap: Needed for authentication of clients against LDAP servers
# openssh-clients: We need the openssh libs for the gitsync functionality (the clone target could be e.g. git@github.com:org/repo.git)
# python: Airflow needs Python
microdnf install \
ca-certificates \
cyrus-sasl \
Expand All @@ -251,7 +255,6 @@ microdnf install \
openssh-clients \
openssl-libs \
openssl-pkcs11 \
python${PYTHON_VERSION} \
socat \
unixODBC
microdnf clean all
Expand Down
Loading