forked from apache/airflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
463 lines (384 loc) · 17.3 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# THIS DOCKERFILE IS INTENDED FOR PRODUCTION USE AND DEPLOYMENT.
# NOTE! IT IS ALPHA-QUALITY FOR NOW - WE ARE IN THE PROCESS OF TESTING IT
#
#
# This is a multi-segmented image. It actually contains two images:
#
# airflow-build-image - there all airflow dependencies can be installed (and
# built - for those dependencies that require
# build essentials). Airflow is installed there with
# --user switch so that all the dependencies are
# installed to ${HOME}/.local
#
# main - this is the actual production image that is much
# smaller because it does not contain all the build
# essentials. Instead the ${HOME}/.local folder
# is copied from the build-image - this way we have
# only result of installation and we do not need
# all the build essentials. This makes the image
# much smaller.
#
# Global build arguments. They are declared ahead of the first FROM, so a stage
# has to re-declare (`ARG <name>`) each one it wants to read.

# Base image and Python tooling
ARG PYTHON_BASE_IMAGE="python:3.6-slim-buster"
ARG PYTHON_MAJOR_MINOR_VERSION="3.6"
ARG PIP_VERSION="20.2.4"

# Airflow version and the extras / additional Python packages to install
ARG AIRFLOW_VERSION="2.0.0.dev0"
ARG AIRFLOW_EXTRAS="async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,google,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv"
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
ARG ADDITIONAL_PYTHON_DEPS=""

# Airflow home directory and the numeric user/group ids of the airflow user
ARG AIRFLOW_HOME="/opt/airflow"
ARG AIRFLOW_UID="50000"
ARG AIRFLOW_GID="50000"

ARG CASS_DRIVER_BUILD_CONCURRENCY="8"
##############################################################################################
# Build stage: all dependencies are built and installed here
##############################################################################################
FROM ${PYTHON_BASE_IMAGE} AS airflow-build-image

# Every RUN goes through bash with: pipefail, exit on error (-e), error on unset variables (-u)
# and command tracing (-x)
SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]

# Re-declare the global arguments needed in this stage and keep them in the stage environment
ARG PYTHON_BASE_IMAGE
ARG PYTHON_MAJOR_MINOR_VERSION
ARG PIP_VERSION
ENV PYTHON_BASE_IMAGE="${PYTHON_BASE_IMAGE}" \
    PYTHON_MAJOR_MINOR_VERSION="${PYTHON_MAJOR_MINOR_VERSION}" \
    PIP_VERSION="${PIP_VERSION}"

# Non-interactive debian installs and UTF-8 language/locale variables
ENV DEBIAN_FRONTEND=noninteractive \
    LANGUAGE=C.UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    LC_CTYPE=C.UTF-8 \
    LC_MESSAGES=C.UTF-8
# curl and gnupg2 are needed by many of the installation steps that follow.
# The apt caches and package lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        curl \
        gnupg2 \
    && apt-get autoremove --yes -qq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Default list of apt packages for the build stage (build tools, development headers,
# client utilities, nodejs and yarn). It can be replaced with --build-arg. The value is
# exported as ENV and later expanded unquoted - one word per package - by the
# `apt-get install` step of this stage.
ARG DEV_APT_DEPS="\
apt-transport-https \
apt-utils \
build-essential \
ca-certificates \
gnupg \
dirmngr \
freetds-bin \
freetds-dev \
gosu \
krb5-user \
ldap-utils \
libffi-dev \
libkrb5-dev \
libpq-dev \
libsasl2-2 \
libsasl2-dev \
libsasl2-modules \
libssl-dev \
locales \
lsb-release \
nodejs \
openssh-client \
postgresql-client \
python-selinux \
sasl2-bin \
software-properties-common \
sqlite3 \
sudo \
unixodbc \
unixodbc-dev \
yarn"
ENV DEV_APT_DEPS=${DEV_APT_DEPS}
# Extra apt packages installed together with DEV_APT_DEPS (none by default)
ARG ADDITIONAL_DEV_APT_DEPS=""
ENV ADDITIONAL_DEV_APT_DEPS=${ADDITIONAL_DEV_APT_DEPS}
# Shell snippet run through `bash -c` before the dev apt packages are installed. By default it
# runs the nodesource setup script and registers the yarn apt repository and its signing key.
# Both downloads use `--fail --location`, so an HTTP error aborts the build instead of feeding
# an error page to `bash` or `apt-key add`.
# NOTE(review): the nodesource script is piped straight into bash without checksum verification.
ARG DEV_APT_COMMAND="\
curl --fail --location https://deb.nodesource.com/setup_10.x | bash - \
&& curl --fail --location https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add - > /dev/null \
&& echo 'deb https://dl.yarnpkg.com/debian/ stable main' > /etc/apt/sources.list.d/yarn.list"
ENV DEV_APT_COMMAND=${DEV_APT_COMMAND}
# Optional second snippet run right after DEV_APT_COMMAND; the default `echo` is a no-op
ARG ADDITIONAL_DEV_APT_COMMAND="echo"
ENV ADDITIONAL_DEV_APT_COMMAND=${ADDITIONAL_DEV_APT_COMMAND}
# Optional VAR=value pairs that the dev apt install step passes to `export` (empty by default)
ARG ADDITIONAL_DEV_ENV_VARS=""
# The man directories are missing on debian-buster, see
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199
# Create them, run the repository set-up snippets, then install the basic and additional
# apt dependencies and clean the apt caches in the same layer.
RUN mkdir -pv /usr/share/man/man1 /usr/share/man/man7 \
    && export ${ADDITIONAL_DEV_ENV_VARS?} \
    && bash -o pipefail -e -u -x -c "${DEV_APT_COMMAND}" \
    && bash -o pipefail -e -u -x -c "${ADDITIONAL_DEV_APT_COMMAND}" \
    && apt-get update \
    && apt-get install --yes --no-install-recommends \
        ${DEV_APT_DEPS} \
        ${ADDITIONAL_DEV_APT_DEPS} \
    && apt-get autoremove --yes -qq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Whether the MySQL client should be installed; exported so that later steps can read it.
# NOTE(review): presumably scripts/docker/install_mysql.sh checks this variable - confirm in the script.
ARG INSTALL_MYSQL_CLIENT="true"
ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT}
# Helper scripts and user-provided context files used by the steps below
COPY scripts/docker /scripts/docker
COPY docker-context-files /docker-context-files
# fix permission issue in Azure DevOps when running the script
RUN chmod a+x /scripts/docker/install_mysql.sh
# The script is called by its absolute path so the step does not depend on the working
# directory inherited from the (overridable) base image. "dev" is passed as its only argument.
RUN /scripts/docker/install_mysql.sh dev
# GitHub repository and branch used to build the source archive URL in the
# pre-cached pip packages step of this stage
ARG AIRFLOW_REPO=apache/airflow
ENV AIRFLOW_REPO=${AIRFLOW_REPO}
ARG AIRFLOW_BRANCH=master
ENV AIRFLOW_BRANCH=${AIRFLOW_BRANCH}
# Final list of extras: AIRFLOW_EXTRAS followed by ADDITIONAL_AIRFLOW_EXTRAS.
# `${ADDITIONAL_AIRFLOW_EXTRAS:+,}` emits the separating comma only when the additional list is non-empty.
ARG AIRFLOW_EXTRAS
ARG ADDITIONAL_AIRFLOW_EXTRAS=""
ENV AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS}${ADDITIONAL_AIRFLOW_EXTRAS:+,}${ADDITIONAL_AIRFLOW_EXTRAS}
# Constraints file passed to `pip install --constraint`. The default URL is built from the
# constraints reference and the Python major.minor version; the location can be overridden as a whole.
ARG AIRFLOW_CONSTRAINTS_REFERENCE="constraints-master"
ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/${AIRFLOW_CONSTRAINTS_REFERENCE}/constraints-${PYTHON_MAJOR_MINOR_VERSION}.txt"
ENV AIRFLOW_CONSTRAINTS_LOCATION=${AIRFLOW_CONSTRAINTS_LOCATION}
# Packages are installed with `pip install --user` as root, so entry points land in /root/.local/bin
ENV PATH=${PATH}:/root/.local/bin
RUN mkdir -p /root/.local/bin
# Use the pip configuration from docker-context-files when one is provided.
# This happens in the build stage only; the main stage copies just /root/.local from it.
RUN if [[ -f /docker-context-files/.pypirc ]]; then \
cp /docker-context-files/.pypirc /root/.pypirc; \
fi
# Pin pip to the requested version
RUN pip install --upgrade "pip==${PIP_VERSION}"
# By default we do not use pre-cached packages, but in CI/Breeze environment we override this to speed up
# builds in case setup.py/setup.cfg changed. This is pure optimisation of CI/Breeze builds.
ARG AIRFLOW_PRE_CACHED_PIP_PACKAGES="false"
ENV AIRFLOW_PRE_CACHED_PIP_PACKAGES=${AIRFLOW_PRE_CACHED_PIP_PACKAGES}
# In case of the production build image segment we want to pre-install the dependencies of the
# airflow branch from GitHub so that we do not have to always reinstall them from scratch.
# Airflow is installed from the branch archive with the selected extras and constraints and then
# apache-airflow itself is uninstalled, which leaves only its dependencies in /root/.local.
# NOTE(review): `${AIRFLOW_EXTRAS/mysql,}` removes the first "mysql," occurrence only, so a
# "mysql" extra placed last in the list (no trailing comma) is not removed.
RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then \
AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}; \
fi; \
pip install --user \
"https://github.com/${AIRFLOW_REPO}/archive/${AIRFLOW_BRANCH}.tar.gz#egg=apache-airflow[${AIRFLOW_EXTRAS}]" \
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" \
&& pip uninstall --yes apache-airflow; \
fi
# By default we install latest airflow from PyPI so we do not need to copy sources of Airflow
# but in case of breeze/CI builds we use latest sources and we override
# those SOURCES_FROM/TO with "." and "/opt/airflow" respectively
# NOTE(review): the default COPY source "empty" presumably has to exist in the build context - confirm.
ARG AIRFLOW_SOURCES_FROM="empty"
ENV AIRFLOW_SOURCES_FROM=${AIRFLOW_SOURCES_FROM}
ARG AIRFLOW_SOURCES_TO="/empty"
ENV AIRFLOW_SOURCES_TO=${AIRFLOW_SOURCES_TO}
COPY ${AIRFLOW_SOURCES_FROM} ${AIRFLOW_SOURCES_TO}
ARG CASS_DRIVER_BUILD_CONCURRENCY
ENV CASS_DRIVER_BUILD_CONCURRENCY=${CASS_DRIVER_BUILD_CONCURRENCY}
# This is airflow version that is put in the label of the image build
ARG AIRFLOW_VERSION
ENV AIRFLOW_VERSION=${AIRFLOW_VERSION}
# Additional Python packages passed to a separate `pip install --user` call (none by default)
ARG ADDITIONAL_PYTHON_DEPS=""
ENV ADDITIONAL_PYTHON_DEPS=${ADDITIONAL_PYTHON_DEPS}
# Determines the way airflow is installed. By default we install airflow from PyPI `apache-airflow` package
# But it also can be `.` from local installation or GitHub URL pointing to specific branch or tag
# of Airflow. Note that for local source installation you need to have local sources of
# Airflow checked out together with the Dockerfile and AIRFLOW_SOURCES_FROM and AIRFLOW_SOURCES_TO
# set to "." and "/opt/airflow" respectively.
ARG AIRFLOW_INSTALLATION_METHOD="apache-airflow"
ENV AIRFLOW_INSTALLATION_METHOD=${AIRFLOW_INSTALLATION_METHOD}
# By default latest released version of airflow is installed (when empty) but this value can be overridden
# and we can install specific version of airflow this way.
ARG AIRFLOW_INSTALL_VERSION=""
ENV AIRFLOW_INSTALL_VERSION=${AIRFLOW_INSTALL_VERSION}
# We can set this value to true in case we want to install .whl .tar.gz packages placed in the
# docker-context-files folder. This can be done for both - additional packages you want to install
# and for airflow as well (you have to set INSTALL_FROM_PYPI to false in this case)
ARG INSTALL_FROM_DOCKER_CONTEXT_FILES=""
ENV INSTALL_FROM_DOCKER_CONTEXT_FILES=${INSTALL_FROM_DOCKER_CONTEXT_FILES}
# By default we install latest airflow from PyPI. You can set it to false if you want to install
# Airflow from the .whl or .tar.gz packages placed in `docker-context-files` folder.
ARG INSTALL_FROM_PYPI="true"
ENV INSTALL_FROM_PYPI=${INSTALL_FROM_PYPI}
# By default we install providers from PyPI but in case of Breeze build we want to install providers
# from local sources without the need of preparing provider packages upfront. This value is
# automatically overridden by Breeze scripts.
ARG INSTALL_PROVIDERS_FROM_SOURCES="false"
ENV INSTALL_PROVIDERS_FROM_SOURCES=${INSTALL_PROVIDERS_FROM_SOURCES}
# The installation steps below run from /opt/airflow
WORKDIR /opt/airflow
# Install Airflow and the additional packages into /root/.local (pip --user):
#  1. remove mysql from extras if client is not installed
#  2. install airflow with the selected extras/version when INSTALL_FROM_PYPI is "true"
#  3. install ADDITIONAL_PYTHON_DEPS (expanded unquoted on purpose: one word per requirement)
#  4. install the .whl / .tar.gz packages found in /docker-context-files when
#     INSTALL_FROM_DOCKER_CONTEXT_FILES is "true"
#  5. remove compiled python files and __pycache__ directories (best effort)
# Step 4 collects the packages with nullglob into an array, so it also works when only .whl or
# only .tar.gz files are present. The previous `ls *.{whl,tar.gz}` check failed, and silently
# skipped the installation, unless both kinds of file existed.
RUN if [[ ${INSTALL_MYSQL_CLIENT} != "true" ]]; then \
        AIRFLOW_EXTRAS=${AIRFLOW_EXTRAS/mysql,}; \
    fi; \
    if [[ ${INSTALL_FROM_PYPI} == "true" ]]; then \
        pip install --user "${AIRFLOW_INSTALLATION_METHOD}[${AIRFLOW_EXTRAS}]${AIRFLOW_INSTALL_VERSION}" \
            --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
    fi; \
    if [[ -n "${ADDITIONAL_PYTHON_DEPS}" ]]; then \
        pip install --user ${ADDITIONAL_PYTHON_DEPS} --constraint "${AIRFLOW_CONSTRAINTS_LOCATION}"; \
    fi; \
    if [[ ${INSTALL_FROM_DOCKER_CONTEXT_FILES} == "true" ]]; then \
        shopt -s nullglob; \
        context_packages=(/docker-context-files/*.whl /docker-context-files/*.tar.gz); \
        shopt -u nullglob; \
        if [[ ${#context_packages[@]} -gt 0 ]]; then \
            pip install --user --no-deps "${context_packages[@]}"; \
        fi; \
    fi; \
    find /root/.local/ -name '*.pyc' -print0 | xargs -0 rm -r || true ; \
    find /root/.local/ -type d -name '__pycache__' -print0 | xargs -0 rm -r || true
# Build the web UI assets inside the installed airflow package, when it ships a package.json.
# `www_rbac` takes precedence over `www`. After the production build, node_modules and the
# front-end build files are removed again.
RUN AIRFLOW_SITE_PACKAGE="/root/.local/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages/airflow"; \
    for www_candidate in www_rbac www; do \
        if [[ -f "${AIRFLOW_SITE_PACKAGE}/${www_candidate}/package.json" ]]; then \
            WWW_DIR="${AIRFLOW_SITE_PACKAGE}/${www_candidate}"; \
            break; \
        fi; \
    done; \
    if [[ -n "${WWW_DIR:-}" ]]; then \
        yarn --cwd "${WWW_DIR}" install --frozen-lockfile --no-cache; \
        yarn --cwd "${WWW_DIR}" run prod; \
        rm -rf "${WWW_DIR}/node_modules"; \
        rm -vf "${WWW_DIR}"/{package.json,yarn.lock,.eslintignore,.eslintrc,.stylelintignore,.stylelintrc,compile_assets.sh,webpack.config.js} ;\
    fi
# Give the group the same access to everything under .local: group-execute on whatever is
# executable, group read/write on all files and directories.
RUN find /root/.local -executable -exec chmod g+x {} + \
    && find /root/.local -exec chmod g+rw {} +
# Build identification passed in with --build-arg (empty when not provided); kept in the
# environment and recorded in the build-image labels
ARG BUILD_ID
ARG COMMIT_SHA
ENV BUILD_ID="${BUILD_ID}" \
    COMMIT_SHA="${COMMIT_SHA}"

LABEL org.apache.airflow.distro="debian" \
      org.apache.airflow.distro.version="buster" \
      org.apache.airflow.module="airflow" \
      org.apache.airflow.component="airflow" \
      org.apache.airflow.image="airflow-build-image" \
      org.apache.airflow.version="${AIRFLOW_VERSION}" \
      org.apache.airflow.buildImage.buildId="${BUILD_ID}" \
      org.apache.airflow.buildImage.commitSha="${COMMIT_SHA}"
##############################################################################################
# This is the actual Airflow image - much smaller than the build one. Installed Airflow and
# all its dependencies are copied from the build image to keep this image small.
##############################################################################################
FROM ${PYTHON_BASE_IMAGE} AS main

# Every RUN goes through bash with: pipefail, exit on error (-e), error on unset variables (-u)
# and command tracing (-x)
SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]

ARG AIRFLOW_UID
ARG AIRFLOW_GID

LABEL org.apache.airflow.distro="debian" \
      org.apache.airflow.distro.version="buster" \
      org.apache.airflow.module="airflow" \
      org.apache.airflow.component="airflow" \
      org.apache.airflow.image="airflow" \
      org.apache.airflow.uid="${AIRFLOW_UID}" \
      org.apache.airflow.gid="${AIRFLOW_GID}"

# Re-declare the global arguments needed in this stage and keep them in the image environment
ARG PYTHON_BASE_IMAGE
ARG AIRFLOW_VERSION
ENV PYTHON_BASE_IMAGE="${PYTHON_BASE_IMAGE}" \
    AIRFLOW_VERSION="${AIRFLOW_VERSION}"

# Non-interactive debian installs and UTF-8 language/locale variables
ENV DEBIAN_FRONTEND=noninteractive \
    LANGUAGE=C.UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8 \
    LC_CTYPE=C.UTF-8 \
    LC_MESSAGES=C.UTF-8

ARG PIP_VERSION
ENV PIP_VERSION="${PIP_VERSION}"
# curl and gnupg2 are needed by many of the installation steps that follow.
# The apt caches and package lists are removed in the same layer to keep it small.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        curl \
        gnupg2 \
    && apt-get autoremove --yes -qq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Default list of apt packages for the main (runtime) stage. Unlike DEV_APT_DEPS it contains no
# build-essential or *-dev packages. It includes dumb-init, which the ENTRYPOINT of this image
# invokes. The value can be replaced with --build-arg; it is exported as ENV and later expanded
# unquoted - one word per package - by the `apt-get install` step of this stage.
ARG RUNTIME_APT_DEPS="\
apt-transport-https \
apt-utils \
ca-certificates \
curl \
dumb-init \
freetds-bin \
gnupg \
gosu \
krb5-user \
ldap-utils \
libffi6 \
libsasl2-2 \
libsasl2-modules \
libssl1.1 \
locales \
lsb-release \
netcat \
openssh-client \
postgresql-client \
rsync \
sasl2-bin \
sqlite3 \
sudo \
unixodbc"
ENV RUNTIME_APT_DEPS=${RUNTIME_APT_DEPS}
# Extra apt packages installed together with RUNTIME_APT_DEPS (none by default)
ARG ADDITIONAL_RUNTIME_APT_DEPS=""
ENV ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS}
# Shell snippets run through `bash -c` before the runtime apt packages are installed
ARG RUNTIME_APT_COMMAND="echo"
ENV RUNTIME_APT_COMMAND=${RUNTIME_APT_COMMAND}
ARG ADDITIONAL_RUNTIME_APT_COMMAND=""
ENV ADDITIONAL_RUNTIME_APT_COMMAND=${ADDITIONAL_RUNTIME_APT_COMMAND}
# Optional VAR=value pairs passed to `export` in the install step below (empty by default)
ARG ADDITIONAL_RUNTIME_ENV_VARS=""

# The man directories are missing on debian-buster, see
# https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=863199
# Create them, run the set-up snippets, then install the basic and additional apt
# dependencies and clean the apt caches in the same layer.
RUN mkdir -pv /usr/share/man/man1 /usr/share/man/man7 \
    && export ${ADDITIONAL_RUNTIME_ENV_VARS?} \
    && bash -o pipefail -e -u -x -c "${RUNTIME_APT_COMMAND}" \
    && bash -o pipefail -e -u -x -c "${ADDITIONAL_RUNTIME_APT_COMMAND}" \
    && apt-get update \
    && apt-get install --yes --no-install-recommends \
        ${RUNTIME_APT_DEPS} \
        ${ADDITIONAL_RUNTIME_APT_DEPS} \
    && apt-get autoremove --yes -qq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Whether the MySQL client should be installed; exported so that later steps can read it.
# NOTE(review): presumably scripts/docker/install_mysql.sh checks this variable - confirm in the script.
ARG INSTALL_MYSQL_CLIENT="true"
ENV INSTALL_MYSQL_CLIENT=${INSTALL_MYSQL_CLIENT}
COPY scripts/docker /scripts/docker
# fix permission issue in Azure DevOps when running the script
RUN chmod a+x /scripts/docker/install_mysql.sh
# The script is called by its absolute path so the step does not depend on the working
# directory inherited from the (overridable) base image. "prod" is passed as its only argument.
RUN /scripts/docker/install_mysql.sh prod
# Numeric ids of the airflow user and group, kept in the image environment
ENV AIRFLOW_UID=${AIRFLOW_UID}
ENV AIRFLOW_GID=${AIRFLOW_GID}
ENV AIRFLOW__CORE__LOAD_EXAMPLES="false"
ARG AIRFLOW_USER_HOME_DIR=/home/airflow
ENV AIRFLOW_USER_HOME_DIR=${AIRFLOW_USER_HOME_DIR}
# Create the "airflow" group and user with the requested ids and home directory.
# --disabled-password and --gecos "" stop adduser from prompting for a password and for the
# user details, so the step no longer relies on prompts reading EOF during a
# non-interactive build.
RUN addgroup --gid "${AIRFLOW_GID}" "airflow" \
    && adduser --quiet "airflow" --uid "${AIRFLOW_UID}" \
        --gid "${AIRFLOW_GID}" \
        --home "${AIRFLOW_USER_HOME_DIR}" \
        --disabled-password \
        --gecos ""
ARG AIRFLOW_HOME
ENV AIRFLOW_HOME=${AIRFLOW_HOME}
# Airflow files are owned by the airflow user and the root group, and are group-accessible.
# This is to accommodate the guidelines from
# OpenShift https://docs.openshift.com/enterprise/3.0/creating_images/guidelines.html
RUN mkdir -pv "${AIRFLOW_HOME}" "${AIRFLOW_HOME}/dags" "${AIRFLOW_HOME}/logs"; \
    chown -R "airflow:root" "${AIRFLOW_USER_HOME_DIR}" "${AIRFLOW_HOME}"; \
    find "${AIRFLOW_HOME}" -executable -exec chmod g+x {} + \
    && find "${AIRFLOW_HOME}" -exec chmod g+rw {} +
# Bring the installed packages from the build stage into the airflow user's home directory
COPY --chown=airflow:root --from=airflow-build-image /root/.local "${AIRFLOW_USER_HOME_DIR}/.local"
# Entrypoint and log-cleaning scripts; both are made executable for everyone
COPY --chown=airflow:root scripts/in_container/prod/entrypoint_prod.sh /entrypoint
COPY --chown=airflow:root scripts/in_container/prod/clean-logs.sh /clean-logs
RUN chmod a+x /entrypoint /clean-logs
# Pin pip to the requested version. --no-cache-dir keeps pip's download cache out of the final image.
RUN pip install --no-cache-dir --upgrade "pip==${PIP_VERSION}"
# Make /etc/passwd root-group-writeable so that user can be dynamically added by OpenShift
# See https://github.com/apache/airflow/issues/9248
# (`g=u` gives the group the same permissions on the file as its owner)
RUN chmod g=u /etc/passwd
# Put the executables copied from the build stage (pip --user installs) first on the PATH
ENV PATH="${AIRFLOW_USER_HOME_DIR}/.local/bin:${PATH}"
# Default gunicorn command-line arguments, taken by gunicorn from this environment variable
ENV GUNICORN_CMD_ARGS="--worker-tmp-dir /dev/shm"
WORKDIR ${AIRFLOW_HOME}
# NOTE(review): 8080 is presumably the Airflow webserver port - EXPOSE only documents it
EXPOSE 8080
# Run as the numeric airflow UID rather than root from here on
USER ${AIRFLOW_UID}
# Build identification passed in with --build-arg (empty when not provided); kept in the
# environment and recorded in the image labels
ARG BUILD_ID
ARG COMMIT_SHA
ENV BUILD_ID="${BUILD_ID}" \
    COMMIT_SHA="${COMMIT_SHA}"

LABEL org.apache.airflow.distro="debian" \
      org.apache.airflow.distro.version="buster" \
      org.apache.airflow.module="airflow" \
      org.apache.airflow.component="airflow" \
      org.apache.airflow.image="airflow" \
      org.apache.airflow.version="${AIRFLOW_VERSION}" \
      org.apache.airflow.uid="${AIRFLOW_UID}" \
      org.apache.airflow.gid="${AIRFLOW_GID}" \
      org.apache.airflow.mainImage.buildId="${BUILD_ID}" \
      org.apache.airflow.mainImage.commitSha="${COMMIT_SHA}"

# dumb-init runs as the container's init process and starts /entrypoint;
# without arguments the container runs with "--help"
ENTRYPOINT ["/usr/bin/dumb-init", "--", "/entrypoint"]
CMD ["--help"]