Skip to content

Commit

Permalink
Merge python container into one build stage
Browse files Browse the repository at this point in the history
  • Loading branch information
nerdcha committed Sep 8, 2016
1 parent 1d94627 commit de08b18
Showing 1 changed file with 190 additions and 6 deletions.
196 changes: 190 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,199 @@
# Everything is built in a single stage on top of the stock Anaconda 3 image.
# NOTE(review): later steps hard-code Python 3.5 paths under /opt/conda (e.g.
# /opt/conda/lib/libpython3.5m.so), so an unpinned `latest` tag can break the build
# once the base image moves to a newer Python -- consider pinning a specific tag.
FROM continuumio/anaconda3:latest

# Layer 1: compiler toolchain, TensorFlow, Vowpal Wabbit, the general Python package set,
# ImageMagick (built from source) and OpenCV (built from source against the conda Python 3.5).
# All downloaded sources and caches are removed at the end of this same layer.
RUN apt-get update && apt-get -y install build-essential && \
    cd /usr/local/src && \
    # TensorFlow is installed from an explicitly downloaded wheel because a plain
    # `pip install tensorflow` fails here; background:
    # https://github.com/tensorflow/tensorflow/issues/64#issuecomment-155270240
    wget https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl && \
    pip install tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl && \
    # Vowpal Wabbit: needs Boost; the generic libboost_python names are re-pointed at the
    # py34 builds before the Python package is installed
    apt-get -y install libboost-program-options-dev zlib1g-dev libboost-python-dev && \
    cd /usr/lib/x86_64-linux-gnu/ && \
    rm -f libboost_python.a libboost_python.so && \
    ln -sf libboost_python-py34.so libboost_python.so && \
    ln -sf libboost_python-py34.a libboost_python.a && \
    pip install vowpalwabbit && \
    # General-purpose Python packages
    pip install seaborn python-dateutil spacy dask pytagcloud pyyaml ggplot joblib \
        husl geopy ml_metrics mne pyshp gensim && \
    # ImageMagick: the packaged version is out of date and has compatibility issues, so it is
    # compiled from source; these packages are installed first
    apt-get install -y \
        dbus fontconfig fontconfig-config fonts-dejavu-core \
        fonts-droid ghostscript gsfonts hicolor-icon-theme \
        libavahi-client3 libavahi-common-data libavahi-common3 libcairo2 \
        libcap-ng0 libcroco3 libcups2 libcupsfilters1 \
        libcupsimage2 libdatrie1 libdbus-1-3 libdjvulibre-text \
        libdjvulibre21 libfftw3-double3 libfontconfig1 libfreetype6 \
        libgdk-pixbuf2.0-0 libgdk-pixbuf2.0-common libgomp1 libgraphite2-3 \
        libgs9 libgs9-common libharfbuzz0b libijs-0.35 \
        libilmbase6 libjasper1 libjbig0 libjbig2dec0 \
        libjpeg62-turbo liblcms2-2 liblqr-1-0 libltdl7 \
        libmagickcore-6.q16-2 libmagickcore-6.q16-2-extra libmagickwand-6.q16-2 libnetpbm10 \
        libopenexr6 libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 \
        libpaper-utils libpaper1 libpixman-1-0 libpng12-0 \
        librsvg2-2 librsvg2-common libthai-data libthai0 \
        libtiff5 libwmf0.2-7 libxcb-render0 libxcb-shm0 \
        netpbm poppler-data && \
    wget http://www.imagemagick.org/download/ImageMagick-7.0.3-0.tar.gz && \
    tar -xzf ImageMagick-7.0.3-0.tar.gz && \
    cd ImageMagick-7.0.3-0 && \
    ./configure && \
    make -j "$(nproc)" && \
    make install && \
    # drop the ImageMagick tarball and source tree again
    cd .. && rm -rf ImageMagick-7.0.3* && \
    # OpenCV build prerequisites
    apt-get install -y libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev && \
    apt-get install -y libtbb2 libtbb-dev libjpeg-dev libtiff-dev libjasper-dev && \
    apt-get install -y cmake && \
    # OpenCV: shallow clone, out-of-tree build against the conda Python 3.5 interpreter
    cd /usr/local/src && \
    git clone --depth 1 https://github.com/Itseez/opencv.git && \
    mkdir opencv/build && cd opencv/build && \
    cmake \
        -D CMAKE_BUILD_TYPE=RELEASE \
        -D CMAKE_INSTALL_PREFIX=/usr/local \
        -D WITH_TBB=ON \
        -D WITH_FFMPEG=OFF \
        -D WITH_V4L=ON \
        -D WITH_QT=OFF \
        -D WITH_OPENGL=ON \
        -D PYTHON3_LIBRARY=/opt/conda/lib/libpython3.5m.so \
        -D PYTHON3_INCLUDE_DIR=/opt/conda/include/python3.5m/ \
        -D PYTHON_LIBRARY=/opt/conda/lib/libpython3.5m.so \
        -D PYTHON_INCLUDE_DIR=/opt/conda/include/python3.5m/ \
        -D BUILD_PNG=TRUE \
        .. && \
    make -j "$(nproc)" && \
    make install && \
    # register the install location with the dynamic linker, then copy the cv2 extension
    # module into the conda site-packages directory
    echo "/usr/local/lib/python3.5/site-packages" > /etc/ld.so.conf.d/opencv.conf && \
    ldconfig && \
    cp /usr/local/lib/python3.5/site-packages/cv2.cpython-35m-x86_64-linux-gnu.so /opt/conda/lib/python3.5/site-packages/ && \
    # Remove build leftovers so they do not end up in the layer
    rm -rf /usr/local/src/opencv && \
    rm /usr/local/src/tensorflow-0.10.0rc0-cp35-cp35m-linux_x86_64.whl && \
    rm -rf /root/.cache/pip/* && \
    apt-get autoremove -y && \
    apt-get clean

# Layer 2: deep-learning / NLP libraries (mostly built from shallow git clones) and the
# NLTK corpora. Every source tree under /usr/local/src is deleted at the end of the layer,
# so nothing installed here may depend on those directories at runtime.
#
# `apt-get update` is run first so the installs below never rely on package lists
# inherited from an older, possibly cached, layer.
RUN apt-get update && \
    apt-get install -y libfreetype6-dev && \
    apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \
    # textblob
    pip install textblob && \
    # word cloud (https: GitHub no longer serves the unauthenticated git:// protocol)
    pip install git+https://github.com/amueller/word_cloud.git && \
    # igraph
    pip install python-igraph && \
    # xgboost
    cd /usr/local/src && mkdir xgboost && cd xgboost && \
    git clone --depth 1 --recursive https://github.com/dmlc/xgboost.git && cd xgboost && \
    make && cd python-package && python setup.py install && \
    # lasagne
    cd /usr/local/src && mkdir Lasagne && cd Lasagne && \
    git clone --depth 1 https://github.com/Lasagne/Lasagne.git && cd Lasagne && \
    pip install -r requirements.txt && python setup.py install && \
    # keras
    cd /usr/local/src && mkdir keras && cd keras && \
    git clone --depth 1 https://github.com/fchollet/keras.git && \
    cd keras && python setup.py install && \
    # neon: regular (non-editable) install. An editable install only links back to the
    # checkout, and /usr/local/src is wiped at the end of this RUN.
    cd /usr/local/src && \
    git clone --depth 1 https://github.com/NervanaSystems/neon.git && \
    cd neon && pip install . && \
    # nolearn (README.rst / CHANGES.rst are overwritten with a one-character stub before
    # setup.py runs)
    cd /usr/local/src && mkdir nolearn && cd nolearn && \
    git clone --depth 1 https://github.com/dnouri/nolearn.git && cd nolearn && \
    echo "x" > README.rst && echo "x" > CHANGES.rst && \
    python setup.py install && \
    # Dev branch of Theano (https for the same reason as word_cloud above)
    pip install git+https://github.com/Theano/Theano.git --upgrade --no-deps && \
    # put theano compiledir inside /tmp (it needs to be in writable dir)
    printf "[global]\nbase_compiledir = /tmp/.theano\n" > /.theanorc && \
    cd /usr/local/src && git clone --depth 1 https://github.com/pybrain/pybrain && \
    cd pybrain && python setup.py install && \
    # Base ATLAS plus tSNE
    apt-get install -y libatlas-base-dev && \
    # NOTE: we provide the tsne package, but sklearn.manifold.TSNE now does the same job
    cd /usr/local/src && git clone --depth 1 https://github.com/danielfrg/tsne.git && \
    cd tsne && python setup.py install && \
    cd /usr/local/src && git clone --depth 1 https://github.com/ztane/python-Levenshtein && \
    cd python-Levenshtein && python setup.py install && \
    cd /usr/local/src && git clone --depth 1 https://github.com/arogozhnikov/hep_ml.git && \
    cd hep_ml && pip install . && \
    # chainer
    pip install chainer && \
    # NLTK Project datasets
    mkdir -p /usr/share/nltk_data && \
    # NLTK Downloader no longer continues smoothly after an error, so we explicitly list
    # the corpora that work
    python -m nltk.downloader -d /usr/share/nltk_data abc alpino \
        averaged_perceptron_tagger basque_grammars biocreative_ppi bllip_wsj_no_aux \
        book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \
        comparative_sentences comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \
        europarl_raw floresta framenet_v15 gazetteers genesis gutenberg hmm_treebank_pos_tagger \
        ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \
        masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \
        mte_teip5 names nps_chat omw opinion_lexicon panlex_swadesh paradigms \
        pil pl196x ppattach problem_reports product_reviews_1 product_reviews_2 propbank \
        pros_cons ptb punkt qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \
        sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \
        state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \
        twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
        verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \
    # Stop-words
    pip install stop-words && \
    # clean up: delete the downloaded corpus archives. The pattern is quoted so the shell
    # cannot expand it against the current directory before find sees it.
    find /usr/share/nltk_data/ -name '*.zip' -delete && \
    rm -rf /root/.cache/pip/* && \
    apt-get autoremove -y && apt-get clean && \
    rm -rf /usr/local/src/*

# Layer 3: geospatial stack (Basemap, Shapely, Cartopy), Pillow, opendeep, ibis, MXNet,
# matplotlib/Jupyter configuration, Java + h2o, Keras configuration, scikit-learn from git,
# h5py and biopython.
#
# OpenCV is intentionally NOT built here: the first RUN already builds and installs it and
# then deletes /usr/local/src/opencv, so a second build from that directory cannot succeed.
RUN apt-get update && \
    apt-get -y install libgeos-dev && \
    # pyshp and pyproj are now external dependencies of Basemap
    pip install pyshp pyproj && \
    cd /usr/local/src && git clone https://github.com/matplotlib/basemap.git && \
    export GEOS_DIR=/usr/local && \
    cd basemap && python setup.py install && \
    # Pillow (PIL)
    apt-get -y install zlib1g-dev liblcms2-dev libwebp-dev && \
    pip install Pillow && \
    # opendeep: regular install rather than `develop`. A develop install only links back to
    # the checkout, and /usr/local/src is wiped at the end of this RUN.
    cd /usr/local/src && git clone https://github.com/vitruvianscience/opendeep.git && \
    cd opendeep && python setup.py install && \
    # sasl is apparently an ibis dependency
    apt-get -y install libsasl2-dev && \
    pip install ibis-framework && \
    # Cartopy plus dependencies (-y answers conda's confirmation prompt)
    conda install -y proj4 && \
    pip install packaging && \
    cd /usr/local/src && git clone https://github.com/Toblerity/Shapely.git && \
    cd Shapely && python setup.py install && \
    cd /usr/local/src && git clone https://github.com/SciTools/cartopy.git && \
    cd cartopy && python setup.py install && \
    # MXNet
    cd /usr/local/src && git clone --recursive https://github.com/dmlc/mxnet && \
    cd /usr/local/src/mxnet && cp make/config.mk . && \
    sed -i 's/ADD_LDFLAGS =/ADD_LDFLAGS = -lstdc++/' config.mk && \
    make && cd python && python setup.py install && \
    # set backend for matplotlib to Agg
    matplotlibrc_path=$(python -c "import site, os, fileinput; packages_dir = site.getsitepackages()[0]; print(os.path.join(packages_dir, 'matplotlib', 'mpl-data', 'matplotlibrc'))") && \
    sed -i 's/^backend : Qt4Agg/backend : Agg/' "$matplotlibrc_path" && \
    # Stop jupyter nbconvert trying to rewrite its folder hierarchy
    mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \
    mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \
    # Stop Matplotlib printing junk to the console on first load
    sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" /opt/conda/lib/python3.5/site-packages/matplotlib/font_manager.py && \
    # Make matplotlib output in Jupyter notebooks display correctly
    mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \
    # h2o
    # This requires python-software-properties and Java.
    apt-get install -y python-software-properties zip && \
    echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list && \
    echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list && \
    apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 C857C906 2B90D010 && \
    apt-get update && \
    # pre-accept the Oracle licence so the installer does not prompt
    echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
    echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections && \
    apt-get install -y oracle-java8-installer && \
    cd /usr/local/src && mkdir h2o && cd h2o && \
    # `latest_stable` is a text file holding the URL of the current release archive
    wget http://h2o-release.s3.amazonaws.com/h2o/latest_stable -O latest && \
    # NOTE(review): --no-check-certificate disables TLS verification for this download;
    # confirm whether it is still needed and drop it if not.
    wget --no-check-certificate -i latest -O h2o.zip && rm latest && \
    unzip h2o.zip && rm h2o.zip && cp h2o-*/h2o.jar . && \
    pip install $(find . -name "*whl") && \
    # Keras setup
    # Keras likes to add a config file in a custom directory when it's
    # first imported. This doesn't work with our read-only filesystem, so we
    # have it done now
    python -c "from keras.models import Sequential" && \
    # Switch to TF backend
    sed -i 's/theano/tensorflow/' /root/.keras/keras.json && \
    # Re-run it to flush any more disk writes
    python -c "from keras.models import Sequential; from keras import backend; print(backend._BACKEND)" && \
    # Keras reverts to /tmp from ~ when it detects a read-only file system
    mkdir -p /tmp/.keras && cp /root/.keras/keras.json /tmp/.keras && \
    # Scikit-Learn nightly build
    cd /usr/local/src && git clone https://github.com/scikit-learn/scikit-learn.git && \
    cd scikit-learn && python setup.py build && python setup.py install && \
    # HDF5 support (-y so conda never waits for confirmation in a non-interactive build)
    conda install -y h5py && \
    # https://github.com/biopython/biopython
    pip install biopython && \
    # clean up caches and every source tree cloned above
    rm -rf /root/.cache/pip/* && \
    apt-get autoremove -y && apt-get clean && \
    rm -rf /usr/local/src/*

###########
#
Expand Down Expand Up @@ -42,6 +229,3 @@ RUN pip install --upgrade mpld3 && \
pip install missingno && \
pip install pandas-profiling && \
pip install s2sphere



0 comments on commit de08b18

Please sign in to comment.