Skip to content

Commit

Permalink
[DEV][add] add dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
fchen authored and fchen committed Nov 2, 2018
1 parent 80fab8b commit 81927b0
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 0 deletions.
53 changes: 53 additions & 0 deletions dev/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Base image is pinned to a named Debian release instead of the moving
# `latest` tag so that rebuilds stay reproducible. The apt step in this file
# installs openjdk-8-jre-headless, which Debian ships in "stretch" (Debian 9).
# NOTE(review): stretch is end-of-life; moving to a newer release also means
# choosing a different JRE package — confirm before bumping this tag.
FROM debian:stretch

# Example build / run / push commands, kept from the original file.
# NOTE(review): they use the continuumio/miniconda3 image name, presumably
# carried over from the Dockerfile this one was derived from — substitute this
# project's own image name and tag before building or pushing.
# $ docker build . -t continuumio/miniconda3:latest -t continuumio/miniconda3:4.5.11
# $ docker run --rm -it continuumio/miniconda3:latest /bin/bash
# $ docker push continuumio/miniconda3:latest
# $ docker push continuumio/miniconda3:4.5.11

# UTF-8 locale for everything running in the image, and the conda install
# directory first on PATH so `conda` and its Python are found by later steps.
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH=/opt/conda/bin:$PATH

# OS-level packages: a headless Java 8 runtime, the wget/curl download tools
# used by later steps, plus bzip2, ca-certificates and git.
# The apt caches and package lists are removed in the same layer that created
# them so they do not end up in the image.
RUN apt-get update --fix-missing \
 && apt-get install -y \
        bzip2 \
        ca-certificates \
        curl \
        git \
        openjdk-8-jre-headless \
        wget \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# Install Miniconda3 4.5.11 into /opt/conda:
#   1. download the installer and run it non-interactively (-b) with the
#      install prefix /opt/conda (-p),
#   2. delete the installer and run `conda clean` in the same layer,
#   3. link conda.sh into /etc/profile.d and append the activation lines to
#      ~/.bashrc so shells started in the container activate the "base" env.
RUN wget --quiet -O ~/miniconda.sh \
        https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh \
 && /bin/bash ~/miniconda.sh -b -p /opt/conda \
 && rm ~/miniconda.sh \
 && /opt/conda/bin/conda clean -tipsy \
 && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
 && printf '%s\n' \
        '. /opt/conda/etc/profile.d/conda.sh' \
        'conda activate base' \
        >> ~/.bashrc

# tini is the init process this image uses as its ENTRYPOINT.
# It is downloaded and made executable in ONE layer: a remote `ADD` followed
# by a separate `RUN chmod` stores the binary in two layers, and `ADD <url>`
# without a checksum is discouraged. wget is installed by the apt step earlier
# in this file.
# NOTE(review): the download is not checksum-verified — consider pinning the
# sha256 of the release asset.
ENV TINI_VERSION=v0.16.1
RUN wget --quiet -O /usr/bin/tini \
        "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini" \
 && chmod +x /usr/bin/tini

# Register the TUNA (tsinghua.edu.cn) Anaconda mirror channels with conda and
# make conda print the channel URL of each package. One layer, since the three
# settings form a single logical step.
RUN conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ \
 && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ \
 && conda config --set show_channel_urls yes

# Point pip at the Aliyun PyPI mirror.
# printf is used instead of `echo '...\n...'`: whether echo expands "\n"
# depends on which shell runs the RUN step, so the old form could silently
# produce a one-line, unparsable pip.conf. Each config line is also written
# without leading whitespace, which INI parsers may treat as a continuation.
RUN mkdir -p ~/.pip \
 && printf '%s\n' \
        '[global]' \
        'trusted-host = mirrors.aliyun.com' \
        'index-url = https://mirrors.aliyun.com/pypi/simple' \
        > ~/.pip/pip.conf

# Download the Spark binary package from an Apache mirror.
#   URL_BASE        - Apache mirror-selection endpoint (closer.cgi)
#   URL_DIRECTORIES - directory of the release below a mirror root
#   FILENAME        - archive to download
# NOTE(review): mirrors may only carry current releases; if this Spark version
# is no longer mirrored the download step fails — verify the location.
ENV URL_BASE=https://www.apache.org/dyn/closer.cgi/ \
    FILENAME=spark-2.3.2-bin-hadoop2.7.tgz \
    URL_DIRECTORIES=spark/spark-2.3.2/

# Run the following RUN steps under bash with pipefail, so that a failure in
# any stage of the curl | grep | awk | tr pipeline aborts the build instead of
# being masked by the exit status of the last stage.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Use closer.cgi (as_json=1) to pick the preferred mirror, then download the
# archive, unpack it under /work and delete the archive in the same layer.
#   - curl -f turns HTTP errors into a failed build rather than saving the
#     error page as the tarball;
#   - `test -n` stops early when no mirror could be extracted from the reply;
#   - tar -C avoids changing directory inside the RUN step.
RUN BASE="$(curl -fsSL "${URL_BASE}${URL_DIRECTORIES}${FILENAME}?as_json=1" \
        | grep preferred \
        | awk '{print $NF}' \
        | tr -d '",')" \
 && test -n "${BASE}" \
 && mkdir -p /work \
 && curl -fSL -o "/work/${FILENAME}" "${BASE}${URL_DIRECTORIES}${FILENAME}" \
 && tar -xzf "/work/${FILENAME}" -C /work \
 && rm "/work/${FILENAME}"

# Location of the unpacked Spark distribution.
ENV SPARK_HOME=/work/spark-2.3.2-bin-hadoop2.7

# Every container command is started through tini; with no command given the
# container starts /bin/bash.
ENTRYPOINT ["/usr/bin/tini", "--"]
CMD ["/bin/bash"]
Empty file added dev/empty.json
Empty file.
11 changes: 11 additions & 0 deletions dev/make-distribution.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Build the distribution with Maven: tests are skipped and the Maven profiles
# listed below are activated, in this order.
profiles=(
  online
  scala-2.11
  hive-thrift-server
  spark-2.3.0
  dsl
  crawler
  assembly
  opencv-support
  carbondata
  streamingpro-spark-2.3.0-adaptor
)

# Turn every profile name into a -P<name> command-line flag.
profile_flags=()
for profile in "${profiles[@]}"; do
  profile_flags+=("-P${profile}")
done

mvn -DskipTests clean package "${profile_flags[@]}"
23 changes: 23 additions & 0 deletions dev/start-local.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
# Start the MLSQL StreamingApp on a local Spark master via spark-submit.
#
# Environment:
#   MLSQL_HOME  - directory containing libs/ and empty.json; defaults to the
#                 directory this script lives in.
#   SPARK_HOME  - Spark installation providing bin/spark-submit (required).

if [ -z "${MLSQL_HOME}" ]; then
  # `&&` rather than `;` so a failed cd does not silently yield the caller's cwd.
  export MLSQL_HOME="$(cd "$(dirname "$0")" && pwd)"
fi

# Abort with a clear message instead of trying to run "/bin/spark-submit".
: "${SPARK_HOME:?SPARK_HOME must point to a Spark installation}"

# Comma-separated list of every jar under libs/. Built from an array so that
# paths containing spaces are not split or rewritten.
jar_files=("${MLSQL_HOME}"/libs/*.jar)
JARS="$(IFS=,; echo "${jar_files[*]}")"

# 'local[*]' is quoted so the shell cannot glob-expand it against files in the
# current directory. exec replaces this shell so signals reach spark-submit.
exec "${SPARK_HOME}/bin/spark-submit" --class streaming.core.StreamingApp \
  --jars "${JARS}" \
  --master 'local[*]' \
  --name mlsql \
  --conf "spark.sql.hive.thriftServer.singleSession=true" \
  --conf "spark.kryoserializer.buffer=256k" \
  --conf "spark.kryoserializer.buffer.max=1024m" \
  --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
  --conf "spark.scheduler.mode=FAIR" \
  "${MLSQL_HOME}/libs/streamingpro-mlsql-1.1.3.jar" \
  -streaming.name mlsql \
  -streaming.job.file.path "${MLSQL_HOME}/empty.json" \
  -streaming.platform spark \
  -streaming.rest true \
  -streaming.driver.port 9003 \
  -streaming.spark.service true \
  -streaming.thrift false \
  -streaming.enableHiveSupport true
10 changes: 10 additions & 0 deletions streamingpro-assembly/src/main/assembly/assembly.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,14 @@
</binaries>
</moduleSet>
</moduleSets>
<!-- Copy the local start script and the empty job file from the parent
     project's dev/ directory into the root of the assembly. -->
<fileSets>
    <fileSet>
        <directory>${project.parent.basedir}/dev</directory>
        <outputDirectory>.</outputDirectory>
        <includes>
            <include>start-local.sh</include>
            <include>empty.json</include>
        </includes>
    </fileSet>
</fileSets>
</assembly>

0 comments on commit 81927b0

Please sign in to comment.