Skip to content

Commit

Permalink
fix: image build from m1 and bump openssl (Unstructured-IO#97)
Browse files Browse the repository at this point in the history
  • Loading branch information
ryannikolaidis authored May 10, 2023
1 parent 65ca4c1 commit 047db0d
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 37 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## 0.0.16

* Fix image build steps to support detectron2 install from Mac M1/M2
* Upgrade to openssl 1.1.1 to accommodate the latest urllib3
* Bump unstructured for SpooledTemporaryFile fix

## 0.0.15

* Add msg and json types to supported
Expand Down
33 changes: 30 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,33 @@ RUN set -ex && \
$sudo rm -rf /tmp/* && \
yum clean all

# SSL dependency gets baked into Python binary so do this first
# Build OpenSSL 1.1.1k from the upstream source tarball and install it under
# /usr/local/ssl (both --prefix and --openssldir point there).
# perl-core and pcre-devel are installed only for this step and removed again
# before the layer is finalized; the unpacked source tree and the tarball are
# deleted as well.
# NOTE(review): `$sudo` is not defined in this chunk -- presumably set earlier
# in the Dockerfile (or empty); confirm.
RUN yum -y update && \
yum install -y perl-core pcre-devel && \
wget https://ftp.openssl.org/source/openssl-1.1.1k.tar.gz && \
tar -xzvf openssl-1.1.1k.tar.gz && \
cd openssl-1.1.1k && \
./config shared --prefix=/usr/local/ssl --openssldir=/usr/local/ssl && \
make && \
make install && cd .. && \
ldconfig && \
rm -rf openssl-1.1.1k && rm openssl-1.1.1k.tar.gz && \
$sudo yum -y remove perl-core pcre-devel && \
yum clean all

ENV PATH="/usr/local/ssl/bin:${PATH}"
ENV LD_LIBRARY_PATH="/usr/local/ssl/lib:$LD_LIBRARY_PATH"
ENV SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.crt

# Install Python
RUN yum -y install openssl-devel bzip2-devel libffi-devel make git sqlite-devel && \
RUN yum -y install bzip2-devel libffi-devel make git sqlite-devel && \
curl -O https://www.python.org/ftp/python/3.8.15/Python-3.8.15.tgz && tar -xzf Python-3.8.15.tgz && \
cd Python-3.8.15/ && ./configure --enable-optimizations && make altinstall && \
cd Python-3.8.15/ && \
./configure --enable-optimizations --with-openssl=/usr/local/ssl && \
make altinstall && \
cd .. && rm -rf Python-3.8.15* && \
ln -s /usr/local/bin/python3.8 /usr/local/bin/python3 && \
$sudo yum -y remove openssl-devel bzip2-devel libffi-devel make sqlite-devel && \
$sudo yum -y remove bzip2-devel libffi-devel make sqlite-devel && \
$sudo rm -rf /var/cache/yum/* && \
yum clean all

Expand All @@ -77,8 +97,15 @@ ENV PATH="/home/${NB_USER}/.local/bin:${PATH}"
COPY requirements/base.txt requirements-base.txt
# Install the pinned pip, the base requirements, then tensorboard and
# detectron2 (pinned to commit e2ce8dc).
RUN python3.8 -m pip install pip==${PIP_VERSION} \
  && pip3.8 install --no-cache -r requirements-base.txt \
  && scl enable devtoolset-9 bash \
  # required for detectron2 install on Mac M1
  # The specifier must be quoted: unquoted, the shell parses ">=2.12.2" as an
  # output redirection, so pip would install an unconstrained tensorboard and
  # write its output to a file named "=2.12.2".
  && pip3.8 install --no-cache "tensorboard>=2.12.2" \
  && pip3.8 install --no-cache "detectron2@git+https://github.com/facebookresearch/detectron2.git@e2ce8dc#egg=detectron2"

# fix openssl issue
RUN pip3.8 uninstall --yes urllib3 && \
pip3.8 install urllib3==1.25.11

RUN python3.8 -c "import nltk; nltk.download('punkt')" && \
python3.8 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
python3.8 -c "from unstructured.ingest.doc_processor.generalized import initialize; initialize()"
Expand Down
10 changes: 10 additions & 0 deletions prepline_general/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@


from fastapi import FastAPI, Request, status
import logging

from .general import router as general_router

Expand All @@ -20,6 +21,15 @@
app.include_router(general_router)


# Filter out /healthcheck noise
class HealthCheckFilter(logging.Filter):
    """Logging filter that drops records whose message mentions /healthcheck."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False when the formatted message contains "/healthcheck", else True."""
        message = record.getMessage()
        return "/healthcheck" not in message


logging.getLogger("uvicorn.access").addFilter(HealthCheckFilter())


@app.get("/healthcheck", status_code=status.HTTP_200_OK, include_in_schema=False)
def healthcheck(request: Request):
    """Health-check endpoint: always responds with a fixed status payload.

    The incoming ``request`` is accepted but not inspected.
    """
    payload = {"healthcheck": "HEALTHCHECK STATUS: EVERYTHING OK!"}
    return payload
2 changes: 1 addition & 1 deletion prepline_general/api/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def return_content_type(filename):


@router.post("/general/v0/general")
@router.post("/general/v0.0.15/general")
@router.post("/general/v0.0.16/general")
def pipeline_1(
request: Request,
gz_uncompressed_content_type: Optional[str] = Form(default=None),
Expand Down
2 changes: 1 addition & 1 deletion preprocessing-pipeline-family.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
name: general
version: 0.0.15
version: 0.0.16
2 changes: 1 addition & 1 deletion requirements/base.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
unstructured[local-inference]>=0.6.2
unstructured[local-inference]>=0.6.5
unstructured-api-tools>=0.6.0
ratelimit
requests
Expand Down
24 changes: 12 additions & 12 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ beautifulsoup4==4.12.2
# via nbconvert
bleach==6.0.0
# via nbconvert
certifi==2022.12.7
certifi==2023.5.7
# via
# httpcore
# httpx
Expand Down Expand Up @@ -67,11 +67,11 @@ filelock==3.12.0
# huggingface-hub
# torch
# transformers
flatbuffers==23.3.3
flatbuffers==23.5.8
# via onnxruntime
fonttools==4.39.3
# via matplotlib
fsspec==2023.4.0
fsspec==2023.5.0
# via huggingface-hub
h11==0.14.0
# via
Expand Down Expand Up @@ -156,7 +156,7 @@ mypy-extensions==1.0.0
# via mypy
nbclient==0.7.4
# via nbconvert
nbconvert==7.3.1
nbconvert==7.4.0
# via unstructured-api-tools
nbformat==5.8.0
# via
Expand Down Expand Up @@ -231,7 +231,7 @@ platformdirs==3.5.0
# via jupyter-core
portalocker==2.7.0
# via iopath
protobuf==4.22.4
protobuf==4.23.0
# via onnxruntime
pycocotools==2.0.6
# via effdet
Expand Down Expand Up @@ -326,13 +326,13 @@ tokenizers==0.13.3
# via transformers
tomli==2.0.1
# via mypy
torch==2.0.0
torch==2.0.1
# via
# effdet
# layoutparser
# timm
# torchvision
torchvision==0.15.1
torchvision==0.15.2
# via
# effdet
# layoutparser
Expand All @@ -355,7 +355,7 @@ traitlets==5.9.0
# nbformat
transformers==4.28.1
# via unstructured-inference
types-requests==2.29.0.0
types-requests==2.30.0.0
# via unstructured-api-tools
types-ujson==5.7.0.5
# via unstructured-api-tools
Expand All @@ -370,11 +370,11 @@ typing-extensions==4.5.0
# rich
# starlette
# torch
unstructured[local-inference]==0.6.3
unstructured[local-inference]==0.6.5
# via -r requirements/base.in
unstructured-api-tools==0.10.3
unstructured-api-tools==0.10.4
# via -r requirements/base.in
unstructured-inference==0.4.3
unstructured-inference==0.4.4
# via unstructured
urllib3==2.0.2
# via requests
Expand All @@ -392,7 +392,7 @@ webencodings==0.5.1
# via
# bleach
# tinycss2
websockets==11.0.2
websockets==11.0.3
# via uvicorn
wrapt==1.14.1
# via
Expand Down
39 changes: 20 additions & 19 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ bleach==6.0.0
# via
# -r requirements/base.txt
# nbconvert
certifi==2022.12.7
certifi==2023.5.7
# via
# -r requirements/base.txt
# httpcore
Expand Down Expand Up @@ -155,7 +155,7 @@ filelock==3.12.0
# transformers
flake8==6.0.0
# via -r requirements/test.in
flatbuffers==23.3.3
flatbuffers==23.5.8
# via
# -r requirements/base.txt
# onnxruntime
Expand All @@ -165,7 +165,7 @@ fonttools==4.39.3
# matplotlib
fqdn==1.5.1
# via jsonschema
fsspec==2023.4.0
fsspec==2023.5.0
# via
# -r requirements/base.txt
# huggingface-hub
Expand Down Expand Up @@ -222,15 +222,15 @@ iopath==0.1.10
# via
# -r requirements/base.txt
# layoutparser
ipykernel==6.22.0
ipykernel==6.23.0
# via
# ipywidgets
# jupyter
# jupyter-console
# nbclassic
# notebook
# qtconsole
ipython==8.12.1
ipython==8.12.2
# via
# execnb
# ipykernel
Expand Down Expand Up @@ -367,13 +367,13 @@ mypy-extensions==1.0.0
# -r requirements/base.txt
# black
# mypy
nbclassic==0.5.6
nbclassic==1.0.0
# via notebook
nbclient==0.7.4
# via
# -r requirements/base.txt
# nbconvert
nbconvert==7.3.1
nbconvert==7.4.0
# via
# -r requirements/base.txt
# jupyter
Expand Down Expand Up @@ -483,6 +483,7 @@ pdfminer-six==20221105
# via
# -r requirements/base.txt
# pdfplumber
# unstructured
pdfplumber==0.9.0
# via
# -r requirements/base.txt
Expand Down Expand Up @@ -526,7 +527,7 @@ prompt-toolkit==3.0.38
# via
# ipython
# jupyter-console
protobuf==4.22.3
protobuf==4.23.0
# via
# -r requirements/base.txt
# onnxruntime
Expand Down Expand Up @@ -637,18 +638,18 @@ pyzmq==25.0.2
# nbclassic
# notebook
# qtconsole
qtconsole==5.4.2
qtconsole==5.4.3
# via jupyter
qtpy==2.3.1
# via qtconsole
ratelimit==2.2.1
# via -r requirements/base.txt
regex==2023.5.4
regex==2023.5.5
# via
# -r requirements/base.txt
# nltk
# transformers
requests==2.29.0
requests==2.30.0
# via
# -r requirements/base.txt
# huggingface-hub
Expand Down Expand Up @@ -734,14 +735,14 @@ tomli==2.0.1
# coverage
# mypy
# pytest
torch==2.0.0
torch==2.0.1
# via
# -r requirements/base.txt
# effdet
# layoutparser
# timm
# torchvision
torchvision==0.15.1
torchvision==0.15.2
# via
# -r requirements/base.txt
# effdet
Expand Down Expand Up @@ -787,7 +788,7 @@ transformers==4.28.1
# via
# -r requirements/base.txt
# unstructured-inference
types-requests==2.29.0.0
types-requests==2.30.0.0
# via
# -r requirements/base.txt
# unstructured-api-tools
Expand All @@ -811,17 +812,17 @@ typing-extensions==4.5.0
# rich
# starlette
# torch
unstructured[local-inference]==0.6.2
unstructured[local-inference]==0.6.5
# via -r requirements/base.txt
unstructured-api-tools==0.10.3
unstructured-api-tools==0.10.4
# via -r requirements/base.txt
unstructured-inference==0.4.2
unstructured-inference==0.4.4
# via
# -r requirements/base.txt
# unstructured
uri-template==1.2.0
# via jsonschema
urllib3==1.26.15
urllib3==2.0.2
# via
# -r requirements/base.txt
# requests
Expand Down Expand Up @@ -855,7 +856,7 @@ webencodings==0.5.1
# tinycss2
websocket-client==1.5.1
# via jupyter-server
websockets==11.0.2
websockets==11.0.3
# via
# -r requirements/base.txt
# uvicorn
Expand Down

0 comments on commit 047db0d

Please sign in to comment.