Skip to content

Commit

Permalink
Chore: Bump unstructured to 0.10.30 (Unstructured-IO#309)
Browse files Browse the repository at this point in the history
^^^
Also releasing this version since `0.10.30` includes some 500 error bug
fixes
  • Loading branch information
yuming-long authored Nov 16, 2023
1 parent 776d812 commit ab7bc82
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 49 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.0.58

* Bump unstructured to 0.10.30

## 0.0.57
* Make sure `multipage_sections` param defaults to `true` as per the readme
* Bump unstructured to 0.10.29
Expand Down
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ RUN python3.10 -m pip install pip==${PIP_VERSION} \
USER ${NB_USER}

FROM python-deps as model-deps

# Note(yuming): quick workaround for the ingest import error.
# Ideally, `initialize` should be imported from within unstructured but outside the ingest dir.
COPY --chown=${NB_USER}:${NB_USER} scripts/hi_res_model_initialize.py hi_res_model_initialize.py
RUN python3.10 -c "import nltk; nltk.download('punkt')" && \
python3.10 -c "import nltk; nltk.download('averaged_perceptron_tagger')" && \
python3.10 -c "from unstructured.ingest.pipeline.initialize import initialize; initialize()"
python3.10 -c "from hi_res_model_initialize import initialize; initialize()"

FROM model-deps as code
COPY --chown=${NB_USER}:${NB_USER} CHANGELOG.md CHANGELOG.md
Expand Down
2 changes: 1 addition & 1 deletion prepline_general/api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
app = FastAPI(
title="Unstructured Pipeline API",
description="""""",
version="0.0.57",
version="0.0.58",
docs_url="/general/docs",
openapi_url="/general/openapi.json",
)
Expand Down
2 changes: 1 addition & 1 deletion prepline_general/api/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,7 @@ def return_content_type(filename):


@router.post("/general/v0/general")
@router.post("/general/v0.0.57/general")
@router.post("/general/v0.0.58/general")
def pipeline_1(
request: Request,
gz_uncompressed_content_type: Optional[str] = Form(default=None),
Expand Down
2 changes: 1 addition & 1 deletion preprocessing-pipeline-family.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
name: general
version: 0.0.57
version: 0.0.58
32 changes: 17 additions & 15 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ cryptography==41.0.5
# via pdfminer-six
cycler==0.12.1
# via matplotlib
dataclasses-json==0.6.1
dataclasses-json==0.6.2
# via unstructured
effdet==0.4.1
# via layoutparser
Expand All @@ -58,15 +58,15 @@ filetype==1.2.0
# via unstructured
flatbuffers==23.5.26
# via onnxruntime
fonttools==4.44.0
fonttools==4.44.3
# via matplotlib
fsspec==2023.10.0
# via
# huggingface-hub
# torch
h11==0.14.0
# via uvicorn
huggingface-hub==0.17.3
huggingface-hub==0.19.3
# via
# timm
# tokenizers
Expand Down Expand Up @@ -115,7 +115,7 @@ networkx==3.2.1
# unstructured
nltk==3.8.1
# via unstructured
numpy==1.26.1
numpy==1.26.2
# via
# contourpy
# layoutparser
Expand Down Expand Up @@ -154,7 +154,7 @@ packaging==23.2
# pytesseract
# transformers
# unstructured-pytesseract
pandas==2.1.2
pandas==2.1.3
# via
# layoutparser
# unstructured
Expand Down Expand Up @@ -200,9 +200,9 @@ pypandoc==1.12
# via unstructured
pyparsing==3.1.1
# via matplotlib
pypdf==3.17.0
pypdf==3.17.1
# via -r requirements/base.in
pypdfium2==4.23.1
pypdfium2==4.24.0
# via pdfplumber
pytesseract==0.3.10
# via layoutparser
Expand All @@ -218,7 +218,7 @@ python-magic==0.4.27
# via unstructured
python-multipart==0.0.6
# via unstructured-inference
python-pptx==0.6.21
python-pptx==0.6.23
# via unstructured
pytz==2023.3.post1
# via pandas
Expand Down Expand Up @@ -271,15 +271,15 @@ tabulate==0.9.0
# via unstructured
timm==0.9.10
# via effdet
tokenizers==0.14.1
tokenizers==0.15.0
# via transformers
torch==2.1.0
torch==2.1.1
# via
# effdet
# layoutparser
# timm
# torchvision
torchvision==0.16.0
torchvision==0.16.1
# via
# effdet
# layoutparser
Expand All @@ -290,7 +290,7 @@ tqdm==4.66.1
# iopath
# nltk
# transformers
transformers==4.35.0
transformers==4.35.2
# via unstructured-inference
typing-extensions==4.8.0
# via
Expand All @@ -307,13 +307,15 @@ typing-inspect==0.9.0
# via dataclasses-json
tzdata==2023.3
# via pandas
unstructured[local-inference]==0.10.29
# via -r requirements/base.in
unstructured[local-inference]==0.10.30
# via
# -r requirements/base.in
# unstructured
unstructured-inference==0.7.11
# via unstructured
unstructured-pytesseract==0.3.12
# via unstructured
urllib3==2.0.7
urllib3==2.1.0
# via requests
uvicorn==0.24.0.post1
# via -r requirements/base.in
Expand Down
Loading

0 comments on commit ab7bc82

Please sign in to comment.