Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix range() page numbers for Python3 & prevent long cache file names #69

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
language: python
python:
- "2.6"
- "2.7"
- "3.3"
- "3.4"
- "3.5"
- "3.6"
matrix:
include:
- python: 2.7
- python: 3.4
- python: 3.5
- python: 3.6
- python: 3.7
dist: xenial
sudo: true
env: CFLAGS="-O0"

cache:
Expand Down
6 changes: 3 additions & 3 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ environment:
- PYTHON: "C:\\Python27"
- PYTHON: "C:\\Python33"
- PYTHON: "C:\\Python34"
# Appveyor does not currently find the wheels for lxml, and cannot build lxml from source. Disable these for now.
# - PYTHON: "C:\\Python35"
# - PYTHON: "C:\\Python36"
- PYTHON: "C:\\Python35"
- PYTHON: "C:\\Python36"
- PYTHON: "C:\\Python37"

build: off

Expand Down
12 changes: 10 additions & 2 deletions pdfquery/pdfquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import numbers
import re
import chardet
import sys
import hashlib
try:
from collections import OrderedDict
except ImportError:
Expand Down Expand Up @@ -84,7 +86,11 @@ def _comp_bbox(el, el2):


# assorted helpers
def _flatten(l, ltypes=(list, tuple)):
LTYPES = (list, tuple)
if sys.version_info.major > 2:
LTYPES = (list, tuple, range)

def _flatten(l, ltypes=LTYPES):
# via http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
ltype = type(l)
l = list(l)
Expand Down Expand Up @@ -458,7 +464,9 @@ def get_tree(self, *page_numbers):
Return lxml.etree.ElementTree for entire document, or page numbers
given if any.
"""
cache_key = "_".join(map(str, _flatten(page_numbers)))
hasher = hashlib.md5()
hasher.update(str(page_numbers).encode('UTF-8'))
cache_key = "_{}".format(hasher.hexdigest())
tree = self._parse_tree_cacher.get(cache_key)
if tree is None:
# set up root
Expand Down
Binary file added tests/samples/bug67.pdf
Binary file not shown.
31 changes: 31 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sys
import pdfquery
from pdfquery.cache import FileCache
import tempfile

from .utils import BaseTestCase

Expand Down Expand Up @@ -157,3 +158,33 @@ def test_annot_dereferencing(self):
pdf.load()
pdf = pdfquery.PDFQuery("tests/samples/bug42.pdf")
pdf.load()

class TestPageRange(BaseTestCase):
    """
    Check the different ways page numbers can be handed to ``load()``:
    plain ints, lists, a mix of both, and a ``range``.
    """

    @classmethod
    def setUpClass(cls):
        # A single PDFQuery instance is shared by every test in this class.
        # Its parse-tree cacher is a FileCache pointed at the system temp
        # directory (trailing slash included in the path string).
        tmp_root = tempfile.gettempdir()
        file_cache = FileCache("{}/".format(tmp_root))
        cls.pdf = pdfquery.PDFQuery(
            "tests/samples/bug67.pdf",
            parse_tree_cacher=file_cache,
        )

    def _loaded_page_count(self, *page_numbers):
        # Load the requested pages, then report how many LTPage elements
        # the resulting query object contains.
        self.pdf.load(*page_numbers)
        return len(self.pdf.pq('LTPage'))

    def test_page_int(self):
        # Page numbers passed as individual integer arguments.
        self.assertEqual(self._loaded_page_count(3), 1)
        self.assertEqual(self._loaded_page_count(0, 10, 25, 49), 4)

    def test_page_array(self):
        # Page numbers passed as one list, then as several lists.
        self.assertEqual(self._loaded_page_count([0, 7, 11]), 3)
        self.assertEqual(self._loaded_page_count([10], [0, 12], [30, 40]), 5)

    def test_page_mixed(self):
        # Lists and a bare int together. Page 0 is listed twice and the
        # expected total of 6 counts both occurrences.
        self.assertEqual(self._loaded_page_count([0, 7, 11], [0, 44], 1), 6)

    def test_page_range(self):
        # A range object covering 150 page numbers.
        self.assertEqual(self._loaded_page_count(range(0, 150)), 150)