Skip to content

Commit

Permalink
consistent test results
Browse files Browse the repository at this point in the history
git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@191 1aa58f4a-7d42-0410-adbc-911cccaed67c
  • Loading branch information
yusuke.shinyama.dummy committed Mar 22, 2010
1 parent a6523d1 commit 40b36a7
Show file tree
Hide file tree
Showing 20 changed files with 13,059 additions and 13,043 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ $(CMAPDST)/TO_UNICODE_Adobe_Korea1.py:
$(CONV_CMAP) $(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt cp949 euc-kr

test: cmap
cd samples && $(MAKE) test
cd samples && $(MAKE) test CMP=cmp
test_clean:
-cd samples && $(MAKE) clean
39 changes: 27 additions & 12 deletions pdfminer/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ def uniq(objs):
yield obj
return

def csort(objs, key):
    """Sort objs by key, breaking ties by position in the input.

    objs: an iterable of hashable objects.  It is copied into a list
          first, so a generator or other one-shot iterator works too.
    key:  a one-argument function returning the primary sort key.

    Returns a new sorted list; the input itself is not modified.

    Each object is ranked by the tuple (key(obj), index), where index
    is the object's position in the input.  Objects that compare equal
    (and hash alike) share a single slot in the index table, so all of
    them are ranked by the position of their last occurrence.
    """
    # Materialize once: the sequence is walked twice below (to build the
    # index table and then to sort), and an iterator would already be
    # exhausted on the second pass.
    objs = list(objs)
    idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
    return sorted(objs, key=lambda obj: (key(obj), idxs[obj]))

def is_uniq(objs):
    """Return True if no two elements of objs compare equal.

    objs: a sliceable sequence (list, tuple, string, ...).

    Every element is compared with == against each element that comes
    after it; the first equal pair found makes the result False.  An
    empty or single-element sequence is trivially unique.
    """
    for (pos, candidate) in enumerate(objs):
        # Only look ahead: pairs with earlier elements were already
        # checked when those elements were the candidate.
        rest = objs[pos+1:]
        if any(candidate == other for other in rest):
            return False
    return True


## LAParams
##
Expand Down Expand Up @@ -324,7 +334,7 @@ def __init__(self, objs, word_margin):
LayoutContainer.fixate(self)
objs = []
x1 = INF
for obj in sorted(self.objs, key=lambda obj: obj.x0):
for obj in csort(self.objs, key=lambda obj: obj.x0):
if isinstance(obj, LTChar) and word_margin:
margin = word_margin * obj.width
if x1 < obj.x0-margin:
Expand All @@ -345,7 +355,7 @@ def __init__(self, objs, word_margin):
LayoutContainer.fixate(self)
objs = []
y0 = -INF
for obj in sorted(self.objs, key=lambda obj: -obj.y1):
for obj in csort(self.objs, key=lambda obj: -obj.y1):
if isinstance(obj, LTChar) and word_margin:
margin = word_margin * obj.height
if obj.y1+margin < y0:
Expand Down Expand Up @@ -382,14 +392,14 @@ class LTTextBoxHorizontal(LTTextBox):

def fixate(self):
LTTextBox.fixate(self)
self.objs = sorted(self.objs, key=lambda obj: -obj.y1)
self.objs = csort(self.objs, key=lambda obj: -obj.y1)
return

class LTTextBoxVertical(LTTextBox):

def fixate(self):
LTTextBox.fixate(self)
self.objs = sorted(self.objs, key=lambda obj: -obj.x1)
self.objs = csort(self.objs, key=lambda obj: -obj.x1)
return


Expand All @@ -408,15 +418,15 @@ class LTTextGroupHorizontal(LTTextGroup):
def __init__(self, objs):
LTTextGroup.__init__(self, objs)
# reorder the objects from top-left to bottom-right.
self.objs = sorted(self.objs, key=lambda obj: obj.x0-obj.y1)
self.objs = csort(self.objs, key=lambda obj: obj.x0-obj.y1)
return

class LTTextGroupVertical(LTTextGroup):

def __init__(self, objs):
LTTextGroup.__init__(self, objs)
# reorder the objects from top-right to bottom-left.
self.objs = sorted(self.objs, key=lambda obj: -obj.x1-obj.y1)
self.objs = csort(self.objs, key=lambda obj: -obj.x1-obj.y1)
return


Expand All @@ -432,6 +442,7 @@ class Plane(object):
def __init__(self, objs):
self.xobjs = []
self.yobjs = []
self.idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
for obj in objs:
self.place(obj)
self.xobjs.sort()
Expand All @@ -456,7 +467,7 @@ def find(self, (x0,y0,x1,y1)):
i1 = bsearch(self.yobjs, y1)[1]
yobjs = set( obj for (_,obj) in self.yobjs[i0:i1] )
xobjs.intersection_update(yobjs)
return list(xobjs)
return sorted(xobjs, key=lambda obj: self.idxs[obj])


## group_lines
Expand All @@ -474,21 +485,25 @@ def group_lines(groupfunc, objs, *args):
group = groupfunc(list(uniq(members)))
for obj in members:
groups[obj] = group
groups = set(groups.values())
for group in groups:
done = set()
r = []
for obj in objs:
group = groups[obj]
if group in done: continue
done.add(group)
group.fixate()
return list(groups)
r.append(group)
return r


## group_boxes
##
def group_boxes(groupfunc, objs, distfunc):
assert objs
objs = objs[:]
while 2 <= len(objs):
mindist = INF
minpair = None
objs.sort(key=lambda obj: (obj.width*obj.height, obj.y0))
objs = csort(objs, key=lambda obj: obj.width*obj.height)
for i in xrange(len(objs)):
for j in xrange(i+1, len(objs)):
d = distfunc(objs[i], objs[j])
Expand Down
3 changes: 2 additions & 1 deletion samples/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# GNUMakefile for test

RM=rm -f
CMP=cmp
#CMP=cmp
CMP=:
PYTHON=python
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1

Expand Down
2 changes: 1 addition & 1 deletion samples/dmca.html.ref
Original file line number Diff line number Diff line change
Expand Up @@ -1945,7 +1945,7 @@
<span style="position:absolute; border: red 1px solid; left:143px; top:379px; width:8px; height:214px;"></span>
<span style="position:absolute; border: red 1px solid; left:143px; top:379px; width:8px; height:133px;"></span>
<span style="position:absolute; border: red 1px solid; left:144px; top:379px; width:8px; height:93px;"></span>
<span style="position:absolute; border: red 1px solid; left:144px; top:419px; width:8px; height:52px;"></span>
<span style="position:absolute; border: red 1px solid; left:144px; top:379px; width:8px; height:52px;"></span>
<span style="position:absolute; border: red 1px solid; left:108px; top:726px; width:396px; height:37px;"></span>
<span style="position:absolute; border: red 1px solid; left:108px; top:750px; width:396px; height:13px;"></span>
<span style="position:absolute; border: red 1px solid; left:108px; top:750px; width:229px; height:13px;"></span>
Expand Down
4 changes: 2 additions & 2 deletions samples/dmca.xml.ref
Original file line number Diff line number Diff line change
Expand Up @@ -2197,11 +2197,11 @@
<textgroup bbox="144.000,248.400,152.004,462.480">
<textgroup bbox="144.000,329.040,152.004,462.480">
<textgroup bbox="144.000,369.360,152.004,462.480">
<textgroup bbox="144.000,409.680,152.004,462.480">
<textbox id="5" bbox="144.000,450.000,152.004,462.480" />
<textgroup bbox="144.000,369.360,152.004,422.160">
<textbox id="6" bbox="144.000,409.680,152.004,422.160" />
<textbox id="7" bbox="144.000,369.360,152.004,381.840" />
</textgroup>
<textbox id="7" bbox="144.000,369.360,152.004,381.840" />
</textgroup>
<textbox id="8" bbox="144.000,329.040,152.004,341.520" />
</textgroup>
Expand Down
28 changes: 14 additions & 14 deletions samples/f1040nr.html.ref
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,14 @@
<span style="position:absolute; left:425px; top:184px; font-size:6px;">'</span>
<span style="position:absolute; border: blue 1px solid; left:56px; top:193px; width:502px; height:40px;"></span>
<span style="position:absolute; left:425px; top:193px; font-size:5px;"> </span>
<span style="position:absolute; left:56px; top:197px; font-size:9px;">C</span>
<span style="position:absolute; left:62px; top:197px; font-size:9px;">o</span>
<span style="position:absolute; left:66px; top:197px; font-size:9px;">u</span>
<span style="position:absolute; left:71px; top:197px; font-size:9px;">n</span>
<span style="position:absolute; left:75px; top:197px; font-size:9px;">t</span>
<span style="position:absolute; left:78px; top:197px; font-size:9px;">r</span>
<span style="position:absolute; left:80px; top:197px; font-size:9px;">y</span>
<span style="position:absolute; left:88px; top:196px; font-size:7px;">'</span>
<span style="position:absolute; left:251px; top:197px; font-size:9px;">O</span>
<span style="position:absolute; left:256px; top:197px; font-size:9px;">f</span>
<span style="position:absolute; left:257px; top:197px; font-size:9px;"> </span>
Expand Down Expand Up @@ -636,14 +644,6 @@
<span style="position:absolute; left:430px; top:197px; font-size:9px;">r</span>
<span style="position:absolute; left:432px; top:197px; font-size:9px;">?</span>
<span style="position:absolute; left:439px; top:196px; font-size:7px;">'</span>
<span style="position:absolute; left:56px; top:197px; font-size:9px;">C</span>
<span style="position:absolute; left:62px; top:197px; font-size:9px;">o</span>
<span style="position:absolute; left:66px; top:197px; font-size:9px;">u</span>
<span style="position:absolute; left:71px; top:197px; font-size:9px;">n</span>
<span style="position:absolute; left:75px; top:197px; font-size:9px;">t</span>
<span style="position:absolute; left:78px; top:197px; font-size:9px;">r</span>
<span style="position:absolute; left:80px; top:197px; font-size:9px;">y</span>
<span style="position:absolute; left:88px; top:196px; font-size:7px;">'</span>
<span style="position:absolute; left:88px; top:207px; font-size:8px;"> </span>
<span style="position:absolute; left:439px; top:207px; font-size:8px;"> </span>
<span style="position:absolute; left:56px; top:208px; font-size:9px;">G</span>
Expand Down Expand Up @@ -3783,16 +3783,16 @@
<span style="position:absolute; left:358px; top:473px; font-size:10px;">9</span>
<span style="position:absolute; left:358px; top:474px; font-size:10px;"> </span>
<span style="position:absolute; left:363px; top:473px; font-size:10px;">b</span>
<span style="position:absolute; border: blue 1px solid; left:318px; top:727px; width:2px; height:10px;"></span>
<span style="position:absolute; left:318px; top:727px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:308px; top:691px; width:2px; height:10px;"></span>
<span style="position:absolute; left:308px; top:691px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:435px; top:607px; width:2px; height:10px;"></span>
<span style="position:absolute; left:435px; top:607px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:256px; top:571px; width:2px; height:10px;"></span>
<span style="position:absolute; left:256px; top:571px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:308px; top:691px; width:2px; height:10px;"></span>
<span style="position:absolute; left:308px; top:691px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:318px; top:727px; width:2px; height:10px;"></span>
<span style="position:absolute; left:318px; top:727px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:281px; top:486px; width:2px; height:10px;"></span>
<span style="position:absolute; left:281px; top:486px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:256px; top:571px; width:2px; height:10px;"></span>
<span style="position:absolute; left:256px; top:571px; font-size:10px;"> </span>
<span style="position:absolute; border: blue 1px solid; left:63px; top:437px; width:5px; height:11px;"></span>
<span style="position:absolute; left:63px; top:437px; font-size:10px;">d</span>
<span style="position:absolute; left:63px; top:438px; font-size:10px;"> </span>
Expand Down
2 changes: 1 addition & 1 deletion samples/f1040nr.txt.ref
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ Type of entry visa (see page 8)
'


Of what country were you a citizen or national during the tax year? '
Country '
Of what country were you a citizen or national during the tax year? '


Give address outside the United States to which you want any
Expand Down
58 changes: 29 additions & 29 deletions samples/f1040nr.xml.ref
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,19 @@
<text>
</text>
</textline>
<textline bbox="56.488,635.344,95.968,645.365">
<text font="HelveticaNeue-Roman" vertical="False" bbox="56.488,635.344,62.264,644.672" size="9.328">C</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="62.264,635.344,66.856,644.672" size="9.328">o</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="66.856,635.344,71.304,644.672" size="9.328">u</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="71.304,635.344,75.752,644.672" size="9.328">n</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="75.752,635.344,78.272,644.672" size="9.328">t</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="78.272,635.344,80.936,644.672" size="9.328">r</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="80.936,635.344,84.936,644.672" size="9.328">y</text>
<text> </text>
<text font="Universal-NewswithCommPi" vertical="False" bbox="88.968,638.428,95.968,645.365" size="7.000">'</text>
<text>
</text>
</textline>
<textline bbox="251.598,635.344,446.738,645.365">
<text font="HelveticaNeue-Roman" vertical="False" bbox="251.598,635.344,256.158,644.672" size="9.328">O</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="256.158,635.344,257.934,644.672" size="9.328">f</text>
Expand Down Expand Up @@ -882,19 +895,6 @@
<text>
</text>
</textline>
<textline bbox="56.488,635.344,95.968,645.365">
<text font="HelveticaNeue-Roman" vertical="False" bbox="56.488,635.344,62.264,644.672" size="9.328">C</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="62.264,635.344,66.856,644.672" size="9.328">o</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="66.856,635.344,71.304,644.672" size="9.328">u</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="71.304,635.344,75.752,644.672" size="9.328">n</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="75.752,635.344,78.272,644.672" size="9.328">t</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="78.272,635.344,80.936,644.672" size="9.328">r</text>
<text font="HelveticaNeue-Roman" vertical="False" bbox="80.936,635.344,84.936,644.672" size="9.328">y</text>
<text> </text>
<text font="Universal-NewswithCommPi" vertical="False" bbox="88.968,638.428,95.968,645.365" size="7.000">'</text>
<text>
</text>
</textline>
<textline bbox="88.968,626.542,90.914,634.704">
<text font="HelveticaNeue-Roman" vertical="False" bbox="88.968,626.542,90.914,634.704" size="8.162"> </text>
<text>
Expand Down Expand Up @@ -5326,9 +5326,9 @@
</text>
</textline>
</textbox>
<textbox id="29" bbox="318.048,103.606,320.550,114.100">
<textline bbox="318.048,103.606,320.550,114.100">
<text font="HelveticaNeue-Roman" vertical="False" bbox="318.048,103.606,320.550,114.100" size="10.494"> </text>
<textbox id="29" bbox="435.798,224.046,438.300,234.540">
<textline bbox="435.798,224.046,438.300,234.540">
<text font="HelveticaNeue-Roman" vertical="False" bbox="435.798,224.046,438.300,234.540" size="10.494"> </text>
<text>
</text>
</textline>
Expand All @@ -5340,23 +5340,23 @@
</text>
</textline>
</textbox>
<textbox id="31" bbox="435.798,224.046,438.300,234.540">
<textline bbox="435.798,224.046,438.300,234.540">
<text font="HelveticaNeue-Roman" vertical="False" bbox="435.798,224.046,438.300,234.540" size="10.494"> </text>
<textbox id="31" bbox="318.048,103.606,320.550,114.100">
<textline bbox="318.048,103.606,320.550,114.100">
<text font="HelveticaNeue-Roman" vertical="False" bbox="318.048,103.606,320.550,114.100" size="10.494"> </text>
<text>
</text>
</textline>
</textbox>
<textbox id="32" bbox="256.488,260.196,258.990,270.690">
<textline bbox="256.488,260.196,258.990,270.690">
<text font="HelveticaNeue-Roman" vertical="False" bbox="256.488,260.196,258.990,270.690" size="10.494"> </text>
<textbox id="32" bbox="281.508,344.536,284.010,355.030">
<textline bbox="281.508,344.536,284.010,355.030">
<text font="HelveticaNeue-Roman" vertical="False" bbox="281.508,344.536,284.010,355.030" size="10.494"> </text>
<text>
</text>
</textline>
</textbox>
<textbox id="33" bbox="281.508,344.536,284.010,355.030">
<textline bbox="281.508,344.536,284.010,355.030">
<text font="HelveticaNeue-Roman" vertical="False" bbox="281.508,344.536,284.010,355.030" size="10.494"> </text>
<textbox id="33" bbox="256.488,260.196,258.990,270.690">
<textline bbox="256.488,260.196,258.990,270.690">
<text font="HelveticaNeue-Roman" vertical="False" bbox="256.488,260.196,258.990,270.690" size="10.494"> </text>
<text>
</text>
</textline>
Expand Down Expand Up @@ -5981,15 +5981,15 @@
</textgroup>
<textbox id="28" bbox="358.438,356.790,368.941,368.427" />
</textgroup>
<textbox id="29" bbox="318.048,103.606,320.550,114.100" />
<textbox id="29" bbox="435.798,224.046,438.300,234.540" />
</textgroup>
<textbox id="30" bbox="308.058,139.676,310.560,150.170" />
</textgroup>
<textbox id="31" bbox="435.798,224.046,438.300,234.540" />
<textbox id="31" bbox="318.048,103.606,320.550,114.100" />
</textgroup>
<textbox id="32" bbox="256.488,260.196,258.990,270.690" />
<textbox id="32" bbox="281.508,344.536,284.010,355.030" />
</textgroup>
<textbox id="33" bbox="281.508,344.536,284.010,355.030" />
<textbox id="33" bbox="256.488,260.196,258.990,270.690" />
</textgroup>
<textbox id="34" bbox="63.688,392.640,69.187,404.277" />
</textgroup>
Expand Down
Loading

0 comments on commit 40b36a7

Please sign in to comment.