Skip to content

Commit

Permalink
version 20090517
Browse files Browse the repository at this point in the history
git-svn-id: https://pdfminer.googlecode.com/svn/trunk/pdfminer@110 1aa58f4a-7d42-0410-adbc-911cccaed67c
  • Loading branch information
yusuke.shinyama.dummy committed May 17, 2009
1 parent 5c1ceba commit 5c2a6d9
Show file tree
Hide file tree
Showing 13 changed files with 50 additions and 48 deletions.
12 changes: 7 additions & 5 deletions README.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ <h1>PDFMiner</h1>

<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Sun May 17 15:39:06 JST 2009
Last Modified: Sun May 17 22:57:53 JST 2009
<!-- hhmts end -->
</div>

Expand Down Expand Up @@ -51,8 +51,8 @@ <h2>What's It?</h2>
<a name="source"></a>
<p>
<strong>Download:</strong><br>
<a href="http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090330.tar.gz">
http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090330.tar.gz
<a href="http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090517.tar.gz">
http://www.unixuser.org/~euske/python/pdfminer/pdfminer-dist-20090517.tar.gz
</a>
(1.8Mbytes)

Expand Down Expand Up @@ -126,10 +126,11 @@ <h3>For non-ASCII languages</h3>
$ <strong>cd /usr/lib/python2.5/site-packages</strong>
$ <strong>tar jxf CMap.tar.bz2</strong>
</pre></blockquote>
<li> Do the following: (this is optional but highly recommended)<br>
<li> Do the following. (this is optional, but highly recommended)<br>
<blockquote><pre>
$ <strong>python -m pdfminer.cmap /usr/lib/python2.5/site-packages/CMap</strong>
$ <strong>python -m pdfminer.cmap</strong>
</pre></blockquote>
This may take several minutes.
</ol>

<a name="usage"></a>
Expand Down Expand Up @@ -260,6 +261,7 @@ <h3>dumppdf.py</h3>
<hr noshade>
<h2>Changes</h2>
<ul>
<li> 2009/05/17: Bugfixes, massive code restructuring, and simple graphic element support added. setup.py is supported.
<li> 2009/03/30: Text output mode added.
<li> 2009/03/25: Encoding problems fixed. Word splitting option added.
<li> 2009/02/28: Robust handling of corrupted PDFs. Thanks to Troy Bollinger.
Expand Down
2 changes: 1 addition & 1 deletion pdfminer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
__version__ = '20090330'
__version__ = '20090517'

if __name__ == '__main__': print __version__
27 changes: 15 additions & 12 deletions pdfminer/cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
import sys, re, os, os.path
stderr = sys.stderr
from struct import pack, unpack
from utils import choplist, nunpack
from psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
from pdfminer.utils import choplist, nunpack
from pdfminer.fontmetrics import FONT_METRICS
from pdfminer.latin_enc import ENCODING
from pdfminer.glyphlist import charname2unicode
from pdfminer.psparser import PSException, PSSyntaxError, PSTypeError, PSEOF, \
PSLiteral, PSKeyword, literal_name, keyword_name, \
PSStackParser
try:
import cdb
except ImportError:
import pycdb as cdb
import pdfminer.pycdb as cdb


class CMapError(Exception): pass
Expand All @@ -28,7 +31,6 @@ def find_cmap_path():

STRIP_NAME = re.compile(r'[0-9]+')
def name2unicode(name):
from glyphlist import charname2unicode
if name in charname2unicode:
return charname2unicode[name]
m = STRIP_NAME.search(name)
Expand Down Expand Up @@ -360,19 +362,16 @@ def do_keyword(self, pos, token):
## FontMetricsDB
##
class FontMetricsDB(object):
from fontmetrics import FONT_METRICS

@classmethod
def get_metrics(klass, fontname):
return klass.FONT_METRICS[fontname]
return FONT_METRICS[fontname]


## EncodingDB
##
class EncodingDB(object):

from latin_enc import ENCODING

std2unicode = {}
mac2unicode = {}
win2unicode = {}
Expand Down Expand Up @@ -447,18 +446,22 @@ def usage():
(opts, args) = getopt.getopt(argv[1:], 'C:D:f')
except getopt.GetoptError:
return usage()
if not args: usage()
cmapdir = args.pop(0)
if args:
cmapdir = args.pop(0)
else:
cmapdir = find_cmap_path()
outputdir = cmapdir
force = False
for (k, v) in opts:
if k == '-f': force = True
elif k == '-C': cmapdir = v
elif k == '-D': outputdir = v
if not os.path.isdir(cmapdir):
raise ValueError('not directory: %r' % cmapdir)
print >>stderr, 'directory does not exist: %r' % cmapdir
return 111
if not os.path.isdir(outputdir):
raise ValueError('not directory: %r' % outputdir)
print >>stderr, 'directory does not exist: %r' % outputdir
return 111
return convert_cmap(cmapdir, outputdir, force=force)

if __name__ == '__main__': sys.exit(main(sys.argv))
8 changes: 4 additions & 4 deletions pdfminer/converter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/usr/bin/env python
import sys
from pdfdevice import PDFDevice
from pdffont import PDFUnicodeNotDefined
from layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextBox
from utils import mult_matrix, translate_matrix, apply_matrix_pt, enc
from pdfminer.pdfdevice import PDFDevice
from pdfminer.pdffont import PDFUnicodeNotDefined
from pdfminer.layout import LayoutContainer, LTPage, LTText, LTLine, LTRect, LTFigure, LTTextBox
from pdfminer.utils import mult_matrix, translate_matrix, apply_matrix_pt, enc


## PDFPageAggregator
Expand Down
2 changes: 1 addition & 1 deletion pdfminer/layout.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys
from utils import apply_matrix_norm
from pdfminer.utils import apply_matrix_norm
INF = sys.maxint


Expand Down
1 change: 1 addition & 0 deletions pdfminer/lzw.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
stderr = sys.stderr


## LZWDecoder
##
class LZWDecoder(object):
Expand Down
2 changes: 1 addition & 1 deletion pdfminer/pdfcolor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys
from psparser import PSLiteralTable
from pdfminer.psparser import PSLiteralTable


## PDFColorSpace
Expand Down
8 changes: 4 additions & 4 deletions pdfminer/pdffont.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from psparser import PSLiteralTable, PSKeywordTable, PSLiteral, \
from pdfminer.psparser import PSLiteralTable, PSKeywordTable, PSLiteral, \
literal_name, keyword_name, STRICT
from pdftypes import PDFException, \
from pdfminer.pdftypes import PDFException, \
resolve1, int_value, float_value, num_value, \
str_value, list_value, dict_value, stream_value
from cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
from utils import apply_matrix_norm, nunpack
from pdfminer.cmap import CMap, CMapDB, CMapParser, FontMetricsDB, EncodingDB
from pdfminer.utils import apply_matrix_norm, nunpack


## CFFFont
Expand Down
14 changes: 7 additions & 7 deletions pdfminer/pdfinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
from psparser import PSException, PSTypeError, PSEOF, \
from pdfminer.psparser import PSException, PSTypeError, PSEOF, \
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
PSStackParser, PSKeyword, STRICT
from pdftypes import PDFException, PDFStream, PDFObjRef, \
from pdfminer.pdftypes import PDFException, PDFStream, PDFObjRef, \
resolve1, int_value, float_value, num_value, \
str_value, list_value, dict_value, stream_value
from utils import choplist, mult_matrix, translate_matrix, MATRIX_IDENTITY
from pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
from pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
from pdfcolor import PDFColorSpace, PREDEFINED_COLORSPACE, \
from pdfminer.utils import choplist, mult_matrix, translate_matrix, MATRIX_IDENTITY
from pdfminer.pdffont import PDFFontError, PDFType1Font, PDFTrueTypeFont, PDFType3Font, PDFCIDFont
from pdfminer.pdfparser import PDFDocument, PDFParser, PDFPasswordIncorrect
from pdfminer.pdfcolor import PDFColorSpace, PREDEFINED_COLORSPACE, \
LITERAL_DEVICE_GRAY, LITERAL_DEVICE_RGB, LITERAL_DEVICE_CMYK
from cmap import CMapDB
from pdfminer.cmap import CMapDB


## Exceptions
Expand Down
11 changes: 5 additions & 6 deletions pdfminer/pdfparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
import sys, re
import md5, struct
stderr = sys.stderr
from utils import choplist, nunpack, decode_text
from arcfour import Arcfour
from psparser import PSStackParser, PSSyntaxError, PSEOF, \
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, \
STRICT
from pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
from pdfminer.utils import choplist, nunpack, decode_text
from pdfminer.arcfour import Arcfour
from pdfminer.psparser import PSStackParser, PSSyntaxError, PSEOF, \
PSLiteralTable, PSKeywordTable, literal_name, keyword_name, STRICT
from pdfminer.pdftypes import PDFException, PDFTypeError, PDFNotImplementedError, \
PDFStream, PDFObjRef, resolve1, decipher_all, \
int_value, float_value, num_value, str_value, list_value, dict_value, stream_value

Expand Down
5 changes: 2 additions & 3 deletions pdfminer/pdftypes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env python
import sys, zlib
stderr = sys.stderr
from lzw import LZWDecoder
from psparser import PSException, PSObject, \
from pdfminer.lzw import LZWDecoder
from pdfminer.psparser import PSException, PSObject, \
PSLiteral, PSKeyword, PSLiteralTable, PSKeywordTable, \
literal_name, keyword_name, STRICT

Expand Down
3 changes: 1 addition & 2 deletions pdfminer/psparser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env python
import sys, re
stderr = sys.stderr

from utils import choplist
from pdfminer.utils import choplist

STRICT = 0

Expand Down
3 changes: 1 addition & 2 deletions samples/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# GNUMakefile for test

PYTHON=python
CMAPDIR=../CMap
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -C$(CMAPDIR)
PDF2TXT=$(PYTHON) ../tools/pdf2txt.py

HTMLS= \
simple1.html \
Expand Down

0 comments on commit 5c2a6d9

Please sign in to comment.