Skip to content

Commit

Permalink
Fix for PDF headers occurring at arbitrary locations within the first 1024 bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
urule99 committed May 26, 2011
1 parent de57511 commit 8ec3a6b
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,15 @@ def parseObject(self):

#if pdf.DEBUG:
# print '\tstarting object len %d' % len(self.indata)
tags = re.findall('<<(.*)>>[\s\r\n%]*(?:stream[\s\r\n]*(.*?)\n?endstream)?',self.indata,re.MULTILINE|re.DOTALL)
tags = re.findall('<<(.*)>>[\s\r\n%]*(?:stream[\s\r\n]*(.*?)\n?endstream)?',self.indata,re.MULTILINE|re.DOTALL|re.IGNORECASE)
if tags:
for tag,stream in tags:
gttag = tag.find('>>')
streamtag = tag.find('stream')

if 0 < gttag < tag.find('stream'):
#this means that there was an improper parsing because the tag shouldn't contain a stream object
tags = re.findall('<<(.*?)>>[\s\r\n%]*(?:stream[\s\r\n]*(.*?)\n?endstream)?',self.indata,re.MULTILINE|re.DOTALL)
tags = re.findall('<<(.*?)>>[\s\r\n%]*(?:stream[\s\r\n]*(.*?)\n?endstream)?',self.indata,re.MULTILINE|re.DOTALL|re.IGNORECASE)

if not tags: #Error parsing object!
return
Expand Down Expand Up @@ -437,7 +437,7 @@ def parse(self):
self.pages.append(key)

#populate pdfobj's doc_properties with those that exist
enum_properties = ['Title','Author','Subject','Keywords','Creator','Producer','CreationDate','ModDate']
enum_properties = ['Title','Author','Subject','Keywords','Creator','Producer','CreationDate','ModDate','plot']

if k in enum_properties:
value = kval
Expand Down Expand Up @@ -510,7 +510,7 @@ def decryptRC4(self,data,key):


def is_valid(self):
if self.indata.startswith('%PDF-') or self.indata.startswith('%%PDF-'):
if 0 <= self.indata.find('%PDF-') <= 1024:
return True
return False

Expand Down

0 comments on commit 8ec3a6b

Please sign in to comment.