Skip to content

Commit

Permalink
python update
Browse files: browse the repository at this point in the history
  • Loading branch information
henkjan-sneller committed Jul 13, 2021
1 parent a91f236 commit e6c93df
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 18 deletions.
9 changes: 9 additions & 0 deletions Go/.idea/Go.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Go/LoadIntelDoc/.idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Go/LoadIntelDoc/Main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package LoadIntelDoc
10 changes: 5 additions & 5 deletions VS/Python/intel-doc-2-md/inteldoc2md/parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf8 -*-


from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
Expand Down Expand Up @@ -34,7 +34,7 @@ def extract(self, page_num_start=None, page_num_end=None):
if (page_counter >= page_num_start):
self._interpreter.process_page(page)
layout = self._device.get_result()
print 'page no.' + str(page_counter) + '; extracted page no.' + str(layout.pageid)
print('page no.' + str(page_counter) + '; extracted page no.' + str(layout.pageid))
self._pages[counter] = layout
counter = counter + 1

Expand All @@ -53,10 +53,10 @@ def parse(self, page_num=None):


def _read_file(self, filename):
print 'going to read file '+filename
print('going to read file '+filename)
parser = PDFParser(open(filename, 'rb'))
document = PDFDocument(parser)
print 'done reading file '+filename
print('done reading file '+filename)
return document


Expand All @@ -69,7 +69,7 @@ def _prepare_tools(self):


def _parse_page(self, page):
print 'parsing page '+str(page.pageid)
print('parsing page '+str(page.pageid))
pile = Pile()
pile.parse_layout(page)
piles = pile.split_piles()
Expand Down
11 changes: 5 additions & 6 deletions VS/Python/intel-doc-2-md/inteldoc2md/pile.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf8 -*-

from pdfminer.layout import LTFigure
from pdfminer.layout import LTTextBox
Expand Down Expand Up @@ -60,7 +59,7 @@ def parse_layout(self, layout):
self._adjust_to_close(obj, self.horizontals, 'y0', self._SEARCH_DISTANCE_HORIZONTAL)
self.horizontals.append(obj)
elif type(obj) == LTImage:
print 'Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip()
print('Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip())
self.images.append(obj)
elif type(obj) == LTCurve:
#print 'Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip()
Expand All @@ -72,7 +71,7 @@ def parse_layout(self, layout):
#print 'Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip()
pass
else:
print 'Pile:parse_layout: Unrecognized type: ' + str(type(obj))
print('Pile:parse_layout: Unrecognized type: ' + str(type(obj)))

@staticmethod
def get_key(x):
Expand Down Expand Up @@ -442,7 +441,7 @@ def _gen_paragraph_markdown(self, state):
elif state.type == 'exceptions':
if re.search('\#', content):
if re.search('\(\#', content):
print 'Pile:_gen_paragraph_markdown: not changing "(#"'
print('Pile:_gen_paragraph_markdown: not changing "(#"')
#pass
else:
content = content.replace('\#', '<p>#')
Expand Down Expand Up @@ -515,10 +514,10 @@ def _find_cell_texts(self, left, top, right, bottom):
def _in_range(self, left, top, right, bottom, obj):

if (obj.x0 >= obj.x1):
print 'Pile:_in_range: empty x'
print('Pile:_in_range: empty x')
return False
if (obj.y0 >= obj.y1):
print 'Pile:_in_range: empty y'
print('Pile:_in_range: empty y')
return False

left_range = (left - self._SEARCH_DISTANCE_VERTICAL) <= obj.x0
Expand Down
5 changes: 2 additions & 3 deletions VS/Python/intel-doc-2-md/inteldoc2md/writer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf8 -*-

import os
import re
Expand All @@ -14,7 +13,7 @@ def __init__(self):
class Writer(object):

def __init__(self):
self.source = 'Intel® Architecture Instruction Set Extensions and Future Features Programming Reference (May 2019)'
self.source = 'Intel® Architecture Instruction Set Extensions and Future Features Programming Reference (December 2020)'
#self.source = 'Intel® Architecture Software Developer\'s Manual (May 2018)'


Expand Down Expand Up @@ -93,7 +92,7 @@ def close_file(self, instruction, markdown):
markdown = Writer._cleanup_hyphens(markdown)

filename = './output/' + str(instruction).replace('/', '_').replace(' ', '_') + '.md'
print 'writing ' + filename
print('writing ' + filename)
fwrite = open(filename, 'w')

now = datetime.datetime.now()
Expand Down
5 changes: 2 additions & 3 deletions VS/Python/intel-doc-2-md/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# -*- coding: utf8 -*-

import sys
import os
Expand All @@ -8,7 +7,7 @@ def main(argv):
if len(argv) == 2:
filename = argv[1]
title = os.path.splitext(os.path.basename(filename))[0]
print 'Parsing', filename
print('Parsing', filename)
else:
# filename = './resources/test/jcc.pdf' # parse instruction ADD
# filename = './resources/test/selection__(p14-15).pdf' # parse instruction ADD
Expand All @@ -17,7 +16,7 @@ def main(argv):
# filename = './resources/architecture-instruction-set-extensions-programming-reference-selection.pdf'
filename = './resources/selection-ext.pdf'
title = os.path.splitext(os.path.basename(filename))[0]
print 'Parsing', filename
print('Parsing', filename)


parser = inteldoc2md.Parser(filename)
Expand Down
2 changes: 1 addition & 1 deletion VS/Python/intel-doc-2-md/resources/make.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ pdftk A=325462-sdm-vol-1-2abcd-3abcd.pdf cat A590-1134 A1142-1841 A1847-2451 A24
##pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A56-125 output selection-ext.pdf
##pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A52-100 A102-109 output selection-ext.pdf

pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A62-133 output selection-ext.pdf
pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A66-93 A106-119 output selection-ext.pdf

0 comments on commit e6c93df

Please sign in to comment.