From e6c93df691a5dd96c6a99b8b47a5ea8ae181e363 Mon Sep 17 00:00:00 2001 From: Henk-Jan Lebbink Date: Tue, 13 Jul 2021 19:53:33 +0200 Subject: [PATCH] python update --- Go/.idea/Go.iml | 9 +++++++++ Go/LoadIntelDoc/.idea/.gitignore | 8 ++++++++ Go/LoadIntelDoc/Main.go | 1 + VS/Python/intel-doc-2-md/inteldoc2md/parser.py | 10 +++++----- VS/Python/intel-doc-2-md/inteldoc2md/pile.py | 11 +++++------ VS/Python/intel-doc-2-md/inteldoc2md/writer.py | 5 ++--- VS/Python/intel-doc-2-md/main.py | 5 ++--- VS/Python/intel-doc-2-md/resources/make.sh | 2 +- 8 files changed, 33 insertions(+), 18 deletions(-) create mode 100644 Go/.idea/Go.iml create mode 100644 Go/LoadIntelDoc/.idea/.gitignore create mode 100644 Go/LoadIntelDoc/Main.go diff --git a/Go/.idea/Go.iml b/Go/.idea/Go.iml new file mode 100644 index 00000000..5e764c4f --- /dev/null +++ b/Go/.idea/Go.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/Go/LoadIntelDoc/.idea/.gitignore b/Go/LoadIntelDoc/.idea/.gitignore new file mode 100644 index 00000000..73f69e09 --- /dev/null +++ b/Go/LoadIntelDoc/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/Go/LoadIntelDoc/Main.go b/Go/LoadIntelDoc/Main.go new file mode 100644 index 00000000..091278bf --- /dev/null +++ b/Go/LoadIntelDoc/Main.go @@ -0,0 +1 @@ +package LoadIntelDoc diff --git a/VS/Python/intel-doc-2-md/inteldoc2md/parser.py b/VS/Python/intel-doc-2-md/inteldoc2md/parser.py index 0f6f04d2..43b3ff90 100644 --- a/VS/Python/intel-doc-2-md/inteldoc2md/parser.py +++ b/VS/Python/intel-doc-2-md/inteldoc2md/parser.py @@ -1,4 +1,4 @@ -# -*- coding: utf8 -*- + from pdfminer.pdfparser import PDFParser from pdfminer.pdfdocument import PDFDocument @@ -34,7 +34,7 @@ def extract(self, page_num_start=None, page_num_end=None): if (page_counter >= page_num_start): self._interpreter.process_page(page) layout = self._device.get_result() - print 'page no.' + str(page_counter) + '; extracted page no.' + str(layout.pageid) + print('page no.' + str(page_counter) + '; extracted page no.' + str(layout.pageid)) self._pages[counter] = layout counter = counter + 1 @@ -53,10 +53,10 @@ def parse(self, page_num=None): def _read_file(self, filename): - print 'going to read file '+filename + print('going to read file '+filename) parser = PDFParser(open(filename, 'rb')) document = PDFDocument(parser) - print 'done reading file '+filename + print('done reading file '+filename) return document @@ -69,7 +69,7 @@ def _prepare_tools(self): def _parse_page(self, page): - print 'parsing page '+str(page.pageid) + print('parsing page '+str(page.pageid)) pile = Pile() pile.parse_layout(page) piles = pile.split_piles() diff --git a/VS/Python/intel-doc-2-md/inteldoc2md/pile.py b/VS/Python/intel-doc-2-md/inteldoc2md/pile.py index 88780339..213960bd 100644 --- a/VS/Python/intel-doc-2-md/inteldoc2md/pile.py +++ b/VS/Python/intel-doc-2-md/inteldoc2md/pile.py @@ -1,4 +1,3 @@ -# -*- coding: utf8 -*- from pdfminer.layout import LTFigure from pdfminer.layout import LTTextBox @@ -60,7 +59,7 @@ def parse_layout(self, layout): self._adjust_to_close(obj, self.horizontals, 'y0', self._SEARCH_DISTANCE_HORIZONTAL) self.horizontals.append(obj) elif type(obj) == LTImage: - print 'Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip() + print('Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip()) self.images.append(obj) elif type(obj) == LTCurve: #print 'Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip() @@ -72,7 +71,7 @@ def parse_layout(self, layout): #print 'Pile:parse_layout: type='+str(type(obj))+'; content = '+ obj.get_text().encode('utf8').strip() pass else: - print 'Pile:parse_layout: Unrecognized type: ' + str(type(obj)) + print('Pile:parse_layout: Unrecognized type: ' + str(type(obj))) @staticmethod def get_key(x): @@ -442,7 +441,7 @@ def _gen_paragraph_markdown(self, state): elif state.type == 'exceptions': if re.search('\#', content): if re.search('\(\#', content): - print 'Pile:_gen_paragraph_markdown: not changing "(#"' + print('Pile:_gen_paragraph_markdown: not changing "(#"') #pass else: content = content.replace('\#', '

#') @@ -515,10 +514,10 @@ def _find_cell_texts(self, left, top, right, bottom): def _in_range(self, left, top, right, bottom, obj): if (obj.x0 >= obj.x1): - print 'Pile:_in_range: empty x' + print('Pile:_in_range: empty x') return False if (obj.y0 >= obj.y1): - print 'Pile:_in_range: empty y' + print('Pile:_in_range: empty y') return False left_range = (left - self._SEARCH_DISTANCE_VERTICAL) <= obj.x0 diff --git a/VS/Python/intel-doc-2-md/inteldoc2md/writer.py b/VS/Python/intel-doc-2-md/inteldoc2md/writer.py index 29fc356c..a2db46e2 100644 --- a/VS/Python/intel-doc-2-md/inteldoc2md/writer.py +++ b/VS/Python/intel-doc-2-md/inteldoc2md/writer.py @@ -1,4 +1,3 @@ -# -*- coding: utf8 -*- import os import re @@ -14,7 +13,7 @@ def __init__(self): class Writer(object): def __init__(self): - self.source = 'Intel® Architecture Instruction Set Extensions and Future Features Programming Reference (May 2019)' + self.source = 'Intel® Architecture Instruction Set Extensions and Future Features Programming Reference (December 2020)' #self.source = 'Intel® Architecture Software Developer\'s Manual (May 2018)' @@ -93,7 +92,7 @@ def close_file(self, instruction, markdown): markdown = Writer._cleanup_hyphens(markdown) filename = './output/' + str(instruction).replace('/', '_').replace(' ', '_') + '.md' - print 'writing ' + filename + print('writing ' + filename) fwrite = open(filename, 'w') now = datetime.datetime.now() diff --git a/VS/Python/intel-doc-2-md/main.py b/VS/Python/intel-doc-2-md/main.py index c70fc261..cb35b65e 100644 --- a/VS/Python/intel-doc-2-md/main.py +++ b/VS/Python/intel-doc-2-md/main.py @@ -1,4 +1,3 @@ -# -*- coding: utf8 -*- import sys import os @@ -8,7 +7,7 @@ def main(argv): if len(argv) == 2: filename = argv[1] title = os.path.splitext(os.path.basename(filename))[0] - print 'Parsing', filename + print('Parsing', filename) else: # filename = './resources/test/jcc.pdf' # parse instruction ADD # filename = './resources/test/selection__(p14-15).pdf' # parse instruction ADD @@ -17,7 +16,7 @@ def main(argv): # filename = './resources/architecture-instruction-set-extensions-programming-reference-selection.pdf' filename = './resources/selection-ext.pdf' title = os.path.splitext(os.path.basename(filename))[0] - print 'Parsing', filename + print('Parsing', filename) parser = inteldoc2md.Parser(filename) diff --git a/VS/Python/intel-doc-2-md/resources/make.sh b/VS/Python/intel-doc-2-md/resources/make.sh index 2416b9d3..8bf9931f 100644 --- a/VS/Python/intel-doc-2-md/resources/make.sh +++ b/VS/Python/intel-doc-2-md/resources/make.sh @@ -8,5 +8,5 @@ pdftk A=325462-sdm-vol-1-2abcd-3abcd.pdf cat A590-1134 A1142-1841 A1847-2451 A24 ##pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A56-125 output selection-ext.pdf ##pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A52-100 A102-109 output selection-ext.pdf -pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A62-133 output selection-ext.pdf +pdftk A=architecture-instruction-set-extensions-programming-reference.pdf cat A66-93 A106-119 output selection-ext.pdf