-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathprint_pdf_decorated.py
162 lines (139 loc) · 5.41 KB
/
print_pdf_decorated.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env python3
# based on: https://groups.google.com/forum/#!topic/selenium-users/PuDpVblziAofrom __future__ import print_function
import getopt
import sys
import datetime
import re
from os import getenv
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import json, base64
class h1:
def __init__(self, url, chromedriver, print_options):
self.url = url
self.chromedriver = chromedriver
self.print_options = print_options
my_date_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
def setup(self):
"""Executed before running tests"""
def teardown(self):
"""Executed after running tests"""
def test_decorator(func):
""" setup, arange, act, assert, teardown """
def wrapper(self, *args, **kwargs):
print('executing {0}'.format(self.setup), file = sys.stderr)
self.setup()
print('executing {0}'.format(func), file = sys.stderr)
func(self, *args, **kwargs)
print('executing {0}'.format(self.teardown), file = sys.stderr)
self.teardown()
return wrapper
@test_decorator
def visit_site(self):
result = self.print_pdf(url, homedir + '/' + 'Downloads' + '/' + 'chromedriver', print_options)
with open(file = output_file, mode = 'wb') as f:
f.write(result)
def print_pdf(self, url, chromedriver = './chromedriver', print_options = {}):
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome(chromedriver, options = options)
if debug:
print('Loading url: "{}"'.format(url), file = sys.stderr)
driver.get(url)
params = {
'landscape': False,
'displayHeaderFooter': False,
'printBackground': True,
'preferCSSPageSize': False,
}
params.update(print_options)
if debug:
print('params: {}'.format(params))
result = self.send_command_and_get_result(driver, 'Page.printToPDF', params)
# print( result.keys())
driver.quit()
return base64.b64decode(result['data'])
# https://stackoverflow.com/questions/47023842/selenium-chromedriver-printtopdf
# https://www.python-course.eu/python3_formatted_output.php
def send_command_and_get_result(self, driver, cmd, params = {}):
post_url = driver.command_executor._url + '/session/{0:s}/chromium/send_command_and_get_result'.format( driver.session_id)
if debug:
print ('POST to {}'.format(post_url))
print('params: {}'.format(json.dumps({'cmd': cmd, 'params': params})))
response = driver.command_executor._request('POST', post_url, json.dumps({'cmd': cmd, 'params': params}))
if debug:
print( response.keys())
# NOTE: 'has_key()' is even removed from P 3.x
# see also: https://stackoverflow.com/questions/1323410/should-i-use-has-key-or-in-on-python-dicts
# NOTE: KeyError: 'status'
# early imlementation returns JSON with ['status', 'sessionId', 'value'] keys
# with recent versions of chrome response contains only has ['value']['data']
# print( response.keys())
if ('status' in response ) and response['status']:
raise Exception(response.get('value'))
return response.get('value')
# NOTE: on Windows 7 node occationally seeing commctl32.dll warning:
# 'A program running on this computer is trying to display a message'
# no meaningful message shown when 'View the Message' is chosen - repeated multiple times
if __name__ == '__main__':
# https://docs.python.org/2/library/getopt.html
try:
opts, args = getopt.getopt(sys.argv[1:], 'hdi:o:s:p:', ['help', 'debug', 'input=', 'output=','size=', 'pages='])
except getopt.GetoptError as err:
print('usage: print_pdf.py --input <html page> --output <output file>')
print(str(err))
exit()
output_file = None
url = None
paper_size = None
pages = None
global debug
debug = False
for option, argument in opts:
if option == '-d':
debug = True
elif option in ('-h', '--help'):
print ('usage: print_pdf.py --input <html page> --output <output file>')
exit()
elif option in ('-i', '--input'):
url = argument
elif option in ('-p', '--pages'):
pages = argument
elif option in ('-s', '--size'):
paper_size = argument
elif option in ('-o', '--output'):
output_file = argument
else:
assert False, 'unhandled option: {}'.format(option)
if url == None or output_file == None:
print ('usage: print_pdf.py --input <html page> --output <output file>')
exit()
if getenv('OS') != None :
homedir = getenv('USERPROFILE').replace('\\', '/')
else:
homedir = getenv('HOME')
# NOTE: when schema prefix is omitted from the url, an exception is raised:
# selenium.common.exceptions.InvalidArgumentException:
# Message: invalid argument (Session info: headless chrome=76.0.3809.100)
match = re.match(r'^(https?://).*$', url, re.UNICODE)
if match == None:
url = 'https://{}'.format(url)
match = None
if paper_size != None:
match = re.match(r'a4', paper_size, re.UNICODE|re.IGNORECASE )
if match == None:
print_options = {}
else:
# specify custom page size (default is Letter), through dimensions
print_options = {
'paperWidth': 8.27,
'paperHeight': 11.69,
}
if pages != None:
print_options.update({'pageRanges': pages})
if debug:
print('opts: {}'.format(opts))
# exit()
helper = h1(url, homedir + '/' + 'Downloads' + '/' + 'chromedriver', print_options)
helper.visit_site()