-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawler.py
34 lines (29 loc) · 1.33 KB
/
crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import traceback
import chromedriver_autoinstaller
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
class Crawler:
    """Thin wrapper around a headless Chrome WebDriver session.

    Constructing a ``Crawler`` installs a matching chromedriver (via
    ``chromedriver_autoinstaller``), starts headless Chrome with performance
    logging enabled, and registers a script that hides the
    ``navigator.webdriver`` flag on every document the browser loads.

    Attributes:
        driver: The underlying ``selenium.webdriver.Chrome`` instance, or
            ``None`` after :meth:`close` has been called.
    """

    # Injected into every new document *before* page scripts run, so the
    # override survives navigations (a one-off ``execute_script`` call only
    # patches the document that is loaded at that moment).
    _HIDE_WEBDRIVER_JS = (
        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    )

    def __init__(self):
        """Start the headless Chrome session and configure its timeouts."""
        chromedriver_autoinstaller.install()

        # Copy: DesiredCapabilities.CHROME is a shared class-level dict and
        # mutating it in place would leak into other users of the constant.
        caps = dict(DesiredCapabilities.CHROME)
        caps['goog:loggingPrefs'] = {'performance': 'ALL'}

        options = webdriver.ChromeOptions()
        options.add_argument('window-size=900,900')
        options.add_argument('--headless')
        options.add_argument('--no-sandbox')
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option("useAutomationExtension", False)

        self.driver = webdriver.Chrome(options=options, desired_capabilities=caps)
        try:
            self.driver.implicitly_wait(5)
            self.driver.set_page_load_timeout(30)
            self.driver.execute_cdp_cmd(
                'Page.addScriptToEvaluateOnNewDocument',
                {'source': self._HIDE_WEBDRIVER_JS},
            )
        except Exception:
            # Do not leave an orphaned browser process behind when the
            # post-start configuration fails.
            self.close()
            raise

    def close(self):
        """Quit the browser, if one is running. Safe to call repeatedly."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            self.driver = None
            driver.quit()

    def req(self, url):
        """Navigate the browser to ``url``.

        Args:
            url: Address to load.

        Returns:
            The WebDriver instance (``self.driver``) when the page load
            succeeded, or ``False`` when the driver raised an error; the
            traceback is printed in that case.
        """
        try:
            print(f"[+] doing crawler req {url}")
            self.driver.get(url)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still propagate.
            print(f"[x] crawler driver req fail - {url}")
            print(traceback.format_exc())
            return False
        return self.driver