forked from sundowndev/phoneinfoga
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgooglesearch.py
125 lines (94 loc) · 3.27 KB
/
googlesearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#
# @name : PhoneInfoga - Phone numbers OSINT tool
# @url : https://github.com/sundowndev
# @author : Raphael Cerveaux (sundowndev)
import os
import re
import json
from urllib.parse import urlencode
from bs4 import BeautifulSoup
from lib.output import *
from lib.request import send
from config import *
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
# Module-level WebDriver handle: assigned by search() and quit by closeBrowser().
# None means no browser has been started by this module yet.
browser = None
def getFirefoxBrowser():
    """Create and return a Firefox WebDriver instance.

    Selection order:
      1. If the ``webdriverRemote`` environment variable is set to a non-empty
         value, connect to that remote WebDriver endpoint using a copy of the
         Firefox desired capabilities.
      2. Otherwise, if ``firefox_path`` (from config) is not the empty string,
         start a local Firefox using that binary.
      3. Otherwise start a local Firefox with the driver's default binary.
    """
    remoteUrl = os.environ.get("webdriverRemote")
    if remoteUrl:
        capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
        return webdriver.Remote(remoteUrl, capabilities)

    if firefox_path != "":
        return webdriver.Firefox(firefox_binary=FirefoxBinary(firefox_path))

    return webdriver.Firefox()
def closeBrowser():
    """Quit the module-level WebDriver session, if any, and clear the handle.

    Does nothing when no browser has been started. Any exception raised by
    ``quit()`` still propagates to the caller, but the global handle is reset
    to ``None`` either way so later calls never act on an already-closed
    session.
    """
    global browser

    if browser is None:
        return

    try:
        browser.quit()
    finally:
        # Drop the reference even if quit() fails: the session is unusable
        # after this point and must not be quit a second time.
        browser = None
def _parseBody(driver):
    """Return the current page's <body> innerHTML parsed with html5lib."""
    htmlBody = driver.find_element_by_css_selector("body").get_attribute(
        "innerHTML"
    )
    return BeautifulSoup(htmlBody, "html5lib")


def _cleanResultUrl(url):
    """Strip Google's redirect prefix and trailing tracking params from an href."""
    url = re.sub(r"(?:\/url\?q\=)", "", url)
    url = re.sub(r"(?:\/url\?url\=)", "", url)
    url = re.sub(r"(?:\&sa\=)(?:.*)", "", url)
    url = re.sub(r"(?:\&rct\=)(?:.*)", "", url)
    # Relative links back into Google search are made absolute.
    if re.match(r"^(?:\/search\?q\=)", url) is not None:
        url = "https://google.com" + url
    return url


def search(req, stop):
    """Run a Google search and return the list of result URLs.

    When both ``google_api_key`` and ``google_cx_id`` are set, the request is
    delegated to ``searchApi``. Otherwise a Firefox WebDriver session is
    started, the Google results page is loaded and the result links are
    scraped from its HTML.

    Args:
        req: Search query, sent as the ``q`` parameter.
        stop: Number of results requested, sent as the ``num`` parameter.

    Returns:
        A list of URL strings. An empty list is returned when loading or
        parsing the page raises; the error is reported on the console.
    """
    global browser

    if google_api_key and google_cx_id:
        return searchApi(req, stop)

    # A new session is started on every call; quit the previous one first so
    # repeated searches do not leave orphaned browser processes behind.
    if browser is not None:
        try:
            browser.quit()
        except Exception:
            # Best-effort cleanup: the old session may already be closed.
            pass
    browser = getFirefoxBrowser()

    try:
        REQ = urlencode({"q": req, "num": stop, "hl": "en"})
        URL = "https://www.google.com/search?tbs=li:1&{}&gws_rd=ssl&gl=us".format(
            REQ
        )
        browser.get(URL)
        soup = _parseBody(browser)

        # While the captcha page is displayed, wait for the user to solve it
        # in the browser window, then re-read the page.
        while soup.find("div", id="recaptcha") is not None:
            warn(
                "You are temporary blacklisted from Google search. Complete the captcha then press ENTER."
            )
            ask(">")  # Only used to block until ENTER; the answer is ignored.
            soup = _parseBody(browser)

        # If the page has no div#search this raises AttributeError, which is
        # reported by the handler below.
        results = soup.find("div", id="search").find_all("div", class_="g")

        links = []
        for result in results:
            anchor = result.find("a")
            href = anchor.get("href") if anchor is not None else None
            if href is None:
                # Skip entries without a usable link instead of letting one
                # malformed result discard every other link.
                continue
            links.append(_cleanResultUrl(href))
        return links
    except Exception as e:
        error(
            "Request failed. Please retry or open an issue on https://github.com/sundowndev/PhoneInfoga."
        )
        print(e)
        return []
def searchApi(req, stop):
    """Query the Google Custom Search JSON endpoint and return result links.

    Args:
        req: Search query, sent as the ``q`` parameter.
        stop: Number of results requested, sent as the ``num`` parameter.

    Returns:
        A list with the ``link`` value of every returned item whose link is
        not ``None``. An empty list is returned when the response contains an
        ``error`` entry (after reporting it and calling ``askForExit``) or
        has no ``items`` entry.
    """
    params = {"q": req, "key": google_api_key, "cx": google_cx_id, "num": stop}
    endpoint = "https://www.googleapis.com/customsearch/v1?%s" % urlencode(params)
    payload = send("GET", endpoint).json()

    if "error" in payload:
        error(
            "Error while fetching Google search API. Maybe usage limit ? Please verify your keys."
        )
        print(payload["error"])
        askForExit()
        return []

    if "items" not in payload:
        return []

    return [item["link"] for item in payload["items"] if item["link"] is not None]