-
Notifications
You must be signed in to change notification settings - Fork 2
/
fake_website.py
205 lines (185 loc) · 6.46 KB
/
fake_website.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# @Description: 获取虚拟信息
# @PreInstall: BeautifulSoup,lxml,requests
# @Author : https://www.bajins.com
# @File : fake_website.py
# @Version: 1.0.0
# @Time : 2019/12/26/026 12:10
# @Project: reptile-python
# @Package:
# @Software: PyCharm
import random
import re
import requests
from bs4 import BeautifulSoup
from urllib3.exceptions import InsecureRequestWarning
# Silence the warning emitted for requests made with SSL verification disabled.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Request headers presenting a desktop Chrome User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/72.0.3626.109 Safari/537.36 "
    ),
}

# haoweichi.com generator pages, keyed by country.
hao_wei_chi_url = dict(
    # United States
    usa="http://www.haoweichi.com",
    # Canada
    canada="http://www.haoweichi.com/Others/jia_na_da_shen_fen_sheng_cheng",
    # Australia
    australia="http://www.haoweichi.com/Others/ao_da_li_ya_ren_shen_fen_sheng_cheng",
    # France
    france="http://www.haoweichi.com/Others/fa_guo_ren_shen_fen_sheng_cheng",
    # Italy
    italy="http://www.haoweichi.com/Others/yi_da_li_ren_shen_fen_sheng_cheng",
)
def get_hao_wei_chi(url):
    """
    Scrape one generated identity from a haoweichi.com generator page.

    The page is fetched with the module-level ``headers`` (SSL verification
    disabled, 600 second timeout) and parsed with lxml. The ``value``
    attribute of the first 25 ``<input>`` elements inside the result
    container is then mapped, in document order, onto fixed field names.

    :param url: page to fetch, e.g. one of the values of ``hao_wei_chi_url``
    :return: dict mapping each field name to the matching input's value
    :raises IndexError: if the result container is missing or it holds
        fewer inputs than there are fields
    :raises KeyError: if one of the inputs has no ``value`` attribute
    """
    # Field names in the order the inputs appear on the page. The keys are
    # kept exactly as originally published (including "State" and the
    # trailing spaces on the last four) so existing callers keep working.
    fields = (
        "full_name",                   # full name
        "gender",                      # gender
        "first_name",                  # first name
        "last_name",                   # last name
        "middle_name",                 # middle name
        "call",                        # title / form of address
        "birthday",                    # birthday
        "State",                       # state
        "street_address",              # street address
        "city",                        # city
        "phone",                       # phone
        "postcode",                    # postal code
        "full_state_name",             # full state name
        "ssn_social_security_number",  # SSN (social security number)
        "temporary_mailbox",           # temporary mailbox
        "network_username",            # online username
        "random_code",                 # random password
        "credit_card_type",            # credit card type
        "credit_card_number",          # credit card number
        "cvv2",                        # CVV2
        "expiration_date",             # expiration date
        "position ",                   # position (job title)
        "affiliates ",                 # company
        "height ",                     # height
        "body_weight ",                # body weight
    )

    # The headers belong to the HTTP request, not to the BeautifulSoup parser.
    response = requests.get(url, headers=headers, timeout=600, verify=False)
    page = BeautifulSoup(response.text, features="lxml")

    containers = page.select(
        "body > div.container.index > div.row.main-left > "
        "div.col-md-9.col-sm-9.col-xs-12.no-margin.no-padding > div > div.row.no-margin"
    )
    if not containers:
        raise IndexError(f"result container not found in page {url!r}")

    inputs = containers[0].find_all("input")
    if len(inputs) < len(fields):
        raise IndexError(
            f"expected at least {len(fields)} inputs in page {url!r}, found {len(inputs)}"
        )

    # zip stops at the shorter sequence, so any extra inputs are ignored.
    return {field: tag.attrs["value"] for field, tag in zip(fields, inputs)}
def get_fake_name_generator(url, params):
    """
    Fetch a fakenamegenerator.com page and extract the generated identity.

    The name and address come from the ``div.address`` element; every
    ``<dl>`` row in the details block adds one more entry whose key is the
    normalised ``<dt>`` label (punctuation removed, whitespace replaced by
    ``_``, lower-cased) and whose value is the stripped ``<dd>`` text.

    :param url: page URL to request
    :param params: query-string parameters passed to ``requests.get``
    :return: dict with ``full_name``, ``first_name``, ``last_name``,
        ``address``, ``city``, ``zip_code`` plus one entry per details row
        (and ``area_code`` / ``high_school_graduation`` when the phone and
        birthday rows are present)
    :raises IndexError: if the expected page elements are missing
    """
    response = requests.get(url, params, headers=headers, timeout=600, verify=False)
    page = BeautifulSoup(response.text, features="lxml")
    parent = page.select("#details > div.content > div.info > div")[0]

    # get_text("-") joins the text fragments on either side of <br/> with "-".
    _address = parent.select("div.address > .adr")[0].get_text("-", strip=True).split(",")
    address = _address[0].split("-")

    full_name = parent.select("div.address > h3")[0].text
    name_parts = full_name.split(" ")
    # The third part is used as the last name; fall back to the final part
    # when the name has no middle part, instead of raising IndexError.
    last_name = name_parts[2] if len(name_parts) > 2 else name_parts[-1]

    data = {
        "full_name": full_name,
        "first_name": name_parts[0],
        "last_name": last_name,
        "address": address[0],
        "city": address[1],
        "zip_code": _address[1].strip(),
    }

    for extra in parent.find_all("dl"):
        dt = extra.find("dt")
        dd = extra.find("dd")
        if dt is None or dd is None:
            # Row without a label or a value: nothing to record.
            continue
        content = dd.text

        # Drop everything except CJK characters, letters, digits and whitespace,
        # then turn whitespace into "_" and lower-case the result.
        # count/flags are passed by keyword: positional use is deprecated.
        name = re.sub(r"[^\u4E00-\u9FA5A-Za-z\d\s]", "", dt.text, flags=re.I)
        name = re.sub(r"\s", "_", name, flags=re.I).lower()

        if name == "qr_code":
            continue

        if name == "email_address":
            # Keep only the text before the first whitespace, then append the link target.
            email = re.sub(r"\s.*$", "", dd.text, flags=re.I)
            content = f"""{email},{dd.find("a").attrs["href"]}"""
        elif name == "phone":
            dash = content.find('-')
            if dash == -1:
                # No separator: use "001" as the area code.
                data["area_code"] = "001"
            else:
                data["area_code"] = content[:dash]
                content = content[dash + 1:]
        elif name == "birthday":
            birthday = content.split(",")
            # High-school graduation year is stored as the birth year plus 17.
            data["high_school_graduation"] = int(birthday[1]) + 17
        elif name == 'ssn' and "online" in content:
            # Replace the masked "XXXX" digits with four random digits.
            ssn = content.split(' ')[0].replace("XXXX", "".join(random.choices("0123456789", k=4)))
            content = f"""{ssn},{dd.find("a").attrs["href"]}"""

        # Strip leading and trailing whitespace.
        data[name] = content.strip()
    return data
def get_fake_name_generator_index(params=None):
    """
    Generate a single identity from the fakenamegenerator.com index page.

    :param params: query parameters for the request; when ``None`` a default
        set is used
    :return: the dict produced by ``get_fake_name_generator``
    """
    default_query = {
        # country whose names are used
        "n": "us",
        # country: ca, us
        "c": "ca",
        # gender: random, male, female
        "gen": "random",
    }
    query = default_query if params is None else params
    return get_fake_name_generator("https://www.fakenamegenerator.com/index.php", query)
def get_fake_name_generator_advanced(params=None):
    """
    Generate an identity from the advanced page, which accepts an age range.

    :param params: query parameters for the request; when ``None`` a default
        set is used
    :return: the dict produced by ``get_fake_name_generator``
    """
    endpoint = "https://www.fakenamegenerator.com/advanced.php"
    if params is not None:
        return get_fake_name_generator(endpoint, params)

    return get_fake_name_generator(endpoint, {
        "t": "country",
        # array parameter, multiple choice, at most 5
        "n[]": "us",
        # array parameter, multiple choice, at most 5
        "c[]": "us",
        "gen": "78",
        "age-min": "18",
        "age-max": "25",
    })
if __name__ == '__main__':
    # Demo run: fetch one identity with each generator's defaults and print it.
    for generate in (get_fake_name_generator_index, get_fake_name_generator_advanced):
        print(generate())