forked from injetlee/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of https://github.com/injetlee/Python
- Loading branch information
Showing
3 changed files
with
278 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
# -*- coding:UTF-8 -*- | ||
|
||
import requests , time ,random | ||
import hmac ,json ,base64 | ||
from bs4 import BeautifulSoup | ||
from hashlib import sha1 | ||
import TencentYoutuyun | ||
from PIL import Image | ||
import uuid | ||
|
||
|
||
|
||
def recognition_captcha(data): | ||
''' 识别验证码 ''' | ||
|
||
file_id = str(uuid.uuid1()) | ||
filename = 'captcha_'+ file_id +'.gif' | ||
filename_png = 'captcha_'+ file_id +'.png' | ||
|
||
if(data is None): | ||
return | ||
data = base64.b64decode(data.encode('utf-8')) | ||
with open( filename ,'wb') as fb: | ||
fb.write( data ) | ||
|
||
appid = 'appid' # 接入优图服务,注册账号获取 | ||
secret_id = 'secret_id' | ||
secret_key = 'secret_key' | ||
userid= 'userid' | ||
end_point = TencentYoutuyun.conf.API_YOUTU_END_POINT | ||
|
||
youtu = TencentYoutuyun.YouTu(appid, secret_id, secret_key, userid, end_point) # 初始化 | ||
|
||
# 拿到的是gif格式,而优图只支持 JPG PNG BMP 其中之一,这时我们需要 pip install Pillow 来转换格式 | ||
im = Image.open( filename) | ||
im.save( filename_png ,"png") | ||
im.close() | ||
|
||
result = youtu.generalocr( filename_png , data_type = 0 , seq = '') # 0代表本地路径,1代表url | ||
|
||
return result | ||
|
||
|
||
def get_captcha(sessiona,headers): | ||
''' 获取验证码 ''' | ||
|
||
need_cap = False | ||
|
||
while( need_cap is not True): | ||
try: | ||
sessiona.get('https://www.zhihu.com/signin',headers=headers) # 拿cookie:_xsrf | ||
resp2 = sessiona.get('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',headers=headers) # 拿cookie:capsion_ticket | ||
need_cap = json.loads(resp2.text)["show_captcha"] # {"show_captcha":false} 表示不用验证码 | ||
time.sleep( 0.5 + random.randint(1,9)/10 ) | ||
except Exception: | ||
continue | ||
|
||
try: | ||
resp3 = sessiona.put('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',headers=headers) # 拿到验证码数据,注意是put | ||
img_data = json.loads(resp3.text)["img_base64"] | ||
except Exception: | ||
return | ||
|
||
|
||
return img_data | ||
|
||
def create_point( point_data, confidence ): | ||
''' 获得点阵 ''' | ||
|
||
# 实际操作下,套路不深,x间隔25,y相同,共7个点 ,先模拟意思一下 | ||
points = {1:[ 20.5,25.1875],2:[ 45.5,25.1875],3:[ 70.5,25.1875],4:[ 95.5,25.1875],5:[120.5,25.1875],6:[145.5,25.1875],7:[170.5,25.1875]} | ||
wi = 0 | ||
input_points = [] | ||
|
||
for word in ( point_data['items'][0]['words'] ): | ||
wi = wi+1 | ||
if( word['confidence'] < confidence ): | ||
try: | ||
input_points.append(points[wi]) # 倒置的中文,优图识别不出来,置信度会低于0.5 | ||
except KeyError: | ||
continue | ||
|
||
if( len(input_points) > 2 or len(input_points) == 0 ): | ||
return [] # 7个字中只有2个倒置中文的成功率高 | ||
|
||
result = {} | ||
result['img_size']=[200,44] | ||
result['input_points']=input_points | ||
result = json.dumps(result) | ||
print(result) | ||
return result | ||
|
||
def bolting(k_low,k_hi,k3_confidence): | ||
''' 筛选把握大的进行验证 ''' | ||
|
||
start = time.time() | ||
|
||
is_success = False | ||
while(is_success is not True): | ||
|
||
points_len = 1 | ||
angle = -20 | ||
img_ko = [] | ||
|
||
while(points_len != 21 or angle < k_low or angle > k_hi ): | ||
img_data = get_captcha(sessiona,headers) | ||
img_ko = recognition_captcha(img_data) | ||
|
||
## json.dumps 序列化时对中文默认使用的ascii编码.想输出真正的中文需要指定ensure_ascii=False | ||
# img_ko_json = json.dumps(img_ko , indent =2 ,ensure_ascii=False ) | ||
# img_ko_json = img_ko_json.encode('raw_unicode_escape') ## 因为python3的原因,也因为优图自身的原因,此处要特殊处理 | ||
|
||
# with open( "json.txt" ,'wb') as fb: | ||
# fb.write( img_ko_json ) | ||
|
||
try: | ||
points_len = len(img_ko['items'][0]['itemstring']) | ||
angle = img_ko['angle'] | ||
except Exception: | ||
points_len = 1 | ||
angle = -20 | ||
continue | ||
|
||
# print(img_ko_json.decode('utf8')) ## stdout用的是utf8,需转码才能正常显示 | ||
# print('-'*50) | ||
|
||
input_text = create_point( img_ko ,k3_confidence ) | ||
if(type(input_text) == type([])): | ||
continue | ||
|
||
data = { | ||
"input_text":input_text | ||
} | ||
|
||
# 提交过快会被拒绝,{"code":120005,"name":"ERR_VERIFY_CAPTCHA_TOO_QUICK"} ,假装思考5秒钟 | ||
time.sleep( 4 + random.randint(1,9)/10 ) | ||
try: | ||
resp5 = sessiona.post('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',data,headers=headers) | ||
except Exception: | ||
continue | ||
|
||
print("angle: "+ str(angle) ) | ||
print(BeautifulSoup(resp5.content ,'html.parser')) # 如果验证成功,会回应{"success":true},开心 | ||
print('-'*50) | ||
try: | ||
is_success = json.loads(resp5.text)["success"] | ||
except KeyError: | ||
continue | ||
|
||
end = time.time() | ||
|
||
return end-start | ||
|
||
|
||
if __name__ == "__main__": | ||
|
||
sessiona = requests.Session() | ||
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0','authorization':'oauth c3cef7c66a1843f8b3a9e6a1e3160e20'} | ||
|
||
k3_confidence = 0.71 | ||
|
||
''' | ||
# 可视化数据会被保存在云端供浏览 | ||
# https://plot.ly/~weldon2010/4 | ||
# 纯属学习,并未看出"角度"范围扩大对图像识别的影响,大部分时候60s内能搞定,说明优图还是很强悍的,识别速度也非常快 | ||
''' | ||
runtime_list_x = [] | ||
runtime_list_y = [] | ||
nn = range(1,11) # 愿意的话搞多线程,1百万次更有意思 | ||
|
||
# 成功尝试100次,形成2维数据以热力图的方式展示 | ||
for y in nn : | ||
for x in nn : | ||
runtime_list_x.append( bolting(-3,3,k3_confidence) ) | ||
print( "y: " + str(runtime_list_y) ) | ||
print( "x: " + str(runtime_list_x) ) | ||
runtime_list_y.append(runtime_list_x.copy()) | ||
runtime_list_x = [] | ||
|
||
print ("-"*30) | ||
print( runtime_list_y ) | ||
print ("-"*30) | ||
|
||
# pip install plotly 数据可视化 | ||
import plotly | ||
import plotly.graph_objs as go | ||
plotly.tools.set_credentials_file(username='username', api_key='username') # 设置账号,去官网注册 | ||
trace = go.Heatmap(z = runtime_list_y , x = [n for n in nn ] ,y =[n for n in nn ]) | ||
data=[trace] | ||
plotly.plotly.plot(data, filename='weldon-time2-heatmap') | ||
|
||
# 尝试后发现一个特点,基本都是1~2个倒置中文,这样我们可以借此提速 | ||
# 角度范围放大,仅当识别出倒置中文为1~2个时才提交验证否则放弃继续寻找 | ||
|
||
### chcp 65001 (win下改变cmd字符集) | ||
### python c:\python34\image_recognition_zhihu.py | ||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,27 +1,84 @@ | ||
import requests,time | ||
# -*- coding:UTF-8 -*- | ||
|
||
import requests , time | ||
import hmac ,json | ||
from bs4 import BeautifulSoup | ||
url = 'https://www.zhihu.com/login/email' | ||
def get_captcha(data): | ||
from hashlib import sha1 | ||
|
||
|
||
def get_captcha(data,need_cap): | ||
''' 处理验证码 ''' | ||
if need_cap is False: | ||
return | ||
with open('captcha.gif','wb') as fb: | ||
fb.write(data) | ||
return input('captcha') | ||
return input('captcha:') | ||
|
||
def get_signature(grantType,clientId,source,timestamp): | ||
''' 处理签名 ''' | ||
|
||
hm = hmac.new(b'd1b964811afb40118a12068ff74a12f4',None,sha1) | ||
hm.update(str.encode(grantType)) | ||
hm.update(str.encode(clientId)) | ||
hm.update(str.encode(source)) | ||
hm.update(str.encode(timestamp)) | ||
|
||
def login(username,password,oncaptcha): | ||
sessiona = requests.Session() | ||
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'} | ||
xyz = sessiona.get('https://www.zhihu.com/#signin',headers=headers).content | ||
_xsrf = BeautifulSoup(sessiona.get('https://www.zhihu.com/#signin',headers=headers).content,'html.parser').find('input',attrs={'name':'_xsrf'}).get('value') | ||
return str(hm.hexdigest()) | ||
|
||
|
||
|
||
def login(username,password,oncaptcha,sessiona,headers): | ||
''' 处理登录 ''' | ||
|
||
resp1 = sessiona.get('https://www.zhihu.com/signin',headers=headers) # 拿cookie:_xsrf | ||
resp2 = sessiona.get('https://www.zhihu.com/api/v3/oauth/captcha?lang=cn',headers=headers) # 拿cookie:capsion_ticket | ||
need_cap = json.loads(resp2.text)["show_captcha"] # {"show_captcha":false} 表示不用验证码 | ||
|
||
grantType = 'password' | ||
clientId = 'c3cef7c66a1843f8b3a9e6a1e3160e20' | ||
source ='com.zhihu.web' | ||
timestamp = str((time.time()*1000)).split('.')[0] # 签名只按这个时间戳变化 | ||
|
||
captcha_content = sessiona.get('https://www.zhihu.com/captcha.gif?r=%d&type=login'%(time.time()*1000),headers=headers).content | ||
|
||
data = { | ||
"_xsrf":_xsrf, | ||
"email":username, | ||
"client_id":clientId, | ||
"grant_type":grantType, | ||
"timestamp":timestamp, | ||
"source":source, | ||
"signature": get_signature(grantType,clientId,source,timestamp), # 获取签名 | ||
"username":username, | ||
"password":password, | ||
"remember_me":True, | ||
"captcha":oncaptcha(captcha_content) | ||
"lang":"cn", | ||
"captcha":oncaptcha(captcha_content,need_cap), # 获取图片验证码 | ||
"ref_source":"other_", | ||
"utm_source":"" | ||
} | ||
resp = sessiona.post('https://www.zhihu.com/login/email',data,headers=headers).content | ||
print(resp) | ||
|
||
print("**2**: "+str(data)) | ||
print("-"*50) | ||
resp = sessiona.post('https://www.zhihu.com/api/v3/oauth/sign_in',data,headers=headers).content | ||
print(BeautifulSoup(resp,'html.parser')) | ||
|
||
print("-"*50) | ||
return resp | ||
|
||
if __name__ == "__main__": | ||
login('email','password',get_captcha) | ||
sessiona = requests.Session() | ||
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0','authorization':'oauth c3cef7c66a1843f8b3a9e6a1e3160e20'} | ||
|
||
login('[email protected]','12345678',get_captcha,sessiona,headers) # 用户名密码换自己的就好了 | ||
resp = sessiona.get('https://www.zhihu.com/inbox',headers=headers) # 登录进去了,可以看私信了 | ||
print(BeautifulSoup(resp.content ,'html.parser')) | ||
|
||
|
||
|
||
|
||
### chcp 65001 (win下改变cmd字符集) | ||
### python c:\python34\login_zhihu.py | ||
### 有非常无语的事情发生,还以为代码没生效 | ||
|
||
|
||
|
||
|
||
|