-
Notifications
You must be signed in to change notification settings - Fork 12
/
get_user_info.py
64 lines (54 loc) · 1.62 KB
/
get_user_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# coding=utf-8
import requests
from bs4 import BeautifulSoup
import re
from fake_useragent import UserAgent
from pymysql import *
import time
'''
Scrape the profile description text of Toutiao (今日头条) account owners.
'''
def get_content(openid, timeout=10):
    """Fetch the profile description text of a Toutiao account.

    Requests the mobile profile page
    ``http://m.toutiao.com/profile/<openid>/`` with a randomly chosen
    User-Agent header and extracts the text of the ``<p id="description">``
    element.

    Args:
        openid: Account identifier; converted with ``str()`` and inserted
            into the profile URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text of the description element, or the placeholder string
        ``'[]'`` when the page has no such element.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    ua = UserAgent()
    url = 'http://m.toutiao.com/profile/' + str(openid) + '/'
    headers = {
        'User-Agent': ua.random,
    }
    # Without an explicit timeout a stalled server would block forever.
    body = requests.get(url, headers=headers, timeout=timeout).text
    page = BeautifulSoup(body, 'lxml')

    # find() returns None when nothing matches, so no exception handling is
    # needed to detect a missing description.
    node = page.find('p', {'id': 'description'})
    if node is None:
        return '[]'
    return node.get_text()
def update_user_desc():
    """Fill in missing descriptions for Toutiao rows in ``toutiao_media``.

    Selects ``id``, ``uid``, ``flag`` and ``describe`` from every row. For
    each row whose ``flag`` is ``'今日头条'`` and whose ``describe`` is still
    the placeholder ``'[]'``, the description is fetched with
    ``get_content(uid)`` and written back. Every update is committed on its
    own, so a failure on one row does not undo earlier rows.

    Uses the module-level ``db`` connection and ``cursor`` created in the
    ``__main__`` section of this file.
    """
    select_sql = """select id,uid,flag,`describe` from toutiao_media"""
    try:
        cursor.execute(select_sql)
        rows = cursor.fetchall()
        db.commit()
    except MySQLError as exc:  # exported by `from pymysql import *`
        db.rollback()
        # No rows were read, so there is nothing to process.
        print('select failed:', exc)
        return

    update_sql = """update toutiao_media set `describe` = %s where id = %s"""
    for row_id, uid, flag, desc in rows:
        # Only Toutiao rows that still carry the placeholder need work.
        if flag != '今日头条' or desc != '[]':
            continue

        try:
            content = get_content(uid)
        except requests.RequestException as exc:
            # A network problem for one account should not abort the batch.
            print('fetch failed for uid', uid, ':', exc)
            continue

        try:
            cursor.execute(update_sql, [content, row_id])
            db.commit()
            print('ok!!!!!')
        except MySQLError as exc:
            db.rollback()
            print('update failed for id', row_id, ':', exc)
if __name__ == '__main__':
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    db = connect(host='localhost', port=3306, db='spider', user='root', password='secret', charset='utf8')
    cursor = db.cursor()
    try:
        # update_user_desc() reads the module-level `db` and `cursor` above.
        update_user_desc()
    finally:
        # Release the cursor and connection even if the update run fails.
        cursor.close()
        db.close()