Skip to content

Commit

Permalink
feat: 添加cookie选项,此为可选参数
Browse files Browse the repository at this point in the history
  • Loading branch information
dataabc committed Jan 2, 2020
1 parent e208edb commit 2df4092
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
5 changes: 3 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
  "since_date": "2018-01-01",
  "write_mode": ["csv"],
  "original_pic_download": 1,
- "retweet_pic_download": 0,
+ "retweet_pic_download": 0,
  "original_video_download": 1,
- "retweet_video_download": 0,
+ "retweet_video_download": 0,
+ "cookie": "your cookie",
  "mysql_config": {
  "host": "localhost",
  "port": 3306,
Expand Down
16 changes: 10 additions & 6 deletions weibo.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def __init__(self, config):
  'original_video_download'] # 取值范围为0、1, 0代表不下载原创微博视频,1代表下载
  self.retweet_video_download = config[
  'retweet_video_download'] # 取值范围为0、1, 0代表不下载转发微博视频,1代表下载
+ self.cookie = {'Cookie': config['cookie']}
  self.mysql_config = config['mysql_config'] # MySQL数据库连接配置,可以不填
  user_id_list = config['user_id_list']
  if not isinstance(user_id_list, list):
Expand Down Expand Up @@ -102,7 +103,7 @@ def is_date(self, since_date):
  def get_json(self, params):
  """获取网页中json数据"""
  url = 'https://m.weibo.cn/api/container/getIndex?'
- r = requests.get(url, params=params)
+ r = requests.get(url, params=params, cookies=self.cookie)
  return r.json()

  def get_weibo_json(self, page):
Expand Down Expand Up @@ -191,7 +192,7 @@ def get_user_info(self):
  def get_long_weibo(self, id):
  """获取长微博"""
  url = 'https://m.weibo.cn/detail/%s' % id
- html = requests.get(url).text
+ html = requests.get(url, cookies=self.cookie).text
  html = html[html.find('"status":'):]
  html = html[:html.rfind('"hotScheme"')]
  html = html[:html.rfind(',')]
Expand Down Expand Up @@ -254,7 +255,7 @@ def download_one_file(self, url, file_path, type, weibo_id):
  if not os.path.isfile(file_path):
  s = requests.Session()
  s.mount(url, HTTPAdapter(max_retries=5))
- downloaded = s.get(url, timeout=(5, 10))
+ downloaded = s.get(url, cookies=self.cookie, timeout=(5, 10))
  with open(file_path, 'wb') as f:
  f.write(downloaded.content)
  except Exception as e:
Expand Down Expand Up @@ -560,9 +561,12 @@ def get_one_page(self, page):

  def get_page_count(self):
  """获取微博页数"""
- weibo_count = self.user['statuses_count']
- page_count = int(math.ceil(weibo_count / 10.0))
- return page_count
+ try:
+ weibo_count = self.user['statuses_count']
+ page_count = int(math.ceil(weibo_count / 10.0))
+ return page_count
+ except KeyError:
+ sys.exit(u'此用户微博可能需要cookie才能爬取')

  def get_write_info(self, wrote_count):
  """获取要写入的微博信息"""
Expand Down

0 comments on commit 2df4092

Please sign in to comment.