Skip to content

Commit

Permalink
新增 requirements.txt文件
Browse files (browse the repository at this point in the history)
修改 将原先的ua获取类升级为最新的版本
  • Loading branch information
Jerry1014 committed Jul 11, 2020
1 parent dda327c commit 227b30d
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 29 deletions.
5 changes: 1 addition & 4 deletions .idea/FundCrawler.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions CrawlingWebpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from multiprocessing import Process, Queue, Event
from time import time, sleep

from FakeUA import fake_ua
from FakeUAGetter import my_fake_ua


class GetPage:
Expand All @@ -33,7 +33,7 @@ def get_page_context(cls, url, timeout, *args) -> tuple:
:param url:要爬取的url
:return: 返回二元组 爬取结果,网页内容
"""
header = {"User-Agent": fake_ua.random}
header = {"User-Agent": my_fake_ua.random}
import requests
try:
page = requests.get(url, headers=header, timeout=timeout)
Expand Down
40 changes: 18 additions & 22 deletions FakeUA.py → FakeUAGetter.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
# -*- coding:utf-8 -*-
"""
使用方法
from FakeUA import fake_ua
fake_ua.random # return a random ua
获取虚假ua的模块
"""

import random
from socket import timeout

from fake_useragent import UserAgent, FakeUserAgentError, VERSION

IF_UPDATE_FAKE_UA = False
try:
from fake_useragent import UserAgent, FakeUserAgentError
offline = False
except ImportError:
print('没有安装fake_useragent模块,offline将设置为True')
offline = True
FakeUserAgentError = Exception


class FakeUA:
"""
用于提供ua,单例模式,fake_ua能用则用,否则用自带的ua集,通过FakeUA.random获取随机ua
"""

def __new__(cls, *args, **kwargs):
# Singleton: build the object only the first time and cache it on the class
# under the attribute name `instance`.
if not hasattr(cls, 'instance'):
cls.instance = super(FakeUA, cls).__new__(cls)
# Every call hands back the cached object instead of allocating a new one.
return cls.instance

def __init__(self, if_update_fake_ua=False):
def __init__(self, user_offline=True):
user_offline = offline or user_offline
self.fake_ua = None
try:
if if_update_fake_ua:
self.fake_ua = UserAgent(path='fake_useragent%s.json' % VERSION)
self.fake_ua.update()
else:
raise FakeUserAgentError()
except FakeUserAgentError:
if user_offline:
print('offline设置为True,将不更新ua库')
raise timeout()
self.fake_ua = UserAgent()
except (FakeUserAgentError, timeout):
print('fake_useragent库更新失败,正在使用本地ua库')
self.some = [
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
'Opera/8.0 (Windows NT 5.1; U; en)',
Expand Down Expand Up @@ -66,6 +64,4 @@ def __getattr__(self, item):
raise AttributeError(r"Object does'n has attribute '%s'" % item)


print('正在初始化随机UA模块,若此步消耗了大量时间,请将FakeUA.py中的IF_UPDATE_FAKE_UA修改为False(默认值)')
fake_ua = FakeUA(IF_UPDATE_FAKE_UA)
print('随机UA模块初始化完成')
my_fake_ua = FakeUA()
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests~=2.24.0

0 comments on commit 227b30d

Please sign in to comment.