forked from NanmiCoder/MediaCrawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
19 changed files
with
614 additions
and
253 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from abc import ABC, abstractmethod | ||
|
||
|
||
class AbstractCrawler(ABC):
    """Interface for crawler implementations.

    Every method is abstract, so the class cannot be instantiated until a
    subclass overrides all four of them.
    """

    @abstractmethod
    def init_config(self, **kwargs):
        """Receive configuration as arbitrary keyword arguments.

        Which keys are expected is left to the concrete subclass.
        """

    @abstractmethod
    async def start(self):
        """Coroutine hook; presumably the crawler's entry point (defined by subclasses)."""

    @abstractmethod
    async def search_posts(self):
        """Coroutine hook; presumably searches for posts (defined by subclasses)."""

    @abstractmethod
    async def get_comments(self, item_id: int):
        """Coroutine hook taking an integer ``item_id``.

        Presumably retrieves the comments of that item; the return value is
        defined by subclasses.
        """
|
||
|
||
class AbstractLogin(ABC):
    """Interface for login implementations.

    All five coroutine methods are abstract; a subclass must override each
    of them before it can be instantiated.
    """

    @abstractmethod
    async def begin(self):
        """Coroutine hook; presumably starts the login flow (defined by subclasses)."""

    @abstractmethod
    async def check_login_state(self):
        """Coroutine hook; presumably reports whether login succeeded (defined by subclasses)."""

    @abstractmethod
    async def login_by_qrcode(self):
        """Coroutine hook for the QR-code login path (defined by subclasses)."""

    @abstractmethod
    async def login_by_mobile(self):
        """Coroutine hook for the mobile-phone login path (defined by subclasses)."""

    @abstractmethod
    async def login_by_cookies(self):
        """Coroutine hook for the cookie-based login path (defined by subclasses)."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
import config | ||
|
||
|
||
class PhonePool:
    """Pool of phone numbers that tracks which ones have been handed out.

    ``phones`` lists the numbers still available, in insertion order.
    ``used_phones`` is the set of numbers already returned by ``get_phone``.
    """

    def __init__(self):
        self.phones = []
        self.used_phones = set()

    def add_phone(self, phone):
        """Append ``phone`` to the available list.

        Returns:
            True if the phone was added, False if it is already available.
        """
        if phone in self.phones:
            return False
        self.phones.append(phone)
        return True

    def remove_phone(self, phone):
        """Remove ``phone`` from the pool, whether it is available or used.

        Returns:
            True if the phone was present in either collection, else False.
        """
        # get_phone() moves a number out of ``phones`` and into
        # ``used_phones``, so a used number is normally absent from
        # ``phones``. Each collection is handled on its own; assuming the
        # number is in both would raise ValueError from list.remove().
        was_available = phone in self.phones
        if was_available:
            self.phones.remove(phone)
        was_used = phone in self.used_phones
        self.used_phones.discard(phone)
        return was_available or was_used

    def get_phone(self):
        """Pop the oldest available phone and mark it as used.

        Returns:
            The phone, or None when no phone is available.
        """
        if not self.phones:
            return None
        phone = self.phones.pop(0)
        self.used_phones.add(phone)
        return phone

    def clear(self):
        """Forget every phone, both available and used."""
        self.phones = []
        self.used_phones = set()
|
||
|
||
class IPPool:
    """Pool of proxy IPs that tracks which ones have been handed out.

    ``ips`` lists the addresses still available, in insertion order.
    ``used_ips`` is the set of addresses already returned by ``get_ip``.
    """

    def __init__(self):
        self.ips = []
        self.used_ips = set()

    def add_ip(self, ip):
        """Append ``ip`` to the available list.

        Returns:
            True if the ip was added, False if it is already available.
        """
        if ip in self.ips:
            return False
        self.ips.append(ip)
        return True

    def remove_ip(self, ip):
        """Remove ``ip`` from the pool, whether it is available or used.

        Returns:
            True if the ip was present in either collection, else False.
        """
        # get_ip() moves an address out of ``ips`` and into ``used_ips``, so
        # a used address is normally absent from ``ips``. Each collection is
        # handled on its own; assuming the address is in both would raise
        # ValueError from list.remove().
        was_available = ip in self.ips
        if was_available:
            self.ips.remove(ip)
        was_used = ip in self.used_ips
        self.used_ips.discard(ip)
        return was_available or was_used

    def get_ip(self):
        """Pop the oldest available ip and mark it as used.

        Returns:
            The ip, or None when no ip is available.
        """
        if not self.ips:
            return None
        ip = self.ips.pop(0)
        self.used_ips.add(ip)
        return ip

    def clear(self):
        """Forget every ip, both available and used."""
        self.ips = []
        self.used_ips = set()
|
||
|
||
class AccountPool:
    """Pool of (phone, ip) accounts backed by a PhonePool and an IPPool.

    The two pools are consumed in lockstep, so a phone and an ip belong
    together only by sharing the same position in their pools.
    """

    def __init__(self):
        self.phone_pool = PhonePool()
        self.ip_pool = IPPool()

    def add_account(self, phone, ip):
        """Add ``phone`` and ``ip`` together, or add neither.

        Returns:
            True if both were added, False if either is already available.
        """
        # Check both halves before touching either pool. Adding the phone
        # and then rejecting a duplicate ip would leave the pools with
        # different lengths and break the positional pairing.
        if phone in self.phone_pool.phones or ip in self.ip_pool.ips:
            return False
        self.phone_pool.add_phone(phone)
        self.ip_pool.add_ip(ip)
        return True

    def remove_account(self, phone, ip):
        """Remove ``phone`` and then ``ip`` from their pools.

        The ip is only removed when the phone removal succeeded.

        Returns:
            True if both removals succeeded, otherwise False.
        """
        return self.phone_pool.remove_phone(phone) and self.ip_pool.remove_ip(ip)

    def get_account(self):
        """Return the next ``(phone, ip)`` pair, reloading the pool once if empty.

        Returns:
            A ``(phone, ip)`` tuple.

        Raises:
            RuntimeError: if no pair is available even after a reload.
        """
        # One reload is enough: if a freshly reloaded pool still has no
        # pair, further attempts cannot succeed. Unbounded retrying would
        # only end in a RecursionError.
        for attempt in range(2):
            phone = self.phone_pool.get_phone()
            ip = self.ip_pool.get_ip()
            if phone and ip:
                return phone, ip
            if attempt == 0:
                reload_account_pool(self)
        raise RuntimeError(
            "account pool is empty after reload; "
            "check config.PHONE_LIST and config.IP_PROXY_LIST"
        )

    def clear_account(self):
        """Empty both the phone pool and the ip pool."""
        self.phone_pool.clear()
        self.ip_pool.clear()
|
||
|
||
def reload_account_pool(apo: AccountPool):
    """Empty ``apo`` and refill it from the configured phone and proxy lists.

    ``config.PHONE_LIST`` and ``config.IP_PROXY_LIST`` are paired by
    position; ``zip`` stops at the shorter list, so surplus entries in the
    longer one are not loaded.
    """
    apo.clear_account()
    configured_pairs = zip(config.PHONE_LIST, config.IP_PROXY_LIST)
    for phone_number, proxy in configured_pairs:
        apo.add_account(phone_number, proxy)
|
||
|
||
def create_account_pool() -> AccountPool:
    """Build a new AccountPool and load it via ``reload_account_pool``."""
    pool = AccountPool()
    reload_account_pool(pool)
    return pool
|
||
|
||
if __name__ == '__main__':
    import time

    # Demo: keep drawing accounts and print each one. get_account() reloads
    # the pool when it runs dry, so the loop continues for as long as a
    # phone is returned.
    pool = create_account_pool()
    phone, proxy = pool.get_account()
    while phone:
        print(f"get phone:{phone}, ip proxy:{proxy} from account pool")
        phone, proxy = pool.get_account()
        time.sleep(1)
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .base_config import * | ||
from .account_config import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# -*- coding: utf-8 -*- | ||
# account_config.py | ||
|
||
# Placeholder phone numbers; replace with real ones.
PHONE_LIST = [
    "13012345671",
    "13012345672",
    "13012345673",
    "13012345674",
    "13012345675",
    "13012345676",
    # ...
]

# Placeholder proxy endpoints in "host:port" form; replace with real ones.
IP_PROXY_LIST = [
    "111.122.xx.xx1:8888",
    "111.122.xx.xx2:8888",
    "111.122.xx.xx3:8888",
    "111.122.xx.xx4:8888",
    "111.122.xx.xx5:8888",
    "111.122.xx.xx6:8888",
    # ...
]

# Proxy URL scheme prefix and credentials (placeholders).
IP_PROXY_PROTOCOL = "http://"
IP_PROXY_USER = "xxxx"
IP_PROXY_PASSWORD = "xxxx"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Target platform identifier and comma-separated search keywords.
PLATFORM = "xhs"
KEYWORDS = "健身,旅游"

# Login method: "qrcode", "phone" or "cookies".
LOGIN_TYPE = "qrcode"

# Cookie string for cookie login. On the Xiaohongshu platform only the
# web_session cookie is kept.
# xhs cookie format -> web_session=040069b2acxxxxxxxxxxxxxxxxxxxx;
COOKIES = ""

# Redis connection settings.
REDIS_DB_HOST = "redis://127.0.0.1"  # your redis host
REDIS_DB_PWD = "123456"  # your redis password

# Whether to use an IP proxy.
ENABLE_IP_PROXY = False

# Retry interval: 60 * 30 = 1800, i.e. 30 minutes if the unit is seconds.
RETRY_INTERVAL = 60 * 30

# Run Playwright in headless mode.
HEADLESS = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.