forked from whusnoopy/renrenBackup
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch.py
134 lines (89 loc) · 4.09 KB
/
fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# coding: utf8
import argparse
from playhouse.shortcuts import model_to_dict
from config import config
def prepare_db():
    """Create the tables for every model used by the backup.

    Runs inside the ``database`` context manager from ``models`` and calls
    ``database.create_tables`` twice: first for the user/interaction models,
    then for the content models.
    """
    from models import (
        database,
        FetchedUser, User, Comment, Like,
        Status, Gossip, Album, Photo, Blog,
    )

    # Two batches, created in this order.
    table_batches = (
        [FetchedUser, User, Comment, Like],
        [Status, Gossip, Album, Photo, Blog],
    )

    with database:
        for batch in table_batches:
            database.create_tables(batch)
def prepare_crawler(args):
    """Build a ``Crawler`` from the parsed arguments and store it on ``config``.

    Args:
        args: object exposing ``email`` and ``password`` attributes.

    Returns:
        The value of ``config.crawler`` after the new crawler is assigned.
    """
    from crawl.crawler import Crawler

    email, password = args.email, args.password
    # Third constructor argument is whatever Crawler.load_cookie() returns.
    saved_cookie = Crawler.load_cookie()

    config.crawler = Crawler(email, password, saved_cookie)
    return config.crawler
def update_fetch_info(uid):
    """Rewrite the ``FetchedUser`` row for *uid* with current per-type counts.

    The matching ``User`` row is converted to a dict with ``model_to_dict``,
    extended with the number of ``Status``, ``Gossip``, ``Album``, ``Photo``
    and ``Blog`` rows whose ``uid`` equals *uid*, inserted into
    ``FetchedUser`` with ``on_conflict('replace')``, and finally printed.

    Args:
        uid: uid of the user to summarise.

    Returns:
        Always ``True``.

    Raises:
        KeyError: if ``User.get_or_none`` finds no row for *uid*.
    """
    from models import database, FetchedUser, User, Status, Gossip, Album, Photo, Blog

    # (key stored in the summary, model whose rows are counted), in query order.
    counted_models = (
        ('status', Status),
        ('gossip', Gossip),
        ('album', Album),
        ('photo', Photo),
        ('blog', Blog),
    )

    with database:
        owner = User.get_or_none(User.uid == uid)
        if not owner:
            raise KeyError("no such user")

        summary = model_to_dict(owner)
        for key, model in counted_models:
            summary[key] = model.select().where(model.uid == uid).count()

        FetchedUser.insert(**summary).on_conflict('replace').execute()
        print('update fetched info {fetched_info}'.format(fetched_info=summary))

    return True
def fetch_status(uid):
    """Run the status crawler for *uid*, printing a line before and after.

    Args:
        uid: uid handed to ``crawl.status.get_status``.
    """
    print('prepare to fetch status')

    # Imported only after the banner above has been printed.
    from crawl.status import get_status

    total = get_status(uid)
    print('fetched {status_count} status'.format(status_count=total))
def fetch_gossip(uid):
    """Run the gossip crawler for *uid*, printing a line before and after.

    Args:
        uid: uid handed to ``crawl.gossip.get_gossip``.
    """
    print('prepare to fetch gossip')

    # Imported only after the banner above has been printed.
    from crawl.gossip import get_gossip

    total = get_gossip(uid)
    print('fetched {gossip_count} gossips'.format(gossip_count=total))
def fetch_album(uid):
    """Run the album crawler for *uid*, printing a line before and after.

    Args:
        uid: uid handed to ``crawl.album.get_albums``.
    """
    print('prepare to fetch albums')

    # Imported only after the banner above has been printed.
    from crawl.album import get_albums

    total = get_albums(uid)
    print('fetched {album_count} albums'.format(album_count=total))
def fetch_blog(uid):
    """Run the blog crawler for *uid*, printing a line before and after.

    Args:
        uid: uid handed to ``crawl.blog.get_blogs``.
    """
    print('prepare to fetch blogs')

    # Imported only after the banner above has been printed.
    from crawl.blog import get_blogs

    total = get_blogs(uid)
    print('fetched {blog_count} blogs'.format(blog_count=total))
def fetch_user(uid, args):
    """Fetch the user record for *uid*, then each data type enabled in *args*.

    ``get_user(uid)`` is always called first. The status, gossip, album and
    blog fetchers then run, in that order, for every switch that is truthy.

    Args:
        uid: uid of the user to back up.
        args: object with ``fetch_status``, ``fetch_gossip``, ``fetch_album``
            and ``fetch_blog`` attributes.

    Returns:
        ``True`` if at least one of the four fetchers ran, ``False`` otherwise.
    """
    from crawl.utils import get_user

    get_user(uid)

    # (attribute read from args, fetcher to call), in execution order.
    steps = (
        ('fetch_status', fetch_status),
        ('fetch_gossip', fetch_gossip),
        ('fetch_album', fetch_album),
        ('fetch_blog', fetch_blog),
    )

    ran_any = False
    for option, fetcher in steps:
        # The switch is read lazily, right before its fetcher would run.
        if getattr(args, option):
            fetcher(uid)
            ran_any = True

    return ran_any
if __name__ == "__main__":
    # Script entry point: parse the command line, prepare the database and
    # crawler, fetch whatever was requested, then refresh the stored counts.
    parser = argparse.ArgumentParser(description="fetch renren data to backup")
    parser.add_argument('email', help="your renren email for login")
    parser.add_argument('password', help="your renren password for login")

    # The four per-type switches share one shape: (short flag, long flag, help).
    type_switches = (
        ('-s', '--fetch-status', "fetch status or not"),
        ('-g', '--fetch-gossip', "fetch gossip or not"),
        ('-a', '--fetch-album', "fetch album or not"),
        ('-b', '--fetch-blog', "fetch blog or not"),
    )
    for short_flag, long_flag, help_text in type_switches:
        parser.add_argument(short_flag, long_flag, help=help_text, action="store_true")

    parser.add_argument('-u', '--fetch-uid', type=int,
                        help="user to fetch, or the login user by default")
    parser.add_argument('-r', '--refresh-count', action="store_true",
                        help="refresh fetched user count")
    cmd_args = parser.parse_args()

    prepare_db()
    crawler = prepare_crawler(cmd_args)

    # Fall back to the crawler's own uid when --fetch-uid is absent (or falsy).
    target_uid = cmd_args.fetch_uid or crawler.uid

    fetched = fetch_user(target_uid, cmd_args)
    if not fetched:
        print('nothing need to fetch, just test login')

    # Counts are refreshed after a real fetch or on explicit request.
    if fetched or cmd_args.refresh_count:
        update_fetch_info(target_uid)