Skip to content

Commit

Permalink
add twitter crawler
Browse files Browse the repository at this point in the history
  • Loading branch information
YingtongDou committed Mar 8, 2022
1 parent ceaf0ad commit b3dfad2
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions utils/twitter_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import tweepy
import json

# Twitter Developer API credentials -- replace every 'xxx' placeholder with
# the values issued for your own developer application.
consumer_key, consumer_secret = 'xxx', 'xxx'
access_token, access_token_secret = 'xxx', 'xxx'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# Shared API client used by the crawl loop below. Both rate-limit flags are
# enabled; going by their names the client waits (and reports it) when a rate
# limit is reached instead of failing -- see the tweepy documentation.
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

# Crawl the most recent tweets of every account in `id_mappings` and dump them
# to one "<id>.json" file per account, one JSON-encoded tweet object per line.
#
# NOTE(review): `id_mappings` is not defined in the part of the file shown
# here; it has to be loaded before this loop runs. It is described as a
# {user_id: twitter_account_id} mapping. Iterating a dict yields its keys, so
# if the Twitter account ids are the *values*, this loop should iterate
# `id_mappings.values()` instead -- confirm against the data.
m, n = 0, 0  # m: 'Not authorized.' failures, n: every other TweepError
for i, user in enumerate(id_mappings):
    try:
        # Request the account's recent tweets (at most 200 per call).
        statuses = api.user_timeline(user_id=user, count=200)
    except tweepy.TweepError as err:  # handle deleted/suspended accounts
        if str(err) == 'Not authorized.':
            m += 1
            print(f'Not authorized: {m}')
        else:
            # Every other API error is counted and reported as a missing page.
            n += 1
            print(f'Page does not exist: {n}')
    else:
        # Only reached when the API call succeeded. The `with` statement
        # closes the file on exit, so no explicit close() call is needed.
        with open(str(user) + ".json", "w") as outfile:
            outfile.writelines(json.dumps(s._json) + '\n' for s in statuses)
    # Progress indicator: index of the account that was just processed.
    print(f'user number: {i}')

print(f'Not authorized: {m}, Page does not exist: {n}.')

0 comments on commit b3dfad2

Please sign in to comment.