-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtweets_REST_extract.py
58 lines (43 loc) · 1.78 KB
/
tweets_REST_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
### Extracts One Time Batch Pull of Tweets and Queues to Kafka Messaging
import json
import urllib.request

import urllib3
from kafka import KafkaProducer
from pandas.io.json import json_normalize
from twitter import OAuth, Twitter, TwitterHTTPError, TwitterStream
# Maximum number of search requests to issue; each request returns up to
# 100 tweets, so at most tweet_num * 100 tweets are extracted.
tweet_num = 1000

######## API URL UPDATE ############
# Insert Topic after slash at the end of url
with urllib.request.urlopen('http://127.0.0.1:6000/') as response:
    data = json.load(response)
topicName = data['topicName'][0]
print(topicName)
# NOTE(review): assumes data['topicName'][0] is a list of search terms; a
# plain string would be joined character by character -- confirm against
# the topic service.
topicName = ' OR '.join(topicName)

######### API CREDENTIALS #############
# config.py is executed as Python so that its top-level assignments
# (access_key, access_secret, consumer_key, consumer_secret) land in `config`.
# SECURITY: exec runs arbitrary code -- config.py must be a trusted local file.
config = {}
with open('config.py') as config_file:
    exec(config_file.read(), config)

######## TWITTER API OBJECT CREATION #########
twitter = Twitter(
    auth=OAuth(
        config['access_key'],
        config['access_secret'],
        config['consumer_key'],
        config['consumer_secret'],
    )
)

# Producer for writing json messages to kafka
prod = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8'))

# Raw string: the backslashes in the Windows path must not be read as escapes.
output_path = r'C:\Users\techm\Projects\Twitter_Projects\Tweet_Extract'
search_args = dict(q=topicName, result_type='recent', lang='en', count=100)

# Lowest tweet id seen so far; used to page backwards through older results.
Id_min = None

try:
    with open(output_path, 'w', encoding='utf-8') as tweet_file:
        for _ in range(tweet_num):
            if Id_min is not None:
                # max_id is inclusive, so subtract 1 to avoid re-fetching
                # the oldest tweet of the previous page.
                search_args['max_id'] = Id_min - 1
            iterator = twitter.search.tweets(**search_args)
            print('Completed Search {}'.format(
                iterator['search_metadata']['completed_in']))

            statuses = iterator['statuses']
            if not statuses:
                # No older tweets are available; stop paging.
                break

            count = 0
            for tweet in statuses:
                count += 1
                Id_tweet = tweet['id']
                print(Id_tweet)
                if Id_min is None or Id_tweet < Id_min:
                    Id_min = Id_tweet
                prod.send('Twitter', tweet)
                # One JSON document per line in the output file.
                json.dump(tweet, tweet_file)
                tweet_file.write('\n')
            print(count)
finally:
    # Ensure buffered messages reach the broker before the script exits.
    prod.flush()
    prod.close()