# -*- coding: utf-8 -*-
"""twitter_sentiment.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1hpvsrDsdq2fHKNpQYvbOTJxVSvkKz5_V
Author: Utkarsh Singhal: https://www.linkedin.com/in/utkarsh-singhal-584770181
Article Link on TradeWithPython: https://tradewithpython.com/sentiment-analysis-for-stocks-from-twitter
"""
import re
import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from wordcloud import WordCloud, STOPWORDS
import snscrape.modules.twitter as sntwitter

nltk.download('vader_lexicon')  # one-time download of the VADER lexicon used by SentimentIntensityAnalyzer
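
# Note: snscrape scrapes Twitter's public web search directly (no API keys required).
# Its Twitter module has changed frequently, so depending on your snscrape version and
# Twitter's backend the scraper below may require a recent (or development) release.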

# Get user input
query = input("Query: ")

# Proceed only if all three inputs are provided (non-empty)
if query != '':
    noOfTweet = input("Enter the number of tweets you want to Analyze: ")
    if noOfTweet != '':
        noOfDays = input("Enter the number of days you want to Scrape Twitter for: ")
        if noOfDays != '':
            # Creating list to append tweet data
            tweets_list = []
            now = dt.date.today().strftime('%Y-%m-%d')
            since = (dt.date.today() - dt.timedelta(days=int(noOfDays))).strftime('%Y-%m-%d')
            # Build the snscrape search string: English tweets for the query within the date
            # window, excluding tweets containing links and replies
            search = query + ' lang:en since:' + since + ' until:' + now + ' -filter:links -filter:replies'
            for i, tweet in enumerate(sntwitter.TwitterSearchScraper(search).get_items()):
                if i >= int(noOfTweet):  # stop once the requested number of tweets is collected
                    break
                tweets_list.append([tweet.date, tweet.id, tweet.content, tweet.username])

            # Creating a dataframe from the tweets list above
            df = pd.DataFrame(tweets_list, columns=['Datetime', 'Tweet Id', 'Text', 'Username'])
            print(df)
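            # For reference: with hypothetical inputs query='TSLA' and noOfDays='2' on 2021-06-10,
            # the search string built above would be
            #   "TSLA lang:en since:2021-06-08 until:2021-06-10 -filter:links -filter:replies"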

            # Create a function to clean the tweets
            def cleanTxt(text):
                text = re.sub(r'@[A-Za-z0-9]+', '', text)   # remove @mentions
                text = re.sub(r'#', '', text)                # remove the '#' symbol
                text = re.sub(r'RT[\s]+', '', text)          # remove RT
                text = re.sub(r'https?:\/\/\S+', '', text)   # remove hyperlinks
                return text

            df["Text"] = df["Text"].apply(cleanTxt)

            # Sentiment Analysis
            def percentage(part, whole):
                return 100 * float(part) / float(whole)

            # Assigning initial counts
            positive = 0
            negative = 0
            neutral = 0

            # Creating empty lists
            tweet_list1 = []
            neutral_list = []
            negative_list = []
            positive_list = []

            analyzer = SentimentIntensityAnalyzer()

            # Iterating over the tweets in the dataframe
            for tweet in df['Text']:
                tweet_list1.append(tweet)
                scores = analyzer.polarity_scores(tweet)
                neg = scores['neg']
                neu = scores['neu']
                pos = scores['pos']
                comp = scores['compound']  # compound score is computed but not used for the classification below
                if neg > pos:
                    negative_list.append(tweet)   # tweet leans negative
                    negative += 1
                elif pos > neg:
                    positive_list.append(tweet)   # tweet leans positive
                    positive += 1
                else:                             # pos == neg
                    neutral_list.append(tweet)    # tweet is neutral
                    neutral += 1

            positive = percentage(positive, len(df))  # percentage() is defined above
            negative = percentage(negative, len(df))
            neutral = percentage(neutral, len(df))
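            # Illustrative VADER output for a single tweet (values are made up for illustration):
            #   analyzer.polarity_scores("Great earnings, the stock looks strong")
            #   -> {'neg': 0.0, 'neu': 0.55, 'pos': 0.45, 'compound': 0.7}
            # The classification above compares only 'neg' vs 'pos'; a common alternative is to
            # threshold the 'compound' score (e.g. >= 0.05 positive, <= -0.05 negative, else neutral).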

            # Converting lists to pandas dataframes
            tweet_list1 = pd.DataFrame(tweet_list1)
            neutral_list = pd.DataFrame(neutral_list)
            negative_list = pd.DataFrame(negative_list)
            positive_list = pd.DataFrame(positive_list)

            # Using len() to report the counts per sentiment bucket
            print("In the last " + noOfDays + " days, there have been", len(tweet_list1), "tweets on " + query)
            print("Positive tweets:", len(positive_list))
            print("Neutral tweets:", len(neutral_list))
            print("Negative tweets:", len(negative_list))

            # Creating the pie chart
            labels = ['Positive [' + str(round(positive)) + '%]',
                      'Neutral [' + str(round(neutral)) + '%]',
                      'Negative [' + str(round(negative)) + '%]']
            sizes = [positive, neutral, negative]
            colors = ['yellowgreen', 'blue', 'red']
            plt.style.use('default')
            patches, texts = plt.pie(sizes, colors=colors, startangle=90)
            plt.legend(labels)
            plt.title("Sentiment Analysis Result for keyword = " + query)
            plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
            plt.show()

            # Word cloud visualization
            def word_cloud(text):
                stopwords = set(STOPWORDS)
                allWords = ' '.join([twts for twts in text])
                wordCloud = WordCloud(background_color='black',
                                      width=1600,
                                      height=800,
                                      stopwords=stopwords,
                                      min_font_size=20,
                                      max_font_size=150,
                                      colormap='prism').generate(allWords)
                fig, ax = plt.subplots(figsize=(20, 10), facecolor='k')
                plt.imshow(wordCloud)
                ax.axis("off")
                fig.tight_layout(pad=0)
                plt.show()

            print('Wordcloud for ' + query)
            word_cloud(df['Text'].values)
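
            # Optional extension (not part of the original script): persist the scraped tweets so the
            # cleaned text can be re-used later without re-scraping. Assumes write access to the
            # current working directory.
            # df.to_csv("tweets_" + query + ".csv", index=False)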