Skip to content

Commit

Permalink
Added slides, fixed code, cooked dinner, swept the floor...
Browse files Browse the repository at this point in the history
  • Loading branch information
Max committed Nov 9, 2011
1 parent 2dd5fcc commit 4544141
Show file tree
Hide file tree
Showing 18 changed files with 176 additions and 2 deletions.
Binary file modified webcast1/Twitter_webcast.key
Binary file not shown.
Binary file added webcast1/Twitter_webcast.ppt
Binary file not shown.
42 changes: 42 additions & 0 deletions webcast1/collect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import tweepy

# First, the basics

import os

# Twitter OAuth endpoints, kept for reference:
#   Request token URL  https://api.twitter.com/oauth/request_token
#   Authorize URL      https://api.twitter.com/oauth/authorize
#   Access token URL   https://api.twitter.com/oauth/access_token
#
# SECURITY: the consumer key/secret and the access token/secret used to be
# pasted here verbatim. Anything committed to a repository has to be treated
# as compromised, so those values should be revoked and regenerated. The
# credentials are now read from the environment instead of source control.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')


def connect():
    """
    Build an OAuth-authenticated ``tweepy.API`` client and verify the login.

    Returns:
        The ``tweepy.API`` instance once ``verify_credentials()`` succeeds.

    Raises:
        RuntimeError: if any of the four TWITTER_* environment variables
            is unset or empty.
        tweepy.TweepError: if Twitter rejects the credentials.
    """
    required = (
        ('TWITTER_CONSUMER_KEY', consumer_key),
        ('TWITTER_CONSUMER_SECRET', consumer_secret),
        ('TWITTER_ACCESS_TOKEN', access_token),
        ('TWITTER_ACCESS_TOKEN_SECRET', access_token_secret),
    )
    missing = [name for name, value in required if not value]
    if missing:
        # Fail before any network traffic, naming exactly what is missing.
        raise RuntimeError(
            'Missing Twitter credentials: ' + ', '.join(missing))

    # The handler takes the *consumer* pair; the *access* pair is attached
    # afterwards. The previous code mixed the two up.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    if not api.verify_credentials():
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise tweepy.TweepError("Login failed.")
    return api

query = '"someScreenName" OR "#sometag"'  # a valid Twitter search query


def run_search(query=query):
    """
    Run a Twitter search and hand every matching status to ``process_tweet``.

    Args:
        query: Twitter search query string; defaults to the module-level
            ``query``. Results are restricted to English (``lang='en'``).

    Raises:
        tweepy.TweepError: re-raised after its traceback has been printed.

    NOTE(review): ``process_tweet`` is not defined in the visible part of
    this file; it has to be provided by the surrounding module.
    """
    # Local import: the module itself only imports ``tweepy``.
    import traceback

    params = {
        'q': query,
        'lang': 'en',
    }

    api = connect()
    try:
        # Cursor and TweepError live in the tweepy namespace; the bare names
        # used before were never imported and raised NameError.
        for status in tweepy.Cursor(api.search, **params).items():
            process_tweet(status)
    except tweepy.TweepError:
        traceback.print_exc()
        raise
Binary file added webcast1/collect.pyc
Binary file not shown.
45 changes: 45 additions & 0 deletions webcast1/collect_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import tweepy

class MyStreamListener(tweepy.StreamListener):
    """
    Stream listener that passes each status to ``process_tweet`` and logs
    errors, timeouts and limitation notices to stdout.

    NOTE(review): ``process_tweet`` is not defined in the visible part of
    this file; it has to be provided by the surrounding module.
    """

    def on_error(self, status_code):
        """Print the error status code; returning True keeps the stream open."""
        print('An error has occured! Status code %s.' % status_code)
        return True  # keep stream alive

    def on_timeout(self):
        """Sleep for ten seconds after a timeout, then keep the stream open."""
        # Local import: the module never imported ``time``, so the original
        # ``time.sleep`` call raised NameError on the first timeout.
        import time

        print('Snoozing Zzzzzz')
        time.sleep(10)
        return True

    def on_delete(self, status_id, user_id):
        """Called when a delete notice arrives for a status"""
        #print "Delete notice for %s. %s" % (status_id, user_id)
        return

    def on_limit(self, track):
        """Called when a limitation notice arrives"""
        print("!!! Limitation notice received: %s" % str(track))
        return

    def on_status(self, status):
        """Process one incoming status."""
        process_tweet(status)
        return True  # or False if you want the stream to disconnect


def start_stream(username, password, listener, follow=(), track=()):
    """
    Open a Twitter stream and dispatch its events to ``listener``.

    username, password: account credentials handed to ``tweepy.Stream``
    listener: stream listener instance that receives the events
    follow: list of users to follow
    track: list of keywords to track

    When neither ``follow`` nor ``track`` is given the sample stream is
    started, otherwise a filtered stream. Both are started with
    ``async=True``. Returns None.
    """
    # Never echo the password: console output ends up in logs and recordings.
    print('Connecting as %s' % username)
    stream = tweepy.Stream(username, password, listener, timeout=60)
    # ``async`` became a reserved word in Python 3.7, so it is passed through
    # a dict to keep this module parseable there; the call is unchanged.
    run_async = {'async': True}
    if follow or track:
        # '%s' formatting accepts numeric user ids as well as text, where a
        # plain ','.join() would raise TypeError on non-string items.
        follow_csv = ','.join('%s' % item for item in follow)
        track_csv = ','.join('%s' % item for item in track)
        print("Starting filter on %s/%s" % (follow_csv, track_csv))
        stream.filter(follow=follow, track=track, **run_async)
    else:
        print("Starting sample")
        stream.sample(**run_async)

# Process a sample stream:
# No follow/track arguments are passed, so start_stream() takes its
# "sample" branch. The username and password are placeholders.
# NOTE(review): this runs at import time; there is no __main__ guard.

listener = MyStreamListener()
start_stream("myusername","mypassword",listener)
35 changes: 35 additions & 0 deletions webcast1/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

# Interactive session transcript (not a runnable module): the commands
# explore the `retweets` and `hashtag_net` graphs built in webcast.py.
# Bare numbers after a command are the recorded output.

# --- Retweet / mention network ---
>>> len(retweets)
>>> net.draw(retweets)
# Convert the directed graph to an undirected one before splitting it
# into connected components.
>>> undir_retweets=retweets.to_undirected()
>>> comps=net.connected_component_subgraphs(undir_retweets)
>>> len(comps)
# presumably comps[0] is the largest component -- confirm the ordering
# guaranteed by the installed networkx version
>>> len(comps[0])
>>> net.draw(comps[0])

# Degree of every node in the first component ...
degrees=net.degree(comps[0])

# ... immediately replaced by the sorted (node, degree) list; show the top 10.
degrees=sorted_degree(comps[0])
degrees[:10]

# Histogram of the degree values (second argument 50 = bins).
plot.hist(net.degree(comps[0]).values(),50)

# Drop the low-degree nodes (trim_degrees default: degree <= 1).
core=trim_degrees(comps[0])

len(core)
2836

# --- Hashtag co-occurrence network ---
len(hashtag_net)
1753

net.draw(hashtag_net)

core=net.connected_component_subgraphs(hashtag_net)[0]
net.draw(core)

# NOTE(review): 'earthquake' is removed from `core`, but the trim_edges calls
# below operate on `hashtag_net`, not `core` -- confirm this is intended.
# NOTE(review): trim_edges is not defined in the visible source; presumably it
# keeps only edges whose weight exceeds the given threshold -- verify.
core.remove_node('earthquake')
core2=trim_edges(hashtag_net, weight=2)
net.draw(core2)

core3=trim_edges(hashtag_net, weight=10)
net.draw(core3)
Binary file added webcast1/core_comp0.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/degree_hist.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/hashtag_core1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/hashtag_core2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/hashtag_core3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/hashtag_core4_1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/hashtag_core4_2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/hashtag_hairball.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/rt_comp0.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/rt_comp1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added webcast1/rt_hairball.pdf
Binary file not shown.
56 changes: 54 additions & 2 deletions webcast1/webcast.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,65 @@
import json
import heatmap
import networkx as net
import matplotlib.pyplot as plot

file="data.json"
def trim_degrees(g, degree=1):
    """
    Trim the graph by removing every node whose degree is less than or
    equal to the value of the ``degree`` parameter.

    Degrees are evaluated once, on the untouched copy, so removing a node
    never causes one of its neighbours to be removed as well.

    Returns a trimmed copy of the graph, so it's non-destructive.
    """
    g2 = g.copy()
    # dict(...) freezes the degrees: depending on the networkx version
    # net.degree() returns either a plain dict or a live view that would
    # change while nodes are being removed.
    d = dict(net.degree(g2))
    # list(...) so nodes are not removed from the container being iterated.
    for n in list(g2.nodes()):
        if d[n] <= degree:
            g2.remove_node(n)
    return g2

def sorted_degree(g):
    """
    Return the ``(node, degree)`` pairs of ``g`` as a list, sorted by
    descending degree; ties are broken by ascending node.
    """
    # dict(...) accepts both the dict and the degree-view return types of
    # net.degree(); .items() replaces the Python-2-only iteritems().
    d = dict(net.degree(g))
    # Index into the pair instead of using a tuple-parameter lambda, which
    # is a SyntaxError on Python 3.
    ds = sorted(d.items(), key=lambda item: (-item[1], item[0]))
    return ds

def add_or_inc_edge(g, f, t):
    """
    Adds an edge to the graph IF the edge does not exist already.
    If it does exist, increment the edge weight.
    Used for quick-and-dirty calculation of projected graphs from 2-mode networks.

    g: graph object providing has_edge(), add_edge() and g[f][t] access
    f, t: the two end points of the edge

    The graph is modified in place; nothing is returned.
    """
    if g.has_edge(f, t):
        attrs = g[f][t]
        # An edge added elsewhere without an explicit weight counts as
        # weight 1 instead of raising KeyError.
        attrs['weight'] = attrs.get('weight', 1) + 1
    else:
        g.add_edge(f, t, weight=1)

# Input file; the loop below parses each line of it as one JSON document.
# NOTE(review): `file` shadows the Python 2 builtin of the same name, and the
# handle opened here is not closed anywhere in the visible code.
file="data.json"
i = open(file,'rb')

# Module-level state filled while reading the tweets:
points =[]  # later passed to hm.heatmap(); filled in code not visible here
retweets=net.DiGraph()  # directed edges: tweet author -> mentioned user
hashtag_net=net.Graph()  # undirected edges between co-occurring hashtags

for tweet in i:
    js = json.loads(tweet)

    ### process tweet to extract information
    try:
        author = js['user']['screen_name']
        entities = js['entities']
        mentions = entities['user_mentions']
        hashtags = entities['hashtags']

        # One directed edge per mention: author -> mentioned user.
        for rt in mentions:
            alter = rt['screen_name']
            retweets.add_edge(author, alter)

        # Call .lower() on the value itself: json.loads() yields unicode on
        # Python 2 and str.lower(<unicode>) raises TypeError there.
        tags = [tag['text'].lower() for tag in hashtags]
        for t1 in tags:
            for t2 in tags:
                # Compare by value, not identity: a hashtag repeated within
                # one tweet yields equal but distinct string objects, which
                # `is not` let through as self-loops.
                if t1 != t2:
                    # Each unordered pair is visited in both orders, so on
                    # the undirected hashtag_net one co-occurrence adds 2 to
                    # the weight. Kept as-is; presumably the weight thresholds
                    # used with trim_edges rely on it -- confirm.
                    add_or_inc_edge(hashtag_net, t1, t2)
    except KeyError:
        # Record lacks user/entities data: flag it and skip the rest of the
        # loop body for this line.
        print(':-(')
        continue


place=''
try:
place=js['coordinates']
Expand Down Expand Up @@ -40,3 +91,4 @@
hm.heatmap(points, "hm.png", dotsize=10)
hm.saveKML("geo.kml")

points =[]

0 comments on commit 4544141

Please sign in to comment.