Skip to content

Commit

Permalink
Add digg to woid
Browse files Browse the repository at this point in the history
  • Loading branch information
vitorfs committed Nov 17, 2015
1 parent 5e82ea3 commit adfb442
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 12 deletions.
1 change: 1 addition & 0 deletions scripts/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def main_loop():
crawlers.RedditCrawler().update_top_stories()
crawlers.HackerNewsCrawler().update_top_stories()
crawlers.MediumCrawler().update_top_stories()
crawlers.DiggCrawler().update_top_stories()

def lazy_loop():
crawlers.GithubCrawler().update_top_stories()
Expand Down
34 changes: 34 additions & 0 deletions woid/apps/services/crawlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,37 @@ def update_top_stories(self):

except Exception, e:
logging.error(e)


class DiggCrawler(object):
    """Crawls Digg's popular stories and persists them as Story rows."""

    def __init__(self):
        self.service = Service.objects.get(slug='digg')
        self.client = wrappers.DiggClient()

    def update_top_stories(self):
        """Fetch Digg's current top stories and create/update Story records.

        One Story row exists per (service, code, date); after a story has
        left NEW status, each score change is recorded as a StoryUpdate.
        Any exception is logged and swallowed so the crawl loop keeps running.
        """
        try:
            popular_stories = self.client.get_top_stories()
            today = timezone.now()

            for story_data in popular_stories:
                # One row per story per calendar day (time component zeroed).
                story, created = Story.objects.get_or_create(
                    service=self.service,
                    code=story_data['id'],
                    date=timezone.datetime(today.year, today.month, today.day, tzinfo=timezone.get_current_timezone())
                )

                score = story_data['score']
                has_changes = (score != story.score)
                # Only record a delta once the story is past NEW status, so
                # the very first observed score does not create a bogus update.
                if not story.status == Story.NEW and has_changes:
                    update = StoryUpdate(story=story)
                    update.score_changes = score - story.score
                    update.save()

                story.title = story_data['title']
                story.url = story_data['url']
                story.score = score
                story.status = Story.OK
                story.save()

        # 'except Exception as e' (not the Python-2-only 'except Exception, e')
        # is valid on Python 2.6+ and required on Python 3.
        except Exception as e:
            logging.error(e)
10 changes: 10 additions & 0 deletions woid/apps/services/fixtures/initial_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,15 @@
"url": "http://www.nytimes.com/most-popular-viewed",
"story_url": "http://nytimes.com"
}
},
{
"model": "services.Service",
"pk": 6,
"fields": {
"name": "Digg",
"slug": "digg",
"url": "http://www.digg.com",
"story_url": "http://digg.com"
}
}
]
44 changes: 32 additions & 12 deletions woid/apps/services/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
requests.packages.urllib3.disable_warnings()


class AbstractBaseClient(object):
    """Common base for the HTTP service clients.

    Provides ``self.headers`` so every crawler sends the same
    identifying user-agent string with its requests.
    """

    def __init__(self):
        user_agent = 'woid/1.0'
        self.headers = {'user-agent': user_agent}


class HackerNewsClient(object):
def __init__(self):
self.firebase_app = firebase.FirebaseApplication('https://hacker-news.firebaseio.com', None)
Expand All @@ -31,9 +36,7 @@ def get_max_item(self):
return result


class RedditClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class RedditClient(AbstractBaseClient):

def get_front_page_stories(self):
r = None
Expand All @@ -56,9 +59,7 @@ def get_front_page_stories(self):
return stories


class GithubClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class GithubClient(AbstractBaseClient):

def get_today_trending_repositories(self):
r = requests.get('https://github.com/trending', headers=self.headers)
Expand Down Expand Up @@ -92,9 +93,7 @@ def get_today_trending_repositories(self):
return data


class MediumClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class MediumClient(AbstractBaseClient):

def get_top_stories(self):
r = requests.get('https://medium.com/top-stories?format=json', headers=self.headers)
Expand All @@ -103,9 +102,7 @@ def get_top_stories(self):
return json_data['payload']['value']['posts']


class NyTimesClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class NyTimesClient(AbstractBaseClient):

def get_most_popular_stories(self):
data = dict()
Expand All @@ -126,3 +123,26 @@ def get_most_popular_stories(self):
data['mostshared'] = json_data['results']

return data

class DiggClient(AbstractBaseClient):
    """Scrapes digg.com's front page for its currently popular stories."""

    def get_top_stories(self):
        """Return a list of dicts (keys: id, title, url, score) — one per
        story element found on Digg's front page.
        """
        r = requests.get('http://digg.com/', headers=self.headers)
        html = r.text
        soup = BeautifulSoup(html, 'html.parser')
        diggs = soup(attrs={ 'class': 'digg-story' })

        data = list()
        for digg in diggs:
            story_data = dict()
            title = digg.find(attrs={ 'class': 'entry-title' })
            story_data['title'] = title.text.strip()
            # The score attribute may be missing, multi-valued, or contain no
            # digits; default to 0. Narrow except (instead of a bare 'except:')
            # so SystemExit/KeyboardInterrupt and real bugs are not swallowed.
            try:
                story_data['score'] = int(re.sub(r'\D', '', digg['data-digg-score']))
            except (KeyError, TypeError, ValueError):
                story_data['score'] = 0
            story_data['id'] = digg['data-content-id']
            story_data['url'] = digg['data-contenturl']
            data.append(story_data)

        return data
1 change: 1 addition & 0 deletions woid/templates/includes/menu.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
<li><a href="{% url 'services:index' 'github' %}"{% if request.path|startswith:'/github/' %} class="active"{% endif %}>github</a></li>
<li><a href="{% url 'services:index' 'medium' %}"{% if request.path|startswith:'/medium/' %} class="active"{% endif %}>medium</a></li>
<li><a href="{% url 'services:index' 'nytimes' %}"{% if request.path|startswith:'/nytimes/' %} class="active"{% endif %}>nytimes</a></li>
<li><a href="{% url 'services:index' 'digg' %}"{% if request.path|startswith:'/digg/' %} class="active"{% endif %}>digg</a></li>
</ul>
11 changes: 11 additions & 0 deletions woid/templates/services/includes/digg_story.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{# Renders one Digg story as a list item: title link, optional service name (when append_service is set), digg score and date. #}
<li>
<a href="{{ story.url }}" target="_blank">{{ story.title }}</a>
{% if append_service %}
<small><strong>{{ story.service.name }}</strong></small>
{% endif %}
<p>
{{ story.score }} digg score
&bullet;
{{ story.date|date:'M d, Y'|lower }}
</p>
</li>

0 comments on commit adfb442

Please sign in to comment.