Skip to content

Commit

Permalink
Add digg to woid
Browse files Browse the repository at this point in the history
  • Loading branch information
vitorfs committed Nov 17, 2015
1 parent 5e82ea3 commit adfb442
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 12 deletions.
1 change: 1 addition & 0 deletions scripts/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def main_loop():
crawlers.RedditCrawler().update_top_stories()
crawlers.HackerNewsCrawler().update_top_stories()
crawlers.MediumCrawler().update_top_stories()
crawlers.DiggCrawler().update_top_stories()

def lazy_loop():
crawlers.GithubCrawler().update_top_stories()
Expand Down
34 changes: 34 additions & 0 deletions woid/apps/services/crawlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,37 @@ def update_top_stories(self):

except Exception, e:
logging.error(e)


class DiggCrawler(object):
    """Crawls Digg's popular stories and persists them as Story rows."""

    def __init__(self):
        self.service = Service.objects.get(slug='digg')
        self.client = wrappers.DiggClient()

    def update_top_stories(self):
        """Fetch Digg's current top stories and create/update Story records.

        One Story row exists per (service, code, date); after a story has
        left NEW status, each score change is recorded as a StoryUpdate.
        Any exception is logged and swallowed so the crawl loop keeps running.
        """
        try:
            popular_stories = self.client.get_top_stories()
            today = timezone.now()

            for story_data in popular_stories:
                # One row per story per calendar day (time component zeroed).
                story, created = Story.objects.get_or_create(
                    service=self.service,
                    code=story_data['id'],
                    date=timezone.datetime(today.year, today.month, today.day, tzinfo=timezone.get_current_timezone())
                )

                score = story_data['score']
                has_changes = (score != story.score)
                # Only record a delta once the story is past NEW status, so
                # the very first observed score does not create a bogus update.
                if not story.status == Story.NEW and has_changes:
                    update = StoryUpdate(story=story)
                    update.score_changes = score - story.score
                    update.save()

                story.title = story_data['title']
                story.url = story_data['url']
                story.score = score
                story.status = Story.OK
                story.save()

        # 'except Exception as e' (not the Python-2-only 'except Exception, e')
        # is valid on Python 2.6+ and required on Python 3.
        except Exception as e:
            logging.error(e)
10 changes: 10 additions & 0 deletions woid/apps/services/fixtures/initial_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,15 @@
"url": "http://www.nytimes.com/most-popular-viewed",
"story_url": "http://nytimes.com"
}
},
{
"model": "services.Service",
"pk": 6,
"fields": {
"name": "Digg",
"slug": "digg",
"url": "http://www.digg.com",
"story_url": "http://digg.com"
}
}
]
44 changes: 32 additions & 12 deletions woid/apps/services/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
requests.packages.urllib3.disable_warnings()


class AbstractBaseClient(object):
    """Common base for the HTTP service clients.

    Provides ``self.headers`` so every crawler sends the same
    identifying user-agent string with its requests.
    """

    def __init__(self):
        user_agent = 'woid/1.0'
        self.headers = {'user-agent': user_agent}


class HackerNewsClient(object):
def __init__(self):
self.firebase_app = firebase.FirebaseApplication('https://hacker-news.firebaseio.com', None)
Expand All @@ -31,9 +36,7 @@ def get_max_item(self):
return result


class RedditClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class RedditClient(AbstractBaseClient):

def get_front_page_stories(self):
r = None
Expand All @@ -56,9 +59,7 @@ def get_front_page_stories(self):
return stories


class GithubClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class GithubClient(AbstractBaseClient):

def get_today_trending_repositories(self):
r = requests.get('https://github.com/trending', headers=self.headers)
Expand Down Expand Up @@ -92,9 +93,7 @@ def get_today_trending_repositories(self):
return data


class MediumClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class MediumClient(AbstractBaseClient):

def get_top_stories(self):
r = requests.get('https://medium.com/top-stories?format=json', headers=self.headers)
Expand All @@ -103,9 +102,7 @@ def get_top_stories(self):
return json_data['payload']['value']['posts']


class NyTimesClient(object):
def __init__(self):
self.headers = { 'user-agent': 'woid/1.0' }
class NyTimesClient(AbstractBaseClient):

def get_most_popular_stories(self):
data = dict()
Expand All @@ -126,3 +123,26 @@ def get_most_popular_stories(self):
data['mostshared'] = json_data['results']

return data

class DiggClient(AbstractBaseClient):
    """Scrapes digg.com's front page for its currently popular stories."""

    def get_top_stories(self):
        """Return a list of dicts (keys: id, title, url, score) — one per
        story element found on Digg's front page.
        """
        r = requests.get('http://digg.com/', headers=self.headers)
        html = r.text
        soup = BeautifulSoup(html, 'html.parser')
        diggs = soup(attrs={ 'class': 'digg-story' })

        data = list()
        for digg in diggs:
            story_data = dict()
            title = digg.find(attrs={ 'class': 'entry-title' })
            story_data['title'] = title.text.strip()
            # The score attribute may be missing, multi-valued, or contain no
            # digits; default to 0. Narrow except (instead of a bare 'except:')
            # so SystemExit/KeyboardInterrupt and real bugs are not swallowed.
            try:
                story_data['score'] = int(re.sub(r'\D', '', digg['data-digg-score']))
            except (KeyError, TypeError, ValueError):
                story_data['score'] = 0
            story_data['id'] = digg['data-content-id']
            story_data['url'] = digg['data-contenturl']
            data.append(story_data)

        return data
1 change: 1 addition & 0 deletions woid/templates/includes/menu.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
<li><a href="{% url 'services:index' 'github' %}"{% if request.path|startswith:'/github/' %} class="active"{% endif %}>github</a></li>
<li><a href="{% url 'services:index' 'medium' %}"{% if request.path|startswith:'/medium/' %} class="active"{% endif %}>medium</a></li>
<li><a href="{% url 'services:index' 'nytimes' %}"{% if request.path|startswith:'/nytimes/' %} class="active"{% endif %}>nytimes</a></li>
<li><a href="{% url 'services:index' 'digg' %}"{% if request.path|startswith:'/digg/' %} class="active"{% endif %}>digg</a></li>
</ul>
11 changes: 11 additions & 0 deletions woid/templates/services/includes/digg_story.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{# Renders one Digg story as a list item: title link, optional service name (when append_service is set), digg score and date. #}
<li>
<a href="{{ story.url }}" target="_blank">{{ story.title }}</a>
{% if append_service %}
<small><strong>{{ story.service.name }}</strong></small>
{% endif %}
<p>
{{ story.score }} digg score
&bullet;
{{ story.date|date:'M d, Y'|lower }}
</p>
</li>

0 comments on commit adfb442

Please sign in to comment.