076

nusibrains · Jun 11, 2017 · d32627e · d32627e
1 parent c87ef8b
commit d32627e
Show file tree

Hide file tree

Showing 4 changed files with 110 additions and 1 deletion.
diff --git a/076/.gitignore b/076/.gitignore
@@ -0,0 +1 @@
+free-learning
diff --git a/076/packt_notification.py b/076/packt_notification.py
@@ -0,0 +1,101 @@
+from collections import namedtuple
+from datetime import datetime
+from email.mime.multipart import MIMEMultipart
+from email.mime.text import MIMEText
+import os
+import smtplib
+import sys
+
+from bs4 import BeautifulSoup as Soup
+import requests
+
+# Sender address and whitespace-separated recipient list, both read from the
+# environment. A missing TO_PACKT_EMAILS yields an empty list instead of
+# crashing on None.split(), so the check under __main__ can print its hint.
+FROM_MAIL = os.environ.get('FROM_MAIL')
+TO_MAIL = os.environ.get('TO_PACKT_EMAILS', '').split()
+
+BASE_URL = 'https://www.packtpub.com'
+# Last path segment of the offer URL; retrieve_page_html() also looks for a
+# local file with this name before going to the network.
+FREE_LEARNING_PAGE = 'free-learning'
+PACKT_FREE_LEARNING_LINK = BASE_URL + '/packt/offers/' + FREE_LEARNING_PAGE
+# Templates: TIME_LEFT takes (hours, minutes); SUBJECT takes (title, time left).
+TIME_LEFT = '{} hours and {} minutes'
+SUBJECT = 'Free Packt ebook of the day: {} (time left: {})'
+
+# Record holding everything scraped about the book of the day.
+Book = namedtuple('Book', 'title description summary image link timeleft')
+
+
+def retrieve_page_html():
+    """Return the HTML of the Packt free-learning page as a string.
+
+    If a file named FREE_LEARNING_PAGE exists in the current working
+    directory its content is returned and no request is made; otherwise the
+    page is downloaded from PACKT_FREE_LEARNING_LINK.
+
+    Raises:
+        requests.RequestException: the download failed, timed out, or the
+            server answered with an HTTP error status.
+    """
+    if os.path.isfile(FREE_LEARNING_PAGE):
+        with open(FREE_LEARNING_PAGE) as f:
+            return f.read()
+
+    # The timeout keeps an unattended run from hanging forever on a stalled
+    # connection; raise_for_status() stops an error page from being parsed
+    # as if it were the offer page.
+    response = requests.get(PACKT_FREE_LEARNING_LINK, timeout=30)
+    response.raise_for_status()
+    return response.text
+
+
+def _create_time_left_string(countdown_unix_tstamp):
+    """Describe how long remains until a Unix timestamp.
+
+    Args:
+        countdown_unix_tstamp: expiry time as seconds since the epoch; an
+            int or a string of digits (it is passed through int()).
+
+    Returns:
+        TIME_LEFT filled with whole hours and zero-padded minutes, e.g.
+        '5 hours and 03 minutes'. Spans of a day or more are expressed in
+        hours only, and an expiry in the past gives '0 hours and 00 minutes'.
+    """
+    expires = datetime.fromtimestamp(int(countdown_unix_tstamp))
+    # Compute from total seconds rather than splitting str(timedelta): that
+    # string becomes 'N day(s), H:MM:SS' (or '-1 day, ...') outside the
+    # 0-24h range, which leaked 'day' text into the hours field.
+    seconds_left = max(int((expires - datetime.now()).total_seconds()), 0)
+    hours, remainder = divmod(seconds_left, 3600)
+    minutes = remainder // 60
+    return TIME_LEFT.format(hours, '{:02d}'.format(minutes))
+
+
+def extract_book_data_page(content):
+    """Parse the free-learning page HTML into a Book record.
+
+    Args:
+        content: HTML source of the page as a string.
+
+    Returns:
+        Book with title and description as stripped text, image and link as
+        absolute URLs, timeleft as formatted by _create_time_left_string(),
+        and summary as the parsed element that follows the description div
+        (a BeautifulSoup node rather than a string, or None if absent).
+
+    Raises:
+        AttributeError: an expected element is missing, because find()
+            returned None and the following lookup was made on it.
+    """
+    # Imported here so the block stays self-contained.
+    from urllib.parse import urljoin
+
+    soup = Soup(content, 'html.parser')
+
+    book_image = soup.find('div', {'class': 'dotd-main-book-image'})
+    # urljoin resolves root-relative ('/x'), protocol-relative ('//host/x')
+    # and already-absolute URLs alike, where plain concatenation only
+    # handled one of those shapes per attribute.
+    link = urljoin(BASE_URL, book_image.find('a').get('href'))
+    image = urljoin(BASE_URL, book_image.find('img').get('src'))
+
+    book_main = soup.find('div', {'class': 'dotd-main-book-summary'})
+    title_div = book_main.find('div', {'class': 'dotd-title'})
+    title = title_div.find('h2').text.strip()
+
+    # The description and the summary are the next two sibling divs after
+    # the title div.
+    descr_div = title_div.find_next_sibling("div")
+    description = descr_div.text.strip()
+    summary_html = descr_div.find_next_sibling("div")
+
+    # The expiry timestamp is carried in a data attribute of the countdown.
+    js_countdown = book_main.find('span', {'class': 'packt-js-countdown'})
+    countdown = js_countdown.get('data-countdown-to')
+    timeleft = _create_time_left_string(countdown)
+
+    return Book(title=title,
+                description=description,
+                summary=summary_html,
+                image=image,
+                link=link,
+                timeleft=timeleft)
+
+
+def generate_mail_msg(book):
+    """Build the HTML body of the notification mail.
+
+    Args:
+        book: object with title, description, summary, image, link and
+            timeleft attributes (e.g. a Book).
+
+    Returns:
+        HTML fragment as a string. The plain-text fields are HTML-escaped
+        before insertion; book.summary is inserted as-is because it is
+        already markup.
+    """
+    # Imported here so the block stays self-contained.
+    from html import escape
+
+    # Titles and descriptions are scraped text: an apostrophe would end the
+    # single-quoted title/href attributes, and '&' or '<' would corrupt the
+    # markup. escape() also converts both quote characters by default.
+    return '''<h2><a href='{link}'>{title}</a></h2>
+        <div>{description}</div>
+        <img src='{image}' title='{title}'>
+        <hr>
+        {summary_html}
+        <h2><a href='{link}'>Download in {timeleft}</a></h2>'''.format(
+                link=escape(book.link),
+                title=escape(book.title),
+                description=escape(book.description),
+                image=escape(book.image),
+                summary_html=book.summary,
+                timeleft=escape(book.timeleft))
+
+
+def mail_html(subject, content, recipients=TO_MAIL):
+    """Send an HTML mail through the SMTP server on localhost.
+
+    Args:
+        subject: value for the Subject header.
+        content: HTML body of the message.
+        recipients: list of addresses; defaults to TO_MAIL. The list is
+            only read, never modified.
+
+    Raises:
+        smtplib.SMTPException or OSError if connecting or sending fails.
+    """
+    sender = FROM_MAIL
+    msg = MIMEMultipart('alternative')
+    msg['Subject'] = subject
+    msg['From'] = sender
+    msg['To'] = ", ".join(recipients)
+    msg.attach(MIMEText(content, 'html'))
+
+    # The context manager issues QUIT and closes the socket even when
+    # sendmail() raises, so a failed send no longer leaks the connection.
+    with smtplib.SMTP('localhost') as s:
+        s.sendmail(sender, recipients, msg.as_string())
+
+
+if __name__ == '__main__':
+    # Both a sender and at least one recipient must be configured.
+    if not (FROM_MAIL and TO_MAIL):
+        print('Please set FROM_MAIL and TO_PACKT_EMAILS env vars')
+        sys.exit(1)
+
+    # Fetch and parse today's offer, then mail it as an HTML message.
+    book = extract_book_data_page(retrieve_page_html())
+    mail_html(
+        SUBJECT.format(book.title, book.timeleft),
+        generate_mail_msg(book),
+    )
diff --git a/076/requirements.txt b/076/requirements.txt
@@ -0,0 +1,7 @@
+beautifulsoup4==4.6.0
+bs4==0.0.1
+certifi==2017.4.17
+chardet==3.0.4
+idna==2.5
+requests==2.17.3
+urllib3==1.21.1
diff --git a/LOG.md b/LOG.md
@@ -77,7 +77,7 @@
 | 073 | Jun 10, 2017 | [#Python script to download a file using #FTP](073) | A simple and quick script to download a file from an FTP server using ftplib. Would ideally be matched with a cron job. |
 | 074 | Jun 11, 2017 | [Using Pillow to add text and opacity to an image = your own cards](074) | Played with the Pillow module. Script to let user enter an image path (or url) and text to put on the image. Pillow does the rest. Could be a useful recipe to make your own Birthday cards :) |
 | 075 | Jun 12, 2017 | [TITLE](075) | LEARNING |
-| 076 | Jun 13, 2017 | [TITLE](076) | LEARNING |
+| 076 | Jun 13, 2017 | [Script to scrape Packt free ebook site and send html notification mail](076) | Using requests, BeautifulSoup, namedtuple, datetime. Cronjob script to send out a nice HTML email with the image and metadata of the book, and how long until the free offer expires. As it's [hard to automate this now with the new captcha](https://github.com/igbt6/Packt-Publishing-Free-Learning/issues/51), at least we get notified about each new title. |
 | 077 | Jun 14, 2017 | [TITLE](077) | LEARNING |
 | 078 | Jun 15, 2017 | [TITLE](078) | LEARNING |
 | 079 | Jun 16, 2017 | [TITLE](079) | LEARNING |