Skip to content

Commit

Permalink
Revert "New downloader using stdlib only"
Browse files Browse the repository at this point in the history
This reverts commit 7204633.
  • Loading branch information
Leandro Facchinetti committed Jul 1, 2013
1 parent f01a73f commit 321a58c
Showing 1 changed file with 113 additions and 103 deletions.
216 changes: 113 additions & 103 deletions ruby_tapas_downloader.rb
Original file line number Diff line number Diff line change
@@ -1,134 +1,144 @@
require 'mechanize'
require 'active_support/core_ext/string/inflections'
require 'logger'
require 'open-uri'
require 'rss'
require 'rexml/document'
require 'yaml'

class RubyTapasDownloader
FEED_URL = 'https://rubytapas.dpdcart.com/feed'

class Options
attr_reader :username
attr_reader :password
attr_reader :episodes_path

def initialize args
self.username, self.password, self.episodes_path = args
if [username, password].any?(&:nil?)
warn 'Usage: ruby ruby_tapas_downloader.rb <username> <password> ' \
'[episodes_path]'
exit 1
end
self.episodes_path ||= 'episodes'
end

protected
MAIN_URL = 'https://rubytapas.dpdcart.com/subscriber/content'
EPISODE_URL = 'https://rubytapas.dpdcart.com/subscriber/post?id='
FILE_URL = 'https://rubytapas.dpdcart.com/subscriber/download?file_id='

# Builds a downloader that keeps its episodes under +episodes_path+.
#
# Prepares an empty cache of fetched pages, the location of the
# `index.yml` episode index inside +episodes_path+, and a Mechanize agent
# that logs through the class-level logger.
def initialize(episodes_path = 'episodes')
  @pages = {}
  @episodes_path = episodes_path
  @index_filename = File.join(episodes_path, 'index.yml')
  @agent = Mechanize.new.tap { |agent| agent.log = self.class.logger }
end

attr_writer :username
attr_writer :password
attr_writer :episodes_path
# Runs the complete download workflow, logging when it begins and ends.
#
# The steps run strictly in the listed order: read credentials, restore
# the episode index, log in, collect episodes, collect each episode's
# files, and finally download the files.
def start
  self.class.logger.info("Starting download")
  [
    :retrieve_env_vars!,
    :restore_episodes!,
    :login_subscriber,
    :extract_episodes,
    :extract_files,
    :download_files
  ].each { |step| send(step) }
  self.class.logger.info("Finished download")
end

class Episode
class File
attr_reader :title
attr_reader :url
class << self
  # Allows callers to replace the shared logger.
  attr_writer :logger

  # Returns the shared logger, creating it on first use.
  #
  # The default logger writes to STDOUT. Its level is DEBUG only when the
  # `VERBOSE` environment variable is exactly 'true'; otherwise it is INFO.
  def logger
    @logger ||= begin
      verbose = ENV['VERBOSE'] == 'true'
      stdout_logger = Logger.new(STDOUT)
      stdout_logger.level = verbose ? Logger::DEBUG : Logger::INFO
      stdout_logger
    end
  end
end

def initialize title, url
self.title = title
self.url = url
private
def retrieve_env_vars!
@username = ENV['USERNAME']
@password = ENV['PASSWORD']
if @username.nil? || @password.nil?
self.class.logger.fatal("Set `USERNAME' and `PASSWORD' environment variables.")
exit 1
end

protected

attr_writer :title
attr_writer :url
end

attr_reader :title
attr_reader :files

def initialize title, files
self.title = title
self.files = files
# Loads the episode index from @index_filename into @episodes.
#
# An already-populated @episodes is left untouched. A missing or empty
# index file yields an empty Hash, so later `@episodes[id]` lookups are
# always safe.
#
# Returns the episode Hash.
def restore_episodes!
  self.class.logger.info("Restoring episode index from `#@index_filename'")
  @episodes ||=
    if File.exist?(@index_filename) # File.exists? was removed in Ruby 3.2
      # The index uses Symbol keys (:title, :files, ...), which Psych 4's
      # default loader rejects; permit exactly that class and nothing else.
      # An empty file parses to a falsy value, hence the `|| {}`.
      YAML.safe_load(File.read(@index_filename),
                     permitted_classes: [Symbol],
                     aliases: true) || {}
    else
      {}
    end
end

def canonical_title
title.downcase.gsub(/\s+/, '-')
# Serializes @episodes as YAML into @index_filename, creating the
# containing directory first when it does not exist yet.
def dump_episodes
  self.class.logger.info("Dumping episode index in `#@index_filename'")
  FileUtils.mkdir_p(File.dirname(@index_filename))
  # Block form closes the file even when serialization raises, instead of
  # leaking the handle as a bare File.open(...) followed by #close would.
  File.open(@index_filename, 'w') { |index| YAML.dump(@episodes, index) }
end

protected

attr_writer :title
attr_writer :files
end

attr_reader :options
# Logs the subscriber in through the site's login form.
#
# Fetches MAIN_URL, fills the form whose action starts with
# `/subscriber/login` with @username and @password, and submits it. The
# fetched page is cached as @pages[:login] and the page returned by the
# submission as @pages[:episodes_index].
def login_subscriber
  self.class.logger.info("Logging in subscriber `#@username'")
  landing_page = @agent.get(MAIN_URL)
  @pages[:login] = landing_page
  credentials_form = landing_page.form_with(action: %r{\A/subscriber/login})
  credentials_form.username = @username
  credentials_form.password = @password
  @pages[:episodes_index] = credentials_form.submit
end

def initialize args
self.options = Options.new args
end
# Registers every episode listed on the cached episodes index page.
#
# Each `.blog-entry` element contributes its `h3` text as the title and
# the digits after `id=` in the href of its last link as the episode id.
# Episodes already present in @episodes keep their stored data.
def extract_episodes
  self.class.logger.info("Extracting episodes information")
  @pages[:episodes_index].search('.blog-entry').each do |entry|
    episode_title = entry.search('h3').text
    episode_href = entry.search('a').last.attribute('href').value
    episode_id = episode_href.match(/id=(\d+)/)[1]
    @episodes[episode_id] ||= { title: episode_title }
  end
end

def start
download episodes
end
# Fills in the file list of every episode that does not have one yet.
#
# After each newly extracted list the index is dumped to disk right away.
# Episodes whose :files entry is already set are skipped.
def extract_files
  @episodes.each do |id, episode|
    unless episode[:files].nil?
      self.class.logger.debug("Skipping extraction of files information for episode `#{ episode[:title] }'")
      next
    end

    self.class.logger.info("Extracting files information for episode `#{ episode[:title] }'")
    episode[:files] = extract_episode_files(id)
    dump_episodes
  end
end

def episodes
if @episodes.nil?
self.class.logger.info 'Starting retrieval of episodes using feed ' \
"from `#{ FEED_URL }'"
rss = open(FEED_URL,
http_basic_authentication: [options.username,
options.password]).read
feed = RSS::Parser.parse rss
@episodes = feed.items.map { |item|
description = REXML::Document.new item.description
files = description.elements.to_a('//li//a').map { |link|
Episode::File.new link.text, link.attribute('href').value
def extract_episode_files id
@pages[:episodes] ||= {}
@pages[:episodes][id] =
episode_page = @agent.get(episode_url(id))
files_link = episode_page.links_with href: %r{\A/subscriber/download}
files_link.map { |file_link|
{
id: file_link.href.match(/file_id=(\d+)/)[1],
filename: file_link.text
}
Episode.new item.title, files
}
end
@episodes
end

def download episodes
Array(episodes).each do |episode|
self.class.logger.info "Starting download of episode " \
"`#{ episode.title }'"
episode_path = File.join options.episodes_path, episode.canonical_title
FileUtils.mkdir_p episode_path
episode.files.each do |episode_file|
file_path = File.join(episode_path, episode_file.title)
if File.exists? file_path
self.class.logger.debug "Skipping download of already existing " \
"file `#{ file_path }'"
else
self.class.logger.info "Starting download of file `#{ file_path }'"
open(episode_file.url, 'rb') do |content|
File.open(file_path, 'wb') do |file|
file.write content
end
# Downloads every file of every known episode into that episode's folder.
#
# The folder comes from episode_path and is created on demand. A file
# that already exists on disk is skipped, so an interrupted run can be
# resumed without fetching the same file twice.
def download_files
  @episodes.each_value do |episode|
    self.class.logger.info("Downloading files for episode `#{ episode[:title] }'")
    files = episode[:files]
    # The target folder is the same for all files of an episode, so resolve
    # and create it once; the distinct name also avoids shadowing the
    # episode_path method with a local variable.
    directory = episode_path(episode)
    FileUtils.mkdir_p(directory) if files.any?
    files.each do |file|
      filename = File.join(directory, file[:filename])
      if File.exist?(filename) # File.exists? was removed in Ruby 3.2
        self.class.logger.debug("Skipping already existing file `#{ filename }'")
      else
        self.class.logger.info("Start downloading file `#{ filename }'")
        @agent.download(file_url(file[:id]), filename)
        self.class.logger.info("Finish downloading file `#{ filename }'")
      end
    end
    self.class.logger.info("Finish downloading files for episode `#{ episode[:title] }'")
  end
end
end

class << self
attr_writer :logger

def logger
@logger ||= Logger.new(STDOUT).tap do |logger|
unless %w(1 true yes).include? ENV['VERBOSE']
logger.level = Logger::INFO
end
end
# Returns the subscriber page URL of the episode with the given +id+:
# EPISODE_URL followed by the id.
def episode_url(id)
  EPISODE_URL + id.to_s
end
end

protected
# Returns the download URL of the file with the given +id+: FILE_URL
# followed by the id.
def file_url(id)
  FILE_URL + id.to_s
end

attr_writer :options
# Returns the folder for +episode+: @episodes_path joined with the
# episode title passed through String#parameterize.
def episode_path(episode)
  folder_name = episode[:title].parameterize
  File.join(@episodes_path, folder_name)
end
end

RubyTapasDownloader.new(ARGV).start
RubyTapasDownloader.new.start

0 comments on commit 321a58c

Please sign in to comment.