forked from stupied4ever/ruby-tapas-downloader
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "New downloader using stdlib only"
This reverts commit 7204633.
- Loading branch information
Leandro Facchinetti
committed
Jul 1, 2013
1 parent
f01a73f
commit 321a58c
Showing
1 changed file
with
113 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,134 +1,144 @@ | ||
require 'mechanize' | ||
require 'active_support/core_ext/string/inflections' | ||
require 'logger' | ||
require 'open-uri' | ||
require 'rss' | ||
require 'rexml/document' | ||
require 'yaml' | ||
|
||
class RubyTapasDownloader | ||
FEED_URL = 'https://rubytapas.dpdcart.com/feed' | ||
|
||
class Options | ||
attr_reader :username | ||
attr_reader :password | ||
attr_reader :episodes_path | ||
|
||
# Builds the options from the positional command-line arguments:
# username, password and an optional episodes path.
#
# Prints a usage message to stderr and exits with status 1 when either
# the username or the password is missing. The episodes path falls back
# to 'episodes' when it is not given.
def initialize(args)
  user, pass, path = args
  self.username = user
  self.password = pass
  self.episodes_path = path

  if username.nil? || password.nil?
    warn 'Usage: ruby ruby_tapas_downloader.rb <username> <password> ' \
         '[episodes_path]'
    exit 1
  end

  self.episodes_path = 'episodes' unless episodes_path
end
|
||
protected | ||
MAIN_URL = 'https://rubytapas.dpdcart.com/subscriber/content' | ||
EPISODE_URL = 'https://rubytapas.dpdcart.com/subscriber/post?id=' | ||
FILE_URL = 'https://rubytapas.dpdcart.com/subscriber/download?file_id=' | ||
|
||
# Prepares a downloader that stores everything under +episodes_path+.
#
# Sets up the Mechanize agent (logging through the class-level logger),
# the location of the episode index file inside +episodes_path+, and an
# empty cache of fetched pages.
def initialize(episodes_path = 'episodes')
  @pages = {}
  @episodes_path = episodes_path
  @index_filename = File.join(@episodes_path, 'index.yml')
  @agent = Mechanize.new.tap { |agent| agent.log = self.class.logger }
end
|
||
attr_writer :username | ||
attr_writer :password | ||
attr_writer :episodes_path | ||
# Runs the whole download pipeline, logging when it begins and ends.
#
# The steps run strictly in the listed order: credentials are read, the
# episode index is restored, the subscriber is logged in, episodes and
# their files are extracted, and finally the files are downloaded.
def start
  log = self.class.logger
  log.info("Starting download")

  steps = [
    :retrieve_env_vars!,
    :restore_episodes!,
    :login_subscriber,
    :extract_episodes,
    :extract_files,
    :download_files
  ]
  # `send` is used because the steps are private methods.
  steps.each { |step| send(step) }

  log.info("Finished download")
end
|
||
class Episode | ||
class File | ||
attr_reader :title | ||
attr_reader :url | ||
class << self
  # Lets callers replace the shared logger (assign nil to have the default
  # one rebuilt on the next call to +logger+).
  attr_writer :logger

  # Returns the shared logger, creating it on first use.
  #
  # The default logger writes to STDOUT. Its level is DEBUG when the
  # VERBOSE environment variable is one of "1", "true" or "yes", and INFO
  # otherwise.
  def logger
    @logger ||= Logger.new(STDOUT).tap do |logger|
      verbose = %w(1 true yes).include?(ENV['VERBOSE'])
      logger.level = verbose ? Logger::DEBUG : Logger::INFO
    end
  end
end
|
||
def initialize title, url | ||
self.title = title | ||
self.url = url | ||
private | ||
def retrieve_env_vars! | ||
@username = ENV['USERNAME'] | ||
@password = ENV['PASSWORD'] | ||
if @username.nil? || @password.nil? | ||
self.class.logger.fatal("Set `USERNAME' and `PASSWORD' environment variables.") | ||
exit 1 | ||
end | ||
|
||
protected | ||
|
||
attr_writer :title | ||
attr_writer :url | ||
end | ||
|
||
attr_reader :title | ||
attr_reader :files | ||
|
||
def initialize title, files | ||
self.title = title | ||
self.files = files | ||
# Loads the episode index from the index file into @episodes, unless
# @episodes is already set.
#
# A missing or empty index file results in an empty Hash, so later steps
# can always treat @episodes as a Hash.
def restore_episodes!
  self.class.logger.info("Restoring episode index from `#@index_filename'")
  @episodes ||= begin
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?(@index_filename)
      # The index is dumped with Symbol keys (:title, :files, ...), which
      # the safe loader rejects unless Symbol is explicitly permitted.
      loaded = YAML.safe_load(File.read(@index_filename),
                              permitted_classes: [Symbol],
                              aliases: true)
      # An empty file parses to nil/false; fall back to an empty index.
      loaded || {}
    else
      {}
    end
  end
end
|
||
def canonical_title | ||
title.downcase.gsub(/\s+/, '-') | ||
# Writes @episodes as YAML to the index file, creating the containing
# directory first if needed.
def dump_episodes
  self.class.logger.info("Dumping episode index in `#@index_filename'")
  FileUtils.mkdir_p File.dirname(@index_filename)
  # Block form guarantees the file is closed even if dumping raises.
  File.open(@index_filename, 'w') { |file| YAML.dump(@episodes, file) }
end
|
||
protected | ||
|
||
attr_writer :title | ||
attr_writer :files | ||
end | ||
|
||
attr_reader :options | ||
# Logs the subscriber in through the site's login form.
#
# Fetches MAIN_URL, fills the form whose action starts with
# /subscriber/login using @username and @password, and submits it. The
# login page is cached under @pages[:login] and the page returned by the
# submission under @pages[:episodes_index].
def login_subscriber
  self.class.logger.info("Logging in subscriber `#@username'")

  login_page = @agent.get(MAIN_URL)
  @pages[:login] = login_page

  form = login_page.form_with(action: %r{\A/subscriber/login})
  form.username = @username
  form.password = @password

  @pages[:episodes_index] = form.submit
end
|
||
# Wraps the raw command-line arguments in an Options object and stores it
# through the options writer.
def initialize(args)
  parsed = Options.new(args)
  self.options = parsed
end
# Collects the episodes listed on the cached episodes index page.
#
# Every `.blog-entry` element contributes its `h3` text as the title and
# the `id=` number from the href of its last link as the key. Episodes
# already present in @episodes are left untouched. Entries that have no
# link, or whose link carries no `id=` number, are skipped with a warning
# instead of aborting the whole run.
def extract_episodes
  self.class.logger.info("Extracting episodes information")
  @pages[:episodes_index].search('.blog-entry').each do |episode_element|
    title = episode_element.search('h3').text
    id = episode_id_from(episode_element)
    if id.nil?
      self.class.logger.warn("Skipping episode `#{ title }' without a recognizable id")
      next
    end
    @episodes[id] ||= { title: title }
  end
end

# Returns the digits following `id=` in the href of the entry's last link,
# or nil when the link, its href, or the id is missing.
def episode_id_from(episode_element)
  link = episode_element.search('a').last
  href = link && link.attribute('href')
  match = href && href.value.match(/id=(\d+)/)
  match && match[1]
end
|
||
# Entry point: downloads every episode returned by +episodes+.
def start
  download(episodes)
end
# Fills in the :files entry of every episode that does not have one yet.
#
# After each extraction the index is dumped again, so the work done so
# far is persisted. Episodes that already carry :files are only mentioned
# at debug level.
def extract_files
  log = self.class.logger
  @episodes.each do |id, episode|
    unless episode[:files].nil?
      log.debug("Skipping extraction of files information for episode `#{ episode[:title] }'")
      next
    end

    log.info("Extracting files information for episode `#{ episode[:title] }'")
    episode[:files] = extract_episode_files(id)
    dump_episodes
  end
end
|
||
def episodes | ||
if @episodes.nil? | ||
self.class.logger.info 'Starting retrieval of episodes using feed ' \ | ||
"from `#{ FEED_URL }'" | ||
rss = open(FEED_URL, | ||
http_basic_authentication: [options.username, | ||
options.password]).read | ||
feed = RSS::Parser.parse rss | ||
@episodes = feed.items.map { |item| | ||
description = REXML::Document.new item.description | ||
files = description.elements.to_a('//li//a').map { |link| | ||
Episode::File.new link.text, link.attribute('href').value | ||
def extract_episode_files id | ||
@pages[:episodes] ||= {} | ||
@pages[:episodes][id] = | ||
episode_page = @agent.get(episode_url(id)) | ||
files_link = episode_page.links_with href: %r{\A/subscriber/download} | ||
files_link.map { |file_link| | ||
{ | ||
id: file_link.href.match(/file_id=(\d+)/)[1], | ||
filename: file_link.text | ||
} | ||
Episode.new item.title, files | ||
} | ||
end | ||
@episodes | ||
end | ||
|
||
def download episodes | ||
Array(episodes).each do |episode| | ||
self.class.logger.info "Starting download of episode " \ | ||
"`#{ episode.title }'" | ||
episode_path = File.join options.episodes_path, episode.canonical_title | ||
FileUtils.mkdir_p episode_path | ||
episode.files.each do |episode_file| | ||
file_path = File.join(episode_path, episode_file.title) | ||
if File.exists? file_path | ||
self.class.logger.debug "Skipping download of already existing " \ | ||
"file `#{ file_path }'" | ||
else | ||
self.class.logger.info "Starting download of file `#{ file_path }'" | ||
open(episode_file.url, 'rb') do |content| | ||
File.open(file_path, 'wb') do |file| | ||
file.write content | ||
end | ||
# Downloads every file of every episode into that episode's directory.
#
# Files already present on disk are skipped, so an interrupted run can be
# resumed. The episode directory is created only for episodes that have
# at least one file.
def download_files
  @episodes.each_value do |episode|
    self.class.logger.info("Downloading files for episode `#{ episode[:title] }'")
    files = episode[:files]
    # Computed once per episode; a distinct local name avoids shadowing the
    # episode_path method.
    directory = episode_path(episode)
    FileUtils.mkdir_p(directory) unless files.empty?

    files.each do |file|
      filename = File.join directory, file[:filename]
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      if File.exist? filename
        self.class.logger.debug("Skipping already existing file `#{ filename }'")
      else
        self.class.logger.info("Start downloading file `#{ filename }'")
        @agent.download file_url(file[:id]), filename
        self.class.logger.info("Finish downloading file `#{ filename }'")
      end
    end
    self.class.logger.info("Finish downloading files for episode `#{ episode[:title] }'")
  end
end
end | ||
|
||
class << self | ||
attr_writer :logger | ||
|
||
def logger | ||
@logger ||= Logger.new(STDOUT).tap do |logger| | ||
unless %w(1 true yes).include? ENV['VERBOSE'] | ||
logger.level = Logger::INFO | ||
end | ||
end | ||
# Returns the URL of the episode page for +id+: EPISODE_URL followed by
# the id.
def episode_url(id)
  EPISODE_URL + id.to_s
end
end | ||
|
||
protected | ||
# Returns the download URL of the file with +id+: FILE_URL followed by
# the id.
def file_url(id)
  FILE_URL + id.to_s
end
|
||
attr_writer :options | ||
# Returns the directory for +episode+: the parameterized episode title
# placed under @episodes_path.
def episode_path(episode)
  slug = episode[:title].parameterize
  File.join(@episodes_path, slug)
end
end | ||
|
||
RubyTapasDownloader.new(ARGV).start | ||
RubyTapasDownloader.new.start |