Skip to content

Commit

Permalink
[generic] Add support for RSS feeds (Fixes #667)
Browse files Browse the repository at this point in the history
  • Loading branch information
phihag committed Feb 20, 2014
1 parent 280bc5d commit 4fc946b
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
9 changes: 9 additions & 0 deletions test/test_playlists.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,5 +250,14 @@ def test_GoogleSearch(self):
self.assertEqual(result['title'], 'python language')
self.assertTrue(len(result['entries']) == 15)

def test_generic_rss_feed(self):
    """Check that GenericIE turns an RSS feed URL into a playlist result."""
    feed_url = 'http://www.escapistmagazine.com/rss/videos/list/1.xml'
    extractor = GenericIE(FakeYDL())
    playlist = extractor.extract(feed_url)

    self.assertIsPlaylist(playlist)
    # The playlist id is expected to be the feed URL itself.
    self.assertEqual(playlist['id'], feed_url)
    self.assertEqual(playlist['title'], 'Zero Punctuation')
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count > 10)

# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()
28 changes: 28 additions & 0 deletions youtube_dl/extractor/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import os
import re
import xml.etree.ElementTree

from .common import InfoExtractor
from .youtube import YoutubeIE
Expand Down Expand Up @@ -159,6 +160,25 @@ def http_error_405(self, req, fp, code, msg, headers):
raise ExtractorError('Invalid URL protocol')
return response

def _extract_rss(self, url, video_id, doc):
    """Build a playlist result dict from a parsed RSS document.

    Parameters:
        url: address the feed was loaded from; used as the playlist id.
        video_id: accepted for interface compatibility; not used here.
        doc: root element of the parsed feed (an ElementTree element
            whose children are looked up via ``./channel/...`` paths).

    Returns a dict with ``_type`` 'playlist', the feed URL as ``id``,
    the channel title and description (each None when absent), and one
    ``_type`` 'url' entry per ``<item>`` that carries a link.
    """
    def text_of(parent, path):
        # Element.find() returns None for a missing child, so reading
        # .text directly would raise AttributeError on sparse feeds.
        node = parent.find(path)
        return None if node is None else node.text

    entries = []
    for item in doc.findall('./channel/item'):
        link = text_of(item, 'link')
        if not link:
            # RSS 2.0 makes every <item> child optional; an item with no
            # link gives no URL to put in the entry, so skip it instead
            # of aborting the whole feed.
            continue
        entries.append({
            '_type': 'url',
            'url': link,
            # The title may legitimately be missing (None in that case).
            'title': text_of(item, 'title'),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': text_of(doc, './channel/title'),
        'description': text_of(doc, './channel/description'),
        'entries': entries,
    }

def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
Expand Down Expand Up @@ -219,6 +239,14 @@ def _real_extract(self, url):

self.report_extraction(video_id)

# Is it an RSS feed?
try:
doc = xml.etree.ElementTree.fromstring(webpage)
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
except xml.etree.ElementTree.ParseError:
pass

# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
Expand Down

0 comments on commit 4fc946b

Please sign in to comment.