Skip to content

Commit

Permalink
[dailymotion] Added support for subtitles + new InfoExtractor for
Browse files Browse the repository at this point in the history
generic subtitle download.

The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to get the complete subtitle
download functionality. This will allow us to reduce the code in YoutubeIE
once it is rewritten.
  • Loading branch information
iemejia committed Aug 7, 2013
1 parent 5898e28 commit 953e32b
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 11 deletions.
96 changes: 96 additions & 0 deletions test/test_dailymotion_subtitles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python

import sys
import unittest
import json
import io
import hashlib

# Allow direct execution
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from youtube_dl.extractor import DailymotionIE
from youtube_dl.utils import *
from helper import FakeYDL

def md5(s):
    """Return the hexadecimal MD5 digest of the text *s*, encoded as UTF-8."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()


# Dailymotion video used by every subtitle test in this module.
TEST_URL = 'http://www.dailymotion.com/video/xczg00'

class TestDailymotionSubtitles(unittest.TestCase):
    """Exercise the subtitle options of DailymotionIE against TEST_URL.

    Every test runs a real extraction, so network access is required.
    """

    def setUp(self):
        # Keep the downloader on the instance: each test then starts from the
        # same subtitle defaults and only overrides the option it checks.
        self.DL = FakeYDL()
        self.DL.params['allsubtitles'] = False
        self.DL.params['writesubtitles'] = False
        self.DL.params['subtitlesformat'] = 'srt'
        self.DL.params['listsubtitles'] = False

    def _extract(self):
        """Run DailymotionIE on TEST_URL with the current downloader params."""
        IE = DailymotionIE(self.DL)
        return IE.extract(TEST_URL)

    def test_no_subtitles(self):
        self.DL.params['writesubtitles'] = False
        info_dict = self._extract()
        subtitles = info_dict[0]['subtitles']
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        info_dict = self._extract()
        sub = info_dict[0]['subtitles']['en']
        self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_fr(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslang'] = 'fr'
        info_dict = self._extract()
        sub = info_dict[0]['subtitles']['fr']
        self.assertEqual(md5(sub), '594564ec7d588942e384e920e5341792')

    def test_onlysubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['onlysubtitles'] = True
        info_dict = self._extract()
        sub = info_dict[0]['subtitles']['en']
        self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')

    def test_allsubtitles(self):
        self.DL.params['allsubtitles'] = True
        info_dict = self._extract()
        subtitles = info_dict[0]['subtitles']
        self.assertEqual(len(subtitles.keys()), 5)

    # The two format tests below were already commented out in the original
    # and are kept here for reference.
    # def test_subtitles_sbv_format(self):
    #     self.DL.params['writesubtitles'] = True
    #     self.DL.params['subtitlesformat'] = 'sbv'
    #     info_dict = self._extract()
    #     sub = info_dict[0]['subtitles'][0]
    #     self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
    # def test_subtitles_vtt_format(self):
    #     self.DL.params['writesubtitles'] = True
    #     self.DL.params['subtitlesformat'] = 'vtt'
    #     info_dict = self._extract()
    #     sub = info_dict[0]['subtitles'][0]
    #     self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')

    def test_list_subtitles(self):
        # With 'listsubtitles' set the test expects no info dict at all.
        self.DL.params['listsubtitles'] = True
        info_dict = self._extract()
        self.assertEqual(info_dict, None)

    def test_automatic_captions(self):
        # The expected result for automatic captions is an empty collection.
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslang'] = 'en'
        info_dict = self._extract()
        sub = info_dict[0]['subtitles']
        self.assertEqual(len(sub), 0)

# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
unittest.main()
10 changes: 5 additions & 5 deletions youtube_dl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,22 +187,22 @@ def _find_term_columns():
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
video_format.add_option('--write-sub', '--write-srt',
action='store_true', dest='writesubtitles',
help='write subtitle file (currently youtube only)', default=False)
help='write subtitle file', default=False)
video_format.add_option('--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub',
help='write automatic subtitle file (currently youtube only)', default=False)
help='write automatic subtitle file (youtube only)', default=False)
video_format.add_option('--only-sub',
action='store_true', dest='skip_download',
help='[deprecated] alias of --skip-download', default=False)
video_format.add_option('--all-subs',
action='store_true', dest='allsubtitles',
help='downloads all the available subtitles of the video (currently youtube only)', default=False)
help='downloads all the available subtitles of the video', default=False)
video_format.add_option('--list-subs',
action='store_true', dest='listsubtitles',
help='lists all available subtitles for the video (currently youtube only)', default=False)
help='lists all available subtitles for the video', default=False)
video_format.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT',
help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
video_format.add_option('--sub-lang', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
Expand Down
67 changes: 61 additions & 6 deletions youtube_dl/extractor/dailymotion.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,49 @@
import re
import json
import itertools
import socket

from .common import InfoExtractor
from .subtitles import SubtitlesIE

from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str,
get_element_by_attribute,
get_element_by_id,

ExtractorError,
)

class DailymotionIE(InfoExtractor):

class DailyMotionSubtitlesIE(SubtitlesIE):
    """Subtitle support for Dailymotion, backed by its subtitles API endpoint."""

    def _get_available_subtitles(self, video_id):
        """Return a {language: subtitle url} dict for *video_id*.

        An empty dict is returned, after a warning, when the request fails,
        the response is not usable JSON, or the video has no subtitles.
        """
        request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
        try:
            response = compat_urllib_request.urlopen(request)
            try:
                sub_list = response.read().decode('utf-8')
            finally:
                # Always release the connection, even if reading/decoding fails.
                response.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        try:
            info = json.loads(sub_list)
        except ValueError as err:
            # A body that is not valid JSON is treated like a failed download.
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
            return {}
        # A payload without 'total'/'list' (e.g. an error object) is treated
        # as "no subtitles" instead of raising KeyError.
        if isinstance(info, dict) and (info.get('total') or 0) > 0:
            sub_lang_list = dict((l['language'], l['url']) for l in (info.get('list') or []))
            if sub_lang_list:
                return sub_lang_list
        self._downloader.report_warning(u'video doesn\'t have subtitles')
        return {}

    def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
        """Return the subtitle url for *sub_lang*.

        The url is looked up in a fresh _get_available_subtitles() result, so
        this performs another API request. *sub_name* and *format* are not
        used. Raises KeyError if *sub_lang* is not among the available
        languages.
        """
        sub_lang_list = self._get_available_subtitles(video_id)
        return sub_lang_list[sub_lang]

    def _request_automatic_caption(self, video_id, webpage):
        """Warn that automatic captions are unsupported and return an empty dict."""
        self._downloader.report_warning(u'Automatic Captions not supported by dailymotion')
        return {}


class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
"""Information Extractor for Dailymotion"""

_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
Expand All @@ -18,7 +53,7 @@ class DailymotionIE(InfoExtractor):
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
u"uploader": u"Alex and Van .",
u"uploader": u"Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
Expand Down Expand Up @@ -57,23 +92,43 @@ def _real_extract(self, url):

# TODO: support choosing qualities

for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
'stream_h264_hq_url','stream_h264_url',
for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
'stream_h264_hq_url', 'stream_h264_url',
'stream_h264_ld_url']:
if info.get(key):#key in info and info[key]:
if info.get(key): # key in info and info[key]:
max_quality = key
self.to_screen(u'Using %s' % key)
self.to_screen(u'%s: Using %s' % (video_id, key))
break
else:
raise ExtractorError(u'Unable to extract video URL')
video_url = info[max_quality]

# subtitles
video_subtitles = None
video_webpage = None

if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
video_subtitles = self._extract_subtitles(video_id)
elif self._downloader.params.get('writeautomaticsub', False):
video_subtitles = self._request_automatic_caption(video_id, video_webpage)

if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
return

if 'length_seconds' not in info:
self._downloader.report_warning(u'unable to extract video duration')
video_duration = ''
else:
video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

return [{
'id': video_id,
'url': video_url,
'uploader': video_uploader,
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'ext': video_extension,
'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url']
}]
80 changes: 80 additions & 0 deletions youtube_dl/extractor/subtitles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import socket

from .common import InfoExtractor

from ..utils import (
compat_http_client,
compat_urllib_error,
compat_urllib_request,
compat_str,
)


class SubtitlesIE(InfoExtractor):
    """Base class for extractors that can list and download subtitles.

    Subclasses redefine _get_available_subtitles, _get_subtitle_url and
    _request_automatic_caption; the listing and download logic is shared here.
    """

    def report_video_subtitles_available(self, video_id, sub_lang_list):
        """Report available subtitles."""
        sub_lang = ",".join(sub_lang_list.keys())
        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))

    def _list_available_subtitles(self, video_id):
        """Print the subtitle languages available for *video_id*."""
        # Guard against subclasses that return None when nothing is available.
        sub_lang_list = self._get_available_subtitles(video_id) or {}
        self.report_video_subtitles_available(video_id, sub_lang_list)

    def _extract_subtitles(self, video_id):
        """
        Return a dictionary: {language: subtitles} or {} if the subtitles
        couldn't be found
        """
        params = self._downloader.params
        sub_lang_list = self._get_available_subtitles(video_id)
        sub_format = params.get('subtitlesformat')
        if not sub_lang_list:  # There was some error, it didn't get the available subtitles
            return {}
        # Narrow to a single language only when the user did not also ask for
        # all subtitles; 'allsubtitles' must win when both options are set.
        if params.get('writesubtitles', False) and not params.get('allsubtitles', False):
            sub_lang = params.get('subtitleslang')
            if not sub_lang:
                # No explicit language: prefer English, else any available one.
                if 'en' in sub_lang_list:
                    sub_lang = 'en'
                else:
                    sub_lang = list(sub_lang_list.keys())[0]
            if sub_lang not in sub_lang_list:
                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                return {}
            sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
        subtitles = {}
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        """ Return the subtitle as a string or None if they are not found """
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
        url = self._get_subtitle_url(sub_lang, sub_name, video_id, format)
        if not url:
            # Without a url there is nothing to fetch; warn instead of letting
            # urlopen fail with an unrelated error.
            self._downloader.report_warning(u'unable to find subtitle url for %s' % sub_lang)
            return
        try:
            response = compat_urllib_request.urlopen(url)
            try:
                sub = response.read().decode('utf-8')
            finally:
                # Always release the connection, even if reading/decoding fails.
                response.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return
        return sub

    def _get_available_subtitles(self, video_id):
        """Get available subtitles. Redefine in subclasses.

        Subclasses return a {language: value} dict; the default
        implementation reports that nothing is available.
        """
        return {}

    def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
        """returns the url for the given subtitle. Redefine in subclasses."""
        return None

    def _request_automatic_caption(self, video_id, webpage):
        """Request automatic caption. Redefine in subclasses.

        NOTE(review): the return shape is not fixed by this base class (the
        original note says "a tuple of ..."); confirm against subclasses.
        """
        return None

0 comments on commit 953e32b

Please sign in to comment.