forked from Jamie-Landeg-Jones/youtube-dl
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[dailymotion] Added support for subtitles + new InfoExtractor for
generic subtitle download. The idea is that all subtitle downloaders must descend from SubtitlesIE and implement only three basic methods to get the complete subtitle download functionality. This will allow the code in YoutubeIE to be reduced once it is rewritten.
- Loading branch information
Showing
4 changed files
with
242 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#!/usr/bin/env python | ||
|
||
import sys | ||
import unittest | ||
import json | ||
import io | ||
import hashlib | ||
|
||
# Allow direct execution | ||
import os | ||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||
|
||
from youtube_dl.extractor import DailymotionIE | ||
from youtube_dl.utils import * | ||
from helper import FakeYDL | ||
|
||
def md5(s):
    """Return the hexadecimal MD5 digest of the text *s* encoded as UTF-8."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()


# Video every test in this file runs the extractor against.
TEST_URL = 'http://www.dailymotion.com/video/xczg00'
|
||
class TestDailymotionSubtitles(unittest.TestCase):
    """Exercise DailymotionIE subtitle extraction against TEST_URL."""

    def setUp(self):
        # Keep the downloader on the instance so that every test starts from
        # the same baseline parameters and only overrides what it exercises.
        # (Previously the FakeYDL built here was a discarded local, so these
        # defaults never reached any test.)
        self.DL = FakeYDL()
        self.DL.params['allsubtitles'] = False
        self.DL.params['writesubtitles'] = False
        self.DL.params['subtitlesformat'] = 'srt'
        self.DL.params['listsubtitles'] = False

    def _extract(self):
        """Run DailymotionIE on TEST_URL with the current parameters."""
        return DailymotionIE(self.DL).extract(TEST_URL)

    def _subtitles(self):
        """Return the 'subtitles' entry of the first extracted result."""
        return self._extract()[0]['subtitles']

    def test_no_subtitles(self):
        self.DL.params['writesubtitles'] = False
        self.assertEqual(self._subtitles(), None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        sub = self._subtitles()['en']
        self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')

    def test_subtitles_fr(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslang'] = 'fr'
        sub = self._subtitles()['fr']
        self.assertEqual(md5(sub), '594564ec7d588942e384e920e5341792')

    def test_onlysubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['onlysubtitles'] = True
        sub = self._subtitles()['en']
        self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')

    def test_allsubtitles(self):
        self.DL.params['allsubtitles'] = True
        subtitles = self._subtitles()
        self.assertEqual(len(subtitles.keys()), 5)

    # NOTE: the sbv/vtt format tests below were already disabled in the
    # original file; they are kept verbatim for reference.
    # def test_subtitles_sbv_format(self):
    #     DL = FakeYDL()
    #     DL.params['writesubtitles'] = True
    #     DL.params['subtitlesformat'] = 'sbv'
    #     IE = DailymotionIE(DL)
    #     info_dict = IE.extract(TEST_URL)
    #     sub = info_dict[0]['subtitles'][0]
    #     self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
    # def test_subtitles_vtt_format(self):
    #     DL = FakeYDL()
    #     DL.params['writesubtitles'] = True
    #     DL.params['subtitlesformat'] = 'vtt'
    #     IE = DailymotionIE(DL)
    #     info_dict = IE.extract(TEST_URL)
    #     sub = info_dict[0]['subtitles'][0]
    #     self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')

    def test_list_subtitles(self):
        self.DL.params['listsubtitles'] = True
        # With 'listsubtitles' set, the test expects extract() to return None.
        self.assertEqual(self._extract(), None)

    def test_automatic_captions(self):
        self.DL.params['writeautomaticsub'] = True
        self.DL.params['subtitleslang'] = 'en'
        sub = self._subtitles()
        # assertEqual reports the actual length on failure.
        self.assertEqual(len(sub), 0)
|
||
if __name__ == '__main__':
    # Run the test suite when this file is executed directly.
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import socket | ||
|
||
from .common import InfoExtractor | ||
|
||
from ..utils import ( | ||
compat_http_client, | ||
compat_urllib_error, | ||
compat_urllib_request, | ||
compat_str, | ||
) | ||
|
||
|
||
class SubtitlesIE(InfoExtractor):
    """Base extractor implementing the generic subtitle download flow.

    Subclasses redefine _get_available_subtitles() and _get_subtitle_url()
    (and _request_automatic_caption() where applicable); the methods defined
    here take care of listing, selecting and downloading the subtitles.
    """

    def report_video_subtitles_available(self, video_id, sub_lang_list):
        """Report available subtitles."""
        # Iterating the mapping yields its language codes.  A falsy value
        # (e.g. the None returned by the default _get_available_subtitles)
        # is reported as an empty list instead of raising.
        sub_lang = ",".join(sub_lang_list or ())
        self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))

    def _list_available_subtitles(self, video_id):
        """Fetch the available subtitles for video_id and print them."""
        sub_lang_list = self._get_available_subtitles(video_id)
        self.report_video_subtitles_available(video_id, sub_lang_list)

    def _extract_subtitles(self, video_id):
        """
        Return a dictionary: {language: subtitles} or {} if the subtitles
        couldn't be found
        """
        params = self._downloader.params
        sub_lang_list = self._get_available_subtitles(video_id)
        if not sub_lang_list:
            # There was some error, it didn't get the available subtitles
            return {}
        sub_format = params.get('subtitlesformat')

        if params.get('writesubtitles', False):
            # Only one language is downloaded: the requested one, else
            # English, else whichever language the mapping yields first.
            sub_lang = params.get('subtitleslang')
            if not sub_lang:
                if 'en' in sub_lang_list:
                    sub_lang = 'en'
                else:
                    sub_lang = list(sub_lang_list.keys())[0]
            if sub_lang not in sub_lang_list:
                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
                return {}
            sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}

        subtitles = {}
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
            if subtitle:
                subtitles[sub_lang] = subtitle
        return subtitles

    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        """ Return the subtitle as a string or None if they are not found """
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
        url = self._get_subtitle_url(sub_lang, sub_name, video_id, format)
        try:
            response = compat_urllib_request.urlopen(url)
            try:
                sub = response.read().decode('utf-8')
            finally:
                # Always release the connection, even if reading or
                # decoding fails.
                response.close()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            return
        if not sub:
            self._downloader.report_warning(u'Did not fetch video subtitles')
            return
        return sub

    def _get_available_subtitles(self, video_id):
        """Get available subtitles. Redefine in subclasses.

        Expected to return a mapping keyed by language code (the original
        note reads "returns {(lang, url)}"); each value is passed, UTF-8
        encoded, as ``sub_name`` to _get_subtitle_url().  A falsy result is
        treated by the callers as "no subtitles available".  This default
        implementation returns None.
        """

    def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
        """returns the url for the given subtitle. Redefine in subclasses."""

    def _request_automatic_caption(self, video_id, webpage):
        """Request automatic caption. Redefine in subclasses.

        The return shape is not fixed by this base class; the original note
        only says "returns a tuple of ..." and hints at
        ``[(err_msg, None, None)]`` -- confirm against the subclasses.
        """