Skip to content

Commit

Permalink
[YoutubeDL] Force redirect URL to unicode on python 2
Browse files Browse the repository at this point in the history
  • Loading branch information
dstftw committed Feb 29, 2020
1 parent e2f8bf5 commit fca6dba
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
4 changes: 3 additions & 1 deletion youtube_dl/YoutubeDL.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
YoutubeDLCookieJar,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
Expand Down Expand Up @@ -2343,6 +2344,7 @@ def _setup_opener(self):
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
redirect_handler = YoutubeDLRedirectHandler()
data_handler = compat_urllib_request_DataHandler()

# When passing our own FileHandler instance, build_opener won't add the
Expand All @@ -2356,7 +2358,7 @@ def file_open(*args, **kwargs):
file_handler.file_open = file_open

opener = compat_urllib_request.build_opener(
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play
Expand Down
9 changes: 9 additions & 0 deletions youtube_dl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2795,6 +2795,15 @@ def http_response(self, request, response):
https_response = http_response


class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that passes the redirect target on as unicode.

    The override is only installed on Python 2; on Python 3 the class is
    an unmodified subclass of the stock redirect handler.
    """

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            """Delegate to the stock handler with ``newurl`` coerced via compat_str.

            All other arguments are forwarded to the parent implementation
            unchanged, and its return value is returned as is.
            """
            # Python 2 quirk: urlh.geturl() can occasionally yield the
            # redirect URL as a byte string rather than unicode. Coercing
            # the target here makes the resulting URL consistently unicode.
            parent_redirect = compat_urllib_request.HTTPRedirectHandler.redirect_request
            unicode_url = compat_str(newurl)
            return parent_redirect(
                self, req, fp, code, msg, headers, unicode_url)


def extract_timezone(date_str):
m = re.search(
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
Expand Down

0 comments on commit fca6dba

Please sign in to comment.