Skip to content

Commit

Permalink
[pornhub] Bypass scrape detection (closes #5930)
Browse files Browse the repository at this point in the history
  • Loading branch information
dstftw committed Jan 22, 2019
1 parent 503b604 commit 278d061
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions youtube_dl/extractor/pornhub.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
from ..compat import (
compat_HTTPError,
compat_str,
compat_urllib_request,
)
from .openload import PhantomJSwrapper
from ..utils import (
ExtractorError,
int_or_none,
Expand Down Expand Up @@ -126,6 +128,26 @@ class PornHubIE(InfoExtractor):
'only_matching': True,
}]

def _download_webpage_handle(self, *args, **kwargs):
    """Download a page, retrying once through PhantomJS on a scrape challenge.

    Accepts exactly the arguments of the parent class'
    ``_download_webpage_handle`` and forwards them unchanged; the first
    positional argument must be the URL or a ``Request`` object.

    If the downloaded page matches one of the scrape-detection markers
    (a ``go()`` body onload handler, an ``RNKEY`` cookie assignment or a
    forced ``document.location.reload(true)``), the page is handed to
    PhantomJS and the original request is issued a second time.

    Returns whatever the parent implementation returns: normally a
    ``(webpage, urlh)`` pair. A falsy result (which the parent is
    understood to produce for a failed non-fatal download) is passed
    through untouched instead of being unpacked.
    """
    def dl(*args, **kwargs):
        return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs)

    ret = dl(*args, **kwargs)

    # A failed non-fatal download yields a falsy value rather than a
    # 2-tuple; unpacking it would raise TypeError, so hand it back as is.
    if not ret:
        return ret

    webpage, urlh = ret

    if any(re.search(p, webpage) for p in (
            r'<body\b[^>]+\bonload=["\']go\(\)',
            r'document\.cookie\s*=\s*["\']RNKEY=',
            r'document\.location\.reload\(true\)')):
        url_or_request = args[0]
        url = (url_or_request.get_full_url()
               if isinstance(url_or_request, compat_urllib_request.Request)
               else url_or_request)
        # NOTE(review): presumably PhantomJS runs the challenge script so
        # the resulting cookies end up in the shared cookie jar -- confirm
        # against PhantomJSwrapper.get.
        phantom = PhantomJSwrapper(self, required_version='2.0')
        phantom.get(url, html=webpage)
        # Repeat the original request; its result (including a falsy
        # failure value) is returned without unpacking.
        return dl(*args, **kwargs)

    return ret

@staticmethod
def _extract_urls(webpage):
return re.findall(
Expand Down

0 comments on commit 278d061

Please sign in to comment.