Skip to content

Commit

Permalink
InvalidLinkBear: Allow variable timeouts
Browse files Browse the repository at this point in the history
Allow variable timeouts for servers that are slow
to respond, thereby increasing the accuracy of
the invalid links reported by coala.

Closes coala#686
  • Loading branch information
meetmangukiya committed Dec 22, 2016
1 parent 47f61cc commit f72c246
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 5 deletions.
22 changes: 17 additions & 5 deletions bears/general/InvalidLinkBear.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import requests
from urllib.parse import urlparse

from difflib import SequenceMatcher

Expand All @@ -11,6 +12,7 @@
from coalib.bearlib import deprecate_settings
from coalib.settings.Setting import typed_list
from coalib.parsing.Globbing import fnmatch
from coalib.settings.Setting import typed_dict


class InvalidLinkBear(LocalBear):
Expand Down Expand Up @@ -48,7 +50,8 @@ def parse_pip_vcs_url(link):
return splitted_schema

@staticmethod
def find_links_in_file(file, timeout, link_ignore_regex, link_ignore_list):
def find_links_in_file(file, network_timeout, link_ignore_regex,
link_ignore_list):
link_ignore_regex = re.compile(link_ignore_regex)
regex = re.compile(
r"""
Expand Down Expand Up @@ -85,12 +88,15 @@ def find_links_in_file(file, timeout, link_ignore_regex, link_ignore_list):
fnmatch(link, link_ignore_list)):
if link.startswith(('hg+', 'bzr+', 'git+', 'svn+')):
link = InvalidLinkBear.parse_pip_vcs_url(link)
code = InvalidLinkBear.get_status_code(link, timeout)
host = urlparse(link).netloc
code = InvalidLinkBear.get_status_code(
link, network_timeout.get(
host, InvalidLinkBear.DEFAULT_TIMEOUT))
yield line_number + 1, link, code

@deprecate_settings(link_ignore_regex='ignore_regex')
def run(self, filename, file,
timeout: int=DEFAULT_TIMEOUT,
network_timeout: typed_dict(str, int, DEFAULT_TIMEOUT)=dict(),
link_ignore_regex: str='([.\/]example\.com|\{|\$)',
link_ignore_list: typed_list(str)='',
follow_redirects: bool=False):
Expand All @@ -108,13 +114,19 @@ def run(self, filename, file,
`do_not_ever_open = 'https://api.acme.inc/delete-all-data'` wiping out
all your data.
:param timeout: Request timeout period.
:param network_timeout: A dict mapping URLs to the timeout to be
used for that URL. Every link that has the
same host as one of the provided URLs will
be checked using that URL's timeout.
:param link_ignore_regex: A regex for urls to ignore.
:param link_ignore_list: Comma separated url globs to ignore
:param follow_redirects: Set to true to autocorrect redirects.
"""
network_timeout = {urlparse(url).netloc: timeout
for url, timeout in network_timeout.items()}

for line_number, link, code in InvalidLinkBear.find_links_in_file(
file, timeout, link_ignore_regex, link_ignore_list):
file, network_timeout, link_ignore_regex, link_ignore_list):
if code is None:
yield Result.from_values(
origin=self,
Expand Down
33 changes: 33 additions & 0 deletions tests/general/InvalidLinkBearTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import requests
import requests_mock
import unittest
import unittest.mock

from bears.general.InvalidLinkBear import InvalidLinkBear
from coalib.settings.Section import Section
Expand Down Expand Up @@ -229,3 +230,35 @@ def test_links_to_ignore(self):

self.assertResult(valid_file=valid_file,
settings={'link_ignore_list': link_ignore_list})

def test_variable_timeouts(self):
    """
    Check that every link is requested with the timeout configured for
    its host, and that a link whose host has no configured entry is
    requested with the fallback timeout (expected to be 2 here).
    """
    # The keys carry arbitrary paths on purpose: the links in the checked
    # file only share the host with them, not the full URL.
    timeouts_by_url = {
        'https://google.com/timeout/test/2/3/4/5/something': 10,
        'https://facebook.com/timeout': 15
    }

    lines = """
https://facebook.com/
https://google.com/
https://coala.io/som/thingg/page/123
""".splitlines()

    # A single canned "200 OK" response is handed back for every HEAD
    # request, so no link should be reported as invalid.
    ok_response = requests.Response()
    ok_response.status_code = 200

    expected_calls = [
        unittest.mock.call('https://facebook.com/', timeout=15,
                           allow_redirects=False),
        unittest.mock.call('https://google.com/',
                           timeout=10, allow_redirects=False),
        unittest.mock.call('https://coala.io/som/thingg/page/123',
                           timeout=2, allow_redirects=False),
    ]

    with unittest.mock.patch(
            'tests.general.InvalidLinkBearTest.requests.head',
            return_value=ok_response) as head_mock:
        bear = InvalidLinkBear(self.section, Queue())
        results = list(bear.run('file', lines,
                                network_timeout=timeouts_by_url))
        messages = [result.message for result in results]
        self.assertEqual(messages, [])
        head_mock.assert_has_calls(expected_calls)

0 comments on commit f72c246

Please sign in to comment.