Skip to content

Commit

Permalink
InvalidLinkBear: Add ignore_regex setting
Browse files Browse the repository at this point in the history
To ignore all URLs that match the regex
  • Loading branch information
SanketDG committed May 26, 2016
1 parent f9a83c4 commit d402bf5
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 7 deletions.
17 changes: 11 additions & 6 deletions bears/general/InvalidLinkBear.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,22 @@ def get_status_code(url, timeout):
pass

@staticmethod
def find_links_in_file(file, timeout):
def find_links_in_file(file, timeout, ignore_regex):
ignore_regex = re.compile(ignore_regex)
regex = re.compile(
r'((ftp|http)s?:\/\/\S+\.(?:[^\s\(\)\'\"\>\|]+|'
r'\([^\s\(\)]*\))*)(?<!\.)(?<!\,)')
for line_number, line in enumerate(file):
match = regex.search(line)
if match:
link = match.group()
code = InvalidLinkBear.get_status_code(link, timeout)
yield line_number + 1, link, code
if not ignore_regex.search(link):
code = InvalidLinkBear.get_status_code(link, timeout)
yield line_number + 1, link, code

def run(self, filename, file, timeout: int=DEFAULT_TIMEOUT):
def run(self, filename, file,
timeout: int=DEFAULT_TIMEOUT,
ignore_regex: str="[.\/]example\.com"):
"""
Find links in any text file and check if they are valid.
Expand All @@ -53,10 +57,11 @@ def run(self, filename, file, timeout: int=DEFAULT_TIMEOUT):
This bear can automatically fix redirects, but ignores redirect
URLs that have a huge difference with the original URL.
:param timeout: Request timeout period.
:param timeout: Request timeout period.
:param ignore_regex: A regex for urls to ignore.
"""
for line_number, link, code in InvalidLinkBear.find_links_in_file(
file, timeout):
file, timeout, ignore_regex):
if code is None:
yield Result.from_values(
origin=self,
Expand Down
31 changes: 30 additions & 1 deletion tests/general/InvalidLinkBearTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from bears.general.InvalidLinkBear import InvalidLinkBear
from coalib.settings.Section import Section
from coalib.settings.Setting import Setting


def custom_matcher(request):
Expand Down Expand Up @@ -38,10 +39,12 @@ class InvalidLinkBearTest(unittest.TestCase):
def setUp(self):
self.section = Section("")

def assertResult(self, valid_file=None, invalid_file=None):
def assertResult(self, valid_file=None, invalid_file=None, settings={}):
with requests_mock.Mocker() as m:
InvalidLinkBear.check_prerequisites = lambda *args: True
uut = InvalidLinkBear(self.section, Queue())
for name, value in settings.items():
self.section.append(Setting(name, value))
m.add_matcher(custom_matcher)
if valid_file:
out = uut.execute("valid", valid_file)
Expand Down Expand Up @@ -141,3 +144,29 @@ def test_redirect_threshold(self):

self.assertResult(valid_file=long_url_redirect,
invalid_file=short_url_redirect)

def test_ignore_regex(self):
# Exercises the ignore_regex setting in two passes: first without passing
# any setting (so whatever default the bear defines is in effect), then with
# a custom pattern supplied through assertResult's `settings` argument.

# URLs on a subdomain of example.com. They are passed as `valid_file`
# below -- presumably meaning no result is expected for them; confirm
# against the full body of assertResult.
ignored_URLs = """
http://sub.example.com
http://sub.example.com/something
""".splitlines()

# Near-miss hosts that are not example.com itself: a different name that
# merely ends in "example.com", a misspelling, and a different TLD.
# They are passed as `invalid_file` below.
not_ignored_URLs = """
http://www.notexample.com
http://exampe.com
http://example.co.in
""".splitlines()

self.assertResult(valid_file=ignored_URLs,
invalid_file=not_ignored_URLs)

# Second pass: override the pattern with '[1-9]{2}$', which matches a link
# whose last two characters are both digits in 1-9. ".../status/524" ends
# in "24" and matches; ".../status/503" ends in "03" and does not, because
# "0" is outside [1-9].
valid_file = """
http://httpbin.org/status/524
""".splitlines()
invalid_file = """
http://httpbin.org/status/503
""".splitlines()
self.assertResult(valid_file=valid_file,
invalid_file=invalid_file,
settings={'ignore_regex': '[1-9]{2}$'})

0 comments on commit d402bf5

Please sign in to comment.