Commit: Update noisy.py
ail1020 authored Aug 5, 2018
1 parent 91c3927 · commit 194da05
Showing 1 changed file with 17 additions and 17 deletions.
noisy.py: 34 changes (17 additions & 17 deletions)
@@ -230,27 +230,27 @@ def crawl(self):
         self._start_time = datetime.datetime.now()

         while True:
-            for url in self._config["root_urls"]:
-                try:
-                    body = self._request(url).content
-                    self._links = self._extract_urls(body, url)
-                    logging.debug("found {} links".format(len(self._links)))
-                    self._browse_from_links()
-
-                except requests.exceptions.RequestException:
-                    logging.warn("Error connecting to root url: {}".format(url))
+            url = random.choice(self._config["root_urls"])
+            try:
+                body = self._request(url).content
+                self._links = self._extract_urls(body, url)
+                logging.debug("found {} links".format(len(self._links)))
+                self._browse_from_links()
+
+            except requests.exceptions.RequestException:
+                logging.warn("Error connecting to root url: {}".format(url))

-                except MemoryError:
-                    logging.warn("Error: content at url: {} is exhausting the memory".format(url))
+            except MemoryError:
+                logging.warn("Error: content at url: {} is exhausting the memory".format(url))

-                except LocationParseError:
-                    logging.warn("Error encountered during parsing of: {}".format(url))
+            except LocationParseError:
+                logging.warn("Error encountered during parsing of: {}".format(url))

-                except self.CrawlerTimedOut:
-                    logging.info("Timeout has exceeded, exiting")
-                    return
+            except self.CrawlerTimedOut:
+                logging.info("Timeout has exceeded, exiting")
+                return

-                logging.debug("No more links were found")
+            logging.debug("No more links were found")


 def main():
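For readers skimming the change: the crawl loop no longer visits every configured root URL on each pass; it now draws a single root URL at random each time around the `while True` loop. A minimal, runnable sketch of the before/after behavior is below; the `config` dict and `fetch` helper are hypothetical stand-ins for the crawler's `self._config` and `self._request`:

import random

# Hypothetical stand-in for the crawler's self._config.
config = {"root_urls": ["https://example.com", "https://example.org", "https://example.net"]}

def fetch(url):
    # Hypothetical stand-in for self._request(url).content.
    print("crawling", url)

# Before this commit: every root URL is crawled on each pass of the outer loop.
for url in config["root_urls"]:
    fetch(url)

# After this commit: one root URL is drawn at random per pass, so each
# cycle starts from a single, unpredictable entry point.
url = random.choice(config["root_urls"])
fetch(url)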
