Skip to content

Commit

Permalink
Merge pull request #15 from ail1020/patch-1
Browse files Browse the repository at this point in the history
randomize order of root_urls
  • Loading branch information
1tayH authored Aug 19, 2018
2 parents 91c3927 + cfcc557 commit 2815487
Showing 1 changed file with 16 additions and 19 deletions.
35 changes: 16 additions & 19 deletions noisy.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,28 +230,25 @@ def crawl(self):
self._start_time = datetime.datetime.now()

while True:
for url in self._config["root_urls"]:
try:
body = self._request(url).content
self._links = self._extract_urls(body, url)
logging.debug("found {} links".format(len(self._links)))
self._browse_from_links()

except requests.exceptions.RequestException:
logging.warn("Error connecting to root url: {}".format(url))
url = random.choice(self._config["root_urls"])
try:
body = self._request(url).content
self._links = self._extract_urls(body, url)
logging.debug("found {} links".format(len(self._links)))
self._browse_from_links()

except requests.exceptions.RequestException:
logging.warn("Error connecting to root url: {}".format(url))

except MemoryError:
logging.warn("Error: content at url: {} is exhausting the memory".format(url))
except MemoryError:
logging.warn("Error: content at url: {} is exhausting the memory".format(url))

except LocationParseError:
logging.warn("Error encountered during parsing of: {}".format(url))

except self.CrawlerTimedOut:
logging.info("Timeout has exceeded, exiting")
return

logging.debug("No more links were found")
except LocationParseError:
logging.warn("Error encountered during parsing of: {}".format(url))

except self.CrawlerTimedOut:
logging.info("Timeout has exceeded, exiting")
return

def main():
parser = argparse.ArgumentParser()
Expand Down

0 comments on commit 2815487

Please sign in to comment.