Merge pull request #89 from Yelp/opendns_discard_404s

Added option to the HTTP request handler to avoid retries on 404s, using that option in OpenDNS
Yelp · Feb 21, 2019 · d76aeef · d76aeef
2 parents 21acd3b + 6e4017a
commit d76aeef
Show file tree

Hide file tree

Showing 10 changed files with 110 additions and 55 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,25 +1,32 @@
+repos:
 -   repo: git://github.com/pre-commit/pre-commit-hooks
-    sha: cf550fcab3f12015f8676b8278b30e1a5bc10e70
+    sha: v2.1.0
     hooks:
-    -   id: autopep8-wrapper
-        args:
-        - -i
-        - --ignore=E501
-    -   id: check-json
-    -   id: check-yaml
-    -   id: end-of-file-fixer
     -   id: trailing-whitespace
+    -   id: end-of-file-fixer
+    -   id: check-yaml
+    -   id: debug-statements
     -   id: name-tests-test
-    -   id: requirements-txt-fixer
+    -   id: check-added-large-files
+    -   id: check-byte-order-marker
+    -   id: fix-encoding-pragma
     -   id: flake8
+    -   id: requirements-txt-fixer
 -   repo: git://github.com/asottile/reorder_python_imports
-    sha: 3d86483455ab5bd06cc1069fdd5ac57be5463f10
+    sha: v1.3.4
     hooks:
     -   id: reorder-python-imports
+-   repo: [email protected]:mirrors/asottile/add-trailing-comma
+    rev: v0.7.1
+    hooks:
+    -   id: add-trailing-comma
+-   repo: [email protected]:mirrors/pre-commit/mirrors-autopep8
+    rev: v1.4.3
+    hooks:
+    -   id: autopep8
 -   repo: https://github.com/Yelp/detect-secrets
-    sha: 0.9.1
+    sha: v0.12.0
     hooks:
     -   id: detect-secrets
         args: ['--baseline', '.secrets.baseline']
         exclude: .*tests/.*|\.pre-commit-config\.yaml
-        language_version: python2.7
diff --git a/.secrets.baseline b/.secrets.baseline
@@ -1,6 +1,9 @@
 {
-  "exclude_regex": ".*tests/.*|\\.pre-commit-config\\.yaml",
-  "generated_at": "2018-12-11T21:45:51Z",
+  "exclude": {
+    "files": ".*tests/.*|\\.pre-commit-config\\.yaml",
+    "lines": null
+  },
+  "generated_at": "2019-02-13T15:09:11Z",
   "plugins_used": [
     {
       "base64_limit": 4.5,
@@ -67,5 +70,5 @@
       }
     ]
   },
-  "version": "0.9.1"
+  "version": "0.12.0"
 }
diff --git a/.travis.yml b/.travis.yml
@@ -1,7 +1,7 @@
 language: python
 python:
     - '2.7'
-    - '3.5'
+    - '3.6'
 
 install:
     - pip install tox-travis

diff --git a/Makefile b/Makefile
@@ -9,9 +9,12 @@ test:
 venv:
 	tox -evenv
 
+install-hooks: venv
+	virtualenv_run/bin/pre-commit install -f --install-hooks
+
 clean:
 	rm -rf build/ dist/ threat_intel.egg-info/ .tox/ virtualenv_run/
 	find . -name '*.pyc' -delete
 	find . -name '__pycache__' -delete
 
-.PHONY: all test venv clean
+.PHONY: all test venv clean install-hooks
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -2,8 +2,8 @@
 -r requirements.txt
 coverage==3.7.1
 mock==1.0.1
-pre-commit==0.7.6
+pre-commit==1.13.0
 pyflakes==0.9.2
-tornado==4.5.3
 testify==0.7.2
+tornado==4.5.3
 tox==2.3.1
diff --git a/setup.py b/setup.py
@@ -1,11 +1,10 @@
-# -*- coding: utf-8 -*-
 from setuptools import find_packages
 from setuptools import setup
 
 
 setup(
     name="threat_intel",
-    version='0.1.27',
+    version='0.1.28',
     provides=['threat_intel'],
     author="Yelp Security",
     url='https://github.com/Yelp/threat_intel',
@@ -18,6 +17,6 @@
         "requests[security]",
         "grequests",
         "simplejson",
-        "six"
+        "six",
     ],
 )
diff --git a/tests/util/http_test.py b/tests/util/http_test.py
@@ -30,6 +30,11 @@ def mock_unsuccessful_response(self, response):
         response.status_code = 500
         response._content = u'Internal Server Error'.encode('utf-8')
 
+    def mock_not_found_response(self, response):
+        """Mocks a 404 response by changing its status code and content."""
+        response.status_code = 404
+        response._content = u'Not Found'.encode('utf-8')
+
     def mock_unsuccessful_responses(self, responses):
         """Mocks unsuccessful responses by changing their status code to 500 and the content to indicate the error."""
         for response in responses:
@@ -84,7 +89,7 @@ def test_multi_get_max_retry(self):
         responses_to_calls = [
             self.mock_ok_responses(number_of_requests),
             self.mock_ok_responses(number_of_requests - 1),
-            self.mock_ok_responses(number_of_requests - 2)
+            self.mock_ok_responses(number_of_requests - 2),
         ]
         # mock unsuccessful responses to the first call to grequests.map
         self.mock_unsuccessful_responses(responses_to_calls[0][0:3])
@@ -114,7 +119,8 @@ def test_multi_get_response_to_json(self):
         T.assert_equals(5, len(actual_responses))
         T.assert_is(None, actual_responses[3])
         logging.warning.called_once_with(
-            'Expected response in JSON format from example.com/movie/TheRevenant but the actual response text is: This is not JSON')
+            'Expected response in JSON format from example.com/movie/TheRevenant but the actual response text is: This is not JSON',
+        )
 
     def assert_only_unsuccessful_requests(self, call, unsuccessful_responses):
         """Asserts that the requests in call were only the ones that failed, based on the `unsuccessful_responses` list."""
@@ -129,7 +135,7 @@ def test_multi_get_retry_only_unsuccessful_requests(self):
         responses_to_calls = [
             self.mock_ok_responses(10),
             self.mock_ok_responses(3),
-            self.mock_ok_responses(2)
+            self.mock_ok_responses(2),
         ]
         # mock unsuccessful responses to the first call to grequests.map
         unsuccessful_responses_first_call = [
@@ -169,3 +175,12 @@ def test_multi_get_retry_only_unsuccessful_requests(self):
         # assert that only the failed requests from the second call to grequests.map are passed in the third call
         third_call = grequests.map.call_args_list[2]
         self.assert_only_unsuccessful_requests(third_call, unsuccessful_responses_second_call)
+
+    def test_multi_get_drop_404s(self):
+        responses_to_calls = self.mock_ok_responses(3)
+        self.mock_not_found_response(responses_to_calls[1])
+        query_params = [{'Hugh Glass': 'Leonardo DiCaprio'}] * 3
+        self.mock_grequests_map(responses_to_calls)
+        result = MultiRequest(drop_404s=True).multi_get('example.org', query_params)
+        assert grequests.map.call_count == 1
+        assert result[1] is None
diff --git a/threat_intel/opendns.py b/threat_intel/opendns.py
@@ -2,10 +2,10 @@
 #
 # InvestigateApi makes calls to the OpenDNS Investigate API.
 #
-from six.moves import range
 from warnings import warn
 
 import simplejson
+from six.moves import range
 
 from threat_intel.util.api_cache import ApiCache
 from threat_intel.util.error_messages import write_error_message
@@ -60,7 +60,10 @@ class InvestigateApi(object):
 
     def __init__(self, api_key, cache_file_name=None, update_cache=True, req_timeout=None):
         auth_header = {'Authorization': 'Bearer {0}'.format(api_key)}
-        self._requests = MultiRequest(default_headers=auth_header, max_requests=12, rate_limit=30, req_timeout=req_timeout)
+        self._requests = MultiRequest(
+            default_headers=auth_header, max_requests=12, rate_limit=30,
+            req_timeout=req_timeout, drop_404s=True,
+        )
 
         # Create an ApiCache if instructed to
         self._cache = ApiCache(cache_file_name, update_cache) if cache_file_name else None
@@ -116,8 +119,10 @@ def domain_score(self, domains):
         This method is deprecated since OpenDNS Investigate API
         endpoint is also deprecated.
         """
-        warn('OpenDNS Domain Scores endpoint is deprecated. Use '
-             'InvestigateApi.categorization() instead', DeprecationWarning)
+        warn(
+            'OpenDNS Domain Scores endpoint is deprecated. Use '
+            'InvestigateApi.categorization() instead', DeprecationWarning,
+        )
         url_path = 'domains/score/'
         return self._multi_post(url_path, domains)
 
@@ -310,11 +315,13 @@ def search(self, patterns, start=30, limit=1000, include_category=False):
         fmt_url_path = u'search/{0}'
         start = '-{0}days'.format(start)
         include_category = str(include_category).lower()
-        query_params = {'start': start,
-                        'limit': limit,
-                        'includecategory': include_category}
+        query_params = {
+            'start': start,
+            'limit': limit,
+            'includecategory': include_category,
+        }
         return self._multi_get(api_name, fmt_url_path, patterns, query_params)
-         
+
     def risk_score(self, domains):
         """Performs Umbrella risk score analysis on the input domains
 
@@ -327,6 +334,7 @@ def risk_score(self, domains):
         fmt_url_path = u'domains/risk-score/{0}'
         return self._multi_get(api_name, fmt_url_path, domains)
 
+
 class ResponseError(Exception):
 
     """Raised when the response received from the endpoint is not valid."""