forked from mozilla-services/socorro
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fixes bug 1278937 - Implement downloading missing symbols CSV (mozilla-services#3375) r=adngdb
- Loading branch information
Showing
5 changed files
with
463 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import datetime | ||
import csv | ||
from cStringIO import StringIO | ||
|
||
from configman import Namespace | ||
from configman.converters import class_converter | ||
from crontabber.base import BaseCronApp | ||
from crontabber.mixins import with_postgres_transactions | ||
|
||
from socorro.external.postgresql.missing_symbols import MissingSymbols | ||
from socorrolib.app.socorro_app import App, main | ||
|
||
|
||
@with_postgres_transactions()
class MissingSymbolsCronApp(BaseCronApp):
    """Crontabber job that publishes missing symbols as a CSV file in S3.

    Each run asks ``MissingSymbols`` for the rows whose date is the date
    ``hours_back`` hours before now (UTC), serializes them into an in-memory
    CSV and stores the result under the key ``latest.csv`` in the bucket
    named by ``bucket_name``.
    """

    app_name = 'missing-symbols'
    app_description = 'Missing Symbols'
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option(
        'hours_back',
        default=24,  # hours
        doc='Number of hours of missing symbols'
    )

    # NOTE: the parentheses below only wrap long strings. They must not end
    # with a trailing comma, otherwise ``default`` and ``doc`` become
    # 1-tuples instead of strings and ``boto_class`` would not end up as a
    # callable class (``run`` calls ``self.config.boto_class(...)``).
    required_config.add_option(
        'boto_class',
        default=(
            'socorro.external.boto.connection_context.S3ConnectionContext'
        ),
        doc=(
            'fully qualified dotted Python classname to handle '
            'Boto connections'
        ),
        from_string_converter=class_converter,
        reference_value_from='resource.boto'
    )

    required_config.add_option(
        'bucket_name',
        default='missing-symbols',
        doc='Name of S3 bucket to store this'
    )

    def run(self):
        """Build the missing symbols CSV and upload it as ``latest.csv``.

        The whole CSV is assembled in memory before it is uploaded with a
        single ``set_contents_from_string`` call. Two info messages are
        logged: one before the upload (row count and bucket name) and one
        after it (URL and size of the stored key).
        """
        # Column order of the CSV; also the keys read from every row dict.
        columns = (
            'debug_file',
            'debug_id',
            'code_file',
            'code_id',
        )
        buf = StringIO()
        writer = csv.writer(buf)
        writer.writerow(columns)

        implementation = MissingSymbols(config=self.config)
        date = datetime.datetime.utcnow()
        date -= datetime.timedelta(hours=self.config.hours_back)

        # ``iter`` yields rows lazily, so count them while writing instead
        # of materializing the full result set just to take its length.
        rows = 0
        for each in implementation.iter(date=date.date()):
            writer.writerow([each[column] for column in columns])
            rows += 1

        s3 = self.config.boto_class(self.config)
        conn = s3._connect()
        self.config.logger.info(
            'Writing {} missing symbols rows to a file in {}'.format(
                format(rows, ','),
                self.config.bucket_name
            )
        )
        bucket = s3._get_or_create_bucket(conn, self.config.bucket_name)
        key_object = bucket.new_key('latest.csv')
        key_object.set_contents_from_string(buf.getvalue())
        self.config.logger.info(
            'Generated {} ({} bytes, {:.2f} Mb)'.format(
                key_object.generate_url(expires_in=0, query_auth=False),
                format(key_object.size, ','),
                key_object.size / 1024.0 / 1024.0
            )
        )
|
||
|
||
class MissingSymbolsCronAppDryRunner(App):  # pragma: no cover
    """Stand-alone runner for trying out the missing-symbols job by hand.

    Executing this module directly runs the job once, immediately:

        $ python socorro/cron/jobs/missingsymbols.py
    """

    required_config = Namespace()
    required_config.add_option(
        name='crontabber_job_class',
        default='socorro.cron.jobs.missingsymbols.MissingSymbolsCronApp',
        doc='bla',
        from_string_converter=class_converter,
    )

    def __init__(self, config):
        """Keep the config and instantiate the configured job class.

        The job class is called with the config and an empty dict as its
        second argument.
        """
        self.config = config
        job_class = config.crontabber_job_class
        self.app = job_class(config, {})

    def main(self):
        """Run the wrapped job once."""
        job = self.app
        job.run()
|
||
|
||
# Script entry point: run the dry-runner app and use its result as the
# process exit status.
if __name__ == '__main__':  # pragma: no cover
    import sys
    exit_status = main(MissingSymbolsCronAppDryRunner)
    sys.exit(exit_status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
import datetime | ||
import logging | ||
|
||
from socorro.external.postgresql.base import PostgreSQLBase | ||
from socorrolib.lib import external_common | ||
|
||
|
||
logger = logging.getLogger("webapi") | ||
|
||
|
||
class MissingSymbols(PostgreSQLBase):
    """Query access to the ``missing_symbols`` table."""

    def _get_sql_params(self, **kwargs):
        """Return ``(sql, params)`` for selecting distinct missing symbols.

        Two keyword arguments are declared as filters: ``date``, whose
        default is yesterday's UTC date, and ``limit``, whose default is
        ``None``. A ``LIMIT`` clause is appended only when a limit is set.
        """
        now = datetime.datetime.utcnow()
        yesterday = (now - datetime.timedelta(days=1)).date()
        # Each entry is handed to ``external_common.parse_arguments``;
        # presumably the tuple layout is (name, default, type).
        filters = [
            ('date', yesterday, 'date'),
            ('limit', None, int),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        sql = """
            SELECT debug_file, debug_id, code_file, code_id
            FROM missing_symbols
            WHERE
                date_processed = %(date)s AND
                debug_file != '' AND
                debug_id != ''
            GROUP BY debug_file, debug_id, code_file, code_id
        """
        limit_clause = '' if params['limit'] is None else '\nLIMIT %(limit)s'
        return sql + limit_clause, params

    def iter(self, **kwargs):
        """Yield one dict per matching row instead of building a list.

        Every yielded dict maps the selected column names to that row's
        values:

            {
                'debug_file': ...,
                'debug_id': ...,
                'code_file': ...,
                'code_id': ...,
            }

        Missing symbols record sets tend to be very large, so rows are
        streamed from the cursor rather than collected into one big list.
        """
        sql, params = self._get_sql_params(**kwargs)
        with self.cursor(sql, params) as cursor:
            column_names = [column.name for column in cursor.description]
            for record in cursor:
                yield dict(zip(column_names, record))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
import csv | ||
import datetime | ||
from cStringIO import StringIO | ||
|
||
import mock | ||
from crontabber.app import CronTabber | ||
|
||
from socorro.unittest.cron.jobs.base import IntegrationTestBase | ||
from socorro.unittest.cron.setup_configman import ( | ||
get_config_manager_for_crontabber, | ||
) | ||
|
||
|
||
class TestMissingSymbolsCronApp(IntegrationTestBase):
    """Integration test for the missing-symbols crontabber job.

    ``setUp`` fills ``missing_symbols`` with rows for today and yesterday,
    some of which have an empty ``debug_file`` or ``debug_id``, and replaces
    the boto connection class with mocks so nothing is uploaded for real.
    """

    def setUp(self):
        super(TestMissingSymbolsCronApp, self).setUp()

        cursor = self.conn.cursor()
        today = datetime.datetime.utcnow().date()
        yesterday = today - datetime.timedelta(days=1)

        cursor.execute("""
            INSERT INTO missing_symbols
            (date_processed, debug_file, debug_id, code_file, code_id)
            VALUES
            (
                %(today)s,
                'McBrwCtl.pdb',
                '133A2F3537E341A995D7C2BF8C3B2C663',
                '',
                ''
            ),
            (
                %(today)s,
                'msmpeg2vdec.pdb',
                '8515599DC90B4A01997BA2647DFE24941',
                'msmpeg2vdec.dll',
                '54134E292c4000'
            ),
            (
                %(today)s,
                '',
                '8515599DC90B4A01997BA2647DFE24941',
                'msmpeg2vdec.dll',
                '54134E292c4000'
            ),
            (
                %(today)s,
                'msmpeg2vdec.pdb',
                '',
                'msmpeg2vdec.dll',
                '54134E292c4000'
            ),
            (
                %(yesterday)s,
                'nvwgf2um.pdb',
                '9D492B844FF34800B34320464AA1E7E41',
                'nvwgf2um.dll',
                '561D1D4Ff58000'
            ),
            (
                %(yesterday)s,
                'nvwgf2um.pdb',
                '',
                'nvwgf2um.dll',
                '561D1D4Ff58000'
            ),
            (
                %(yesterday)s,
                '',
                '9D492B844FF34800B34320464AA1E7E41',
                'nvwgf2um.dll',
                '561D1D4Ff58000'
            )
        """, {'today': today, 'yesterday': yesterday})

        self.conn.commit()

        # Calling the mocked boto class returns an object whose
        # _get_or_create_bucket() hands back our bucket mock, which in turn
        # hands back our key mock from new_key().
        self.mock_boto_class = mock.MagicMock()
        self.mock_bucket = mock.MagicMock()
        self.mock_key = mock.MagicMock()
        self.mock_boto_class()._get_or_create_bucket.return_value = (
            self.mock_bucket
        )
        self.mock_bucket.new_key.return_value = self.mock_key

    def tearDown(self):
        cursor = self.conn.cursor()

        # Remove the rows inserted by setUp so tests stay independent.
        statement = """
            TRUNCATE missing_symbols CASCADE
        """
        cursor.execute(statement)
        self.conn.commit()

        super(TestMissingSymbolsCronApp, self).tearDown()

    def _setup_config_manager(self, days_to_keep=None):
        """Return a config manager that runs only the missing-symbols job.

        The job's ``boto_class`` option is overridden with the mock created
        in ``setUp``. ``days_to_keep`` is accepted but not used.
        """
        return get_config_manager_for_crontabber(
            jobs=(
                'socorro.cron.jobs.missingsymbols.MissingSymbolsCronApp|1d'
            ),
            overrides={
                'crontabber.class-MissingSymbolsCronApp'
                '.boto_class': self.mock_boto_class
            },
        )

    def test_basic_run(self):
        # We need to prepare to return a size for the new key
        self.mock_key.size = 123456789
        self.mock_key.generate_url.return_value = (
            'https://s3.example.com/latest.csv'
        )

        # Run every configured crontabber job (only missing-symbols here).
        config_manager = self._setup_config_manager()
        with config_manager.context() as config:
            tab = CronTabber(config)
            tab.run_all()

        # The job must be recorded as having succeeded without an error.
        information = self._load_structure()
        assert information['missing-symbols']
        assert not information['missing-symbols']['last_error']
        assert information['missing-symbols']['last_success']

        self.mock_boto_class()._connect.assert_called_with()
        # NOTE(review): the job itself never calls close(); presumably the
        # config context closes its values on exit -- confirm.
        self.mock_boto_class.close.assert_called_with()
        self.mock_bucket.new_key.assert_called_with('latest.csv')

        # Only one inserted row is dated yesterday and has both a
        # debug_file and a debug_id, so the CSV is the header plus that row.
        content = StringIO()
        writer = csv.writer(content)
        writer.writerow((
            'debug_file',
            'debug_id',
            'code_file',
            'code_id',
        ))
        writer.writerow((
            'nvwgf2um.pdb',
            '9D492B844FF34800B34320464AA1E7E41',
            'nvwgf2um.dll',
            '561D1D4Ff58000',
        ))
        self.mock_key.set_contents_from_string.assert_called_with(
            content.getvalue()
        )

        # this is because 123456789 bytes is 117.74 Mb
        tab.config.logger.info.assert_called_with(
            'Generated https://s3.example.com/latest.csv '
            '(123,456,789 bytes, 117.74 Mb)'
        )
Oops, something went wrong.