Skip to content

Commit

Permalink
fixes bug 1278937 - Implement downloading missing symbols CSV (mozill…
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter Bengtsson authored and adngdb committed Jun 22, 2016
1 parent c1bb82b commit dd97716
Show file tree
Hide file tree
Showing 5 changed files with 463 additions and 2 deletions.
114 changes: 114 additions & 0 deletions socorro/cron/jobs/missingsymbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import datetime
import csv
from cStringIO import StringIO

from configman import Namespace
from configman.converters import class_converter
from crontabber.base import BaseCronApp
from crontabber.mixins import with_postgres_transactions

from socorro.external.postgresql.missing_symbols import MissingSymbols
from socorrolib.app.socorro_app import App, main


@with_postgres_transactions()
class MissingSymbolsCronApp(BaseCronApp):
    """Crontabber job that publishes missing symbols as a CSV file on S3.

    Each run looks up the missing symbols for a single date (the UTC date
    ``hours_back`` hours before now), renders them as CSV in memory and
    stores the result under the key ``latest.csv`` in ``bucket_name``.
    """

    app_name = 'missing-symbols'
    app_description = 'Missing Symbols'
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option(
        'hours_back',
        default=24,  # hours
        doc='Number of hours of missing symbols'
    )

    required_config.add_option(
        'boto_class',
        # Both `default` and `doc` must be plain strings. A trailing comma
        # inside the parentheses would turn them into 1-tuples, and a tuple
        # default is not a string for `from_string_converter` to convert.
        default=(
            'socorro.external.boto.connection_context.S3ConnectionContext'
        ),
        doc=(
            'fully qualified dotted Python classname to handle '
            'Boto connections'
        ),
        from_string_converter=class_converter,
        reference_value_from='resource.boto'
    )

    required_config.add_option(
        'bucket_name',
        default='missing-symbols',
        doc='Name of S3 bucket to store this'
    )

    def run(self):
        """Generate the CSV and upload it to the configured S3 bucket.

        The CSV starts with a header row followed by one row per record
        yielded by ``MissingSymbols.iter`` for the computed date.
        """
        columns = (
            'debug_file',
            'debug_id',
            'code_file',
            'code_id',
        )
        buf = StringIO()
        writer = csv.writer(buf)
        writer.writerow(columns)

        implementation = MissingSymbols(config=self.config)
        # Only the date part of "now minus hours_back" is passed on.
        date = datetime.datetime.utcnow()
        date -= datetime.timedelta(hours=self.config.hours_back)
        rows = 0
        for each in implementation.iter(date=date.date()):
            writer.writerow(tuple(each[column] for column in columns))
            rows += 1

        s3 = self.config.boto_class(self.config)
        conn = s3._connect()
        self.config.logger.info(
            'Writing {} missing symbols rows to a file in {}'.format(
                format(rows, ','),
                self.config.bucket_name
            )
        )
        bucket = s3._get_or_create_bucket(conn, self.config.bucket_name)
        # The key name is fixed, so every run writes to the same object.
        key_object = bucket.new_key('latest.csv')
        key_object.set_contents_from_string(buf.getvalue())
        self.config.logger.info(
            'Generated {} ({} bytes, {:.2f} Mb)'.format(
                key_object.generate_url(expires_in=0, query_auth=False),
                format(key_object.size, ','),
                key_object.size / 1024.0 / 1024.0
            )
        )


class MissingSymbolsCronAppDryRunner(App):  # pragma: no cover
    """Stand-alone app for running the missing-symbols job immediately.

    Execute this file directly to trigger it:
        $ python socorro/cron/jobs/missingsymbols.py
    """

    required_config = Namespace()
    required_config.add_option(
        'crontabber_job_class',
        default='socorro.cron.jobs.missingsymbols.MissingSymbolsCronApp',
        doc='bla',
        from_string_converter=class_converter,
    )

    def __init__(self, config):
        """Store ``config`` and instantiate the configured job class."""
        job_class = config.crontabber_job_class
        self.config = config
        # The job is built with the config and an empty dict as its
        # second argument.
        self.app = job_class(config, {})

    def main(self):
        """Run the wrapped job once."""
        self.app.run()


if __name__ == '__main__':  # pragma: no cover
    # Script entry point: run the dry-runner app and use its return value
    # as the process exit status.
    import sys

    exit_status = main(MissingSymbolsCronAppDryRunner)
    sys.exit(exit_status)
17 changes: 15 additions & 2 deletions socorro/external/postgresql/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,19 @@ def count(self, sql, params=None, error_message=None, connection=None):
connection=connection
)

@contextlib.contextmanager
def cursor(self, sql, params=None, error_message=None, connection=None):
    """Context manager that executes ``sql`` and yields the open cursor.

    When no ``connection`` is passed in, one is obtained from
    ``self.database.connection()`` and closed again when the context
    exits. A connection supplied by the caller is left open.

    ``error_message`` is accepted but not used by this method.
    """
    owns_connection = not connection
    if owns_connection:
        connection = self.database.connection()
    try:
        with connection.cursor() as db_cursor:
            db_cursor.execute(sql, params)
            yield db_cursor
    finally:
        # Only close what was opened here.
        if connection and owns_connection:
            connection.close()

def _execute(
self, actor_function, sql, error_message, params=None, connection=None
):
Expand Down Expand Up @@ -253,8 +266,8 @@ def build_reports_sql_where(params, sql_params, config):
# Get all versions that are linked to this rapid beta.
rapid_beta_versions = [
x for x in versions_info
if versions_info[x]["from_beta_version"] == key
and not versions_info[x]["is_rapid_beta"]
if versions_info[x]["from_beta_version"] == key and
not versions_info[x]["is_rapid_beta"]
]

for rapid_beta in rapid_beta_versions:
Expand Down
63 changes: 63 additions & 0 deletions socorro/external/postgresql/missing_symbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import datetime
import logging

from socorro.external.postgresql.base import PostgreSQLBase
from socorrolib.lib import external_common


logger = logging.getLogger("webapi")


class MissingSymbols(PostgreSQLBase):
    """Query access to the ``missing_symbols`` table."""

    def _get_sql_params(self, **kwargs):
        """Return the ``(sql, params)`` pair for the missing symbols query.

        Recognised keyword arguments are ``date`` (defaults to yesterday's
        UTC date) and ``limit`` (defaults to ``None``, meaning no LIMIT
        clause is appended).
        """
        one_day_ago = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        # Each entry appears to be (name, default, type) - confirm against
        # external_common.parse_arguments.
        filters = [
            ('date', one_day_ago.date(), 'date'),
            ('limit', None, int),
        ]
        params = external_common.parse_arguments(filters, kwargs)
        sql = """
SELECT debug_file, debug_id, code_file, code_id
FROM missing_symbols
WHERE
date_processed = %(date)s AND
debug_file != '' AND
debug_id != ''
GROUP BY debug_file, debug_id, code_file, code_id
"""
        if params['limit'] is None:
            return sql, params
        return sql + '\nLIMIT %(limit)s', params

    def iter(self, **kwargs):
        """Yield one dict per matching row, lazily.

        Every yielded dict is keyed by the column names of the query:
            {
                'debug_file': ...,
                'debug_id': ...,
                'code_file': ...,
                'code_id': ...,
            }
        Rows are streamed from the cursor rather than collected into a
        list, because missing symbols recordsets tend to be very large.
        """
        sql, params = self._get_sql_params(**kwargs)
        with self.cursor(sql, params) as db_cursor:
            column_names = [column.name for column in db_cursor.description]
            for record in db_cursor:
                yield dict(zip(column_names, record))
156 changes: 156 additions & 0 deletions socorro/unittest/cron/jobs/test_missingsymbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import csv
import datetime
from cStringIO import StringIO

import mock
from crontabber.app import CronTabber

from socorro.unittest.cron.jobs.base import IntegrationTestBase
from socorro.unittest.cron.setup_configman import (
get_config_manager_for_crontabber,
)


class TestMissingSymbolsCronApp(IntegrationTestBase):
    """Integration test for the missing-symbols crontabber job."""

    def setUp(self):
        """Insert fixture rows and prepare the mocked boto objects.

        Rows are inserted for today and yesterday, some with an empty
        ``debug_file`` or ``debug_id``.
        """
        super(TestMissingSymbolsCronApp, self).setUp()

        cursor = self.conn.cursor()
        today = datetime.datetime.utcnow().date()
        yesterday = today - datetime.timedelta(days=1)

        cursor.execute("""
INSERT INTO missing_symbols
(date_processed, debug_file, debug_id, code_file, code_id)
VALUES
(
%(today)s,
'McBrwCtl.pdb',
'133A2F3537E341A995D7C2BF8C3B2C663',
'',
''
),
(
%(today)s,
'msmpeg2vdec.pdb',
'8515599DC90B4A01997BA2647DFE24941',
'msmpeg2vdec.dll',
'54134E292c4000'
),
(
%(today)s,
'',
'8515599DC90B4A01997BA2647DFE24941',
'msmpeg2vdec.dll',
'54134E292c4000'
),
(
%(today)s,
'msmpeg2vdec.pdb',
'',
'msmpeg2vdec.dll',
'54134E292c4000'
),
(
%(yesterday)s,
'nvwgf2um.pdb',
'9D492B844FF34800B34320464AA1E7E41',
'nvwgf2um.dll',
'561D1D4Ff58000'
),
(
%(yesterday)s,
'nvwgf2um.pdb',
'',
'nvwgf2um.dll',
'561D1D4Ff58000'
),
(
%(yesterday)s,
'',
'9D492B844FF34800B34320464AA1E7E41',
'nvwgf2um.dll',
'561D1D4Ff58000'
)
""", {'today': today, 'yesterday': yesterday})

        self.conn.commit()

        # Calling the mocked class returns the same instance mock every
        # time, so the bucket and key handed to the job can be set here.
        self.mock_boto_class = mock.MagicMock()
        self.mock_bucket = mock.MagicMock()
        self.mock_key = mock.MagicMock()
        self.mock_boto_class()._get_or_create_bucket.return_value = (
            self.mock_bucket
        )
        self.mock_bucket.new_key.return_value = self.mock_key

    def tearDown(self):
        """Remove every fixture row inserted by ``setUp``."""
        cursor = self.conn.cursor()

        # Empty the table so rows do not leak into other tests.
        statement = """
TRUNCATE missing_symbols CASCADE
"""
        cursor.execute(statement)
        self.conn.commit()

        super(TestMissingSymbolsCronApp, self).tearDown()

    def _setup_config_manager(self, days_to_keep=None):
        """Return a config manager that runs only the missing-symbols job.

        The job's ``boto_class`` is overridden with the mock from
        ``setUp``. ``days_to_keep`` is accepted but not used.
        """
        return get_config_manager_for_crontabber(
            jobs=(
                'socorro.cron.jobs.missingsymbols.MissingSymbolsCronApp|1d'
            ),
            overrides={
                'crontabber.class-MissingSymbolsCronApp'
                '.boto_class': self.mock_boto_class
            },
        )

    def test_basic_run(self):
        """Run the job once and check the uploaded CSV and the log line."""
        # We need to prepare to return a size for the new key
        self.mock_key.size = 123456789
        self.mock_key.generate_url.return_value = (
            'https://s3.example.com/latest.csv'
        )

        # Run the crontabber job that generates and uploads the CSV.
        config_manager = self._setup_config_manager()
        with config_manager.context() as config:
            tab = CronTabber(config)
            tab.run_all()

        # The job must be recorded as having succeeded without an error.
        information = self._load_structure()
        assert information['missing-symbols']
        assert not information['missing-symbols']['last_error']
        assert information['missing-symbols']['last_success']

        self.mock_boto_class()._connect.assert_called_with()
        # NOTE(review): the job itself never calls close(); presumably the
        # config context closes its values on exit - confirm.
        self.mock_boto_class.close.assert_called_with()
        self.mock_bucket.new_key.assert_called_with('latest.csv')

        # With the default hours_back the job asks for yesterday's date,
        # and the query skips rows with an empty debug_file or debug_id,
        # so only one fixture row is expected after the header.
        content = StringIO()
        writer = csv.writer(content)
        writer.writerow((
            'debug_file',
            'debug_id',
            'code_file',
            'code_id',
        ))
        writer.writerow((
            'nvwgf2um.pdb',
            '9D492B844FF34800B34320464AA1E7E41',
            'nvwgf2um.dll',
            '561D1D4Ff58000',
        ))
        self.mock_key.set_contents_from_string.assert_called_with(
            content.getvalue()
        )

        # this is because 123456789 bytes is 117.74 Mb
        tab.config.logger.info.assert_called_with(
            'Generated https://s3.example.com/latest.csv '
            '(123,456,789 bytes, 117.74 Mb)'
        )
Loading

0 comments on commit dd97716

Please sign in to comment.