Merge pull request mozilla-services#1286 from selenamarie/bug803209-garbage-collecting-report

Bug803209 support for garbage collecting report in database
selenamarie committed Jun 13, 2013
2 parents 47794b0 + 6c0e607 commit 8d8432b
Showing 11 changed files with 192 additions and 79 deletions.
6 changes: 6 additions & 0 deletions Makefile
@@ -101,3 +101,9 @@ analysis:
rsync socorro-toolbox/target/*.jar analysis/
rsync akela/target/*.jar analysis/
rsync -a socorro-toolbox/src/main/pig/ analysis/

json_enhancements_pg_extension: virtualenv
# This is only run manually, as it is a one-time operation
# to be performed at system installation time, rather than
# every time Socorro is built
if [ ! -f `pg_config --pkglibdir`/json_enhancements.so ]; then sudo $(VIRTUALENV)/bin/pgxn install json_enhancements ; fi
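The pgxn step only drops json_enhancements.so into PostgreSQL's library directory; the extension still has to be registered in each database before its functions are callable (setupdb_app.py below does that during installation). A minimal sketch of verifying the install by hand, assuming a superuser connection to the Socorro database; the JSON literal is illustrative:

    -- Register the extension and smoke-test the accessor used by update_tcbs.sql.
    CREATE EXTENSION IF NOT EXISTS json_enhancements;
    SELECT json_object_field_text('{"IsGarbageCollecting": "1"}'::json,
                                  'IsGarbageCollecting');   -- expected: '1'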
16 changes: 4 additions & 12 deletions alembic/script.py.mako
@@ -24,14 +24,10 @@ class CITEXT(types.UserDefinedType):
return 'CITEXT'

def bind_processor(self, dialect):
def process(value):
return value
return process
return lambda value: value

def result_processor(self, dialect, coltype):
def process(value):
return value
return process
return lambda value: value

def __repr__(self):
return "citext"
@@ -43,14 +39,10 @@ class JSON(types.UserDefinedType):
return 'JSON'

def bind_processor(self, dialect):
def process(value):
return value
return process
return lambda value: value

def result_processor(self, dialect, coltype):
def process(value):
return value
return process
return lambda value: value

def __repr__(self):
return "json"
65 changes: 65 additions & 0 deletions alembic/versions/2b285e76f71d_bug_803209_add_garag.py
@@ -0,0 +1,65 @@
"""bug 803209 -- add garbage collection count to TCBS
Revision ID: 2b285e76f71d
Revises: 8894c185715
Create Date: 2013-06-11 12:46:09.637058
"""

# revision identifiers, used by Alembic.
revision = '2b285e76f71d'
down_revision = '8894c185715'

import os
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
from sqlalchemy import types
from sqlalchemy.sql import table, column
from sqlalchemy.dialects import postgresql

class CITEXT(types.UserDefinedType):
name = 'citext'

def get_col_spec(self):
return 'CITEXT'

def bind_processor(self, dialect):
return lambda value: value

def result_processor(self, dialect, coltype):
return lambda value: value

def __repr__(self):
return "citext"

class JSON(types.UserDefinedType):
name = 'json'

def get_col_spec(self):
return 'JSON'

def bind_processor(self, dialect):
return lambda value: value

def result_processor(self, dialect, coltype):
return lambda value: value

def __repr__(self):
return "json"

def upgrade():
op.add_column(u'tcbs', sa.Column(u'is_gc_count', sa.INTEGER(), server_default='0', nullable=False))
op.add_column(u'tcbs_build', sa.Column(u'is_gc_count', sa.INTEGER(), server_default='0', nullable=False))
app_path=os.getcwd()
procs = [
'backfill_matviews.sql',
'update_tcbs.sql'
]
for myfile in [app_path + '/socorro/external/postgresql/raw_sql/procs/' + line for line in procs]:
with open(myfile, 'r') as file:
op.execute(file.read())

def downgrade():
op.drop_column(u'tcbs_build', u'is_gc_count')
op.drop_column(u'tcbs', u'is_gc_count')
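For readers who want the raw DDL, the upgrade amounts to adding a NOT NULL integer column defaulting to 0 on both matview tables, then re-running the two stored-procedure files listed in procs. A sketch of the equivalent statements (not part of the commit itself):

    -- Equivalent of op.add_column(...) in upgrade():
    ALTER TABLE tcbs       ADD COLUMN is_gc_count integer NOT NULL DEFAULT 0;
    ALTER TABLE tcbs_build ADD COLUMN is_gc_count integer NOT NULL DEFAULT 0;

    -- Equivalent of op.drop_column(...) in downgrade():
    ALTER TABLE tcbs_build DROP COLUMN is_gc_count;
    ALTER TABLE tcbs       DROP COLUMN is_gc_count;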
6 changes: 6 additions & 0 deletions docs/installation.rst
@@ -176,6 +176,12 @@ like to use a stable release, determine latest release tag from our release trac
::
git checkout $LATEST_RELEASE_TAG

Install json_enhancements for use with PostgreSQL
``````````````````````````````````````````````````
From inside the Socorro checkout
::
make json_enhancements_pg_extension

Run unit/functional tests
````````````

1 change: 1 addition & 0 deletions requirements/prod.txt
@@ -12,3 +12,4 @@ suds==0.4
thrift==0.8.0
web.py==0.36
requests==1.2.3
pgxnclient==1.2.1
18 changes: 16 additions & 2 deletions socorro/external/postgresql/fakedata.py
@@ -12,6 +12,7 @@
import csv
import os

crash_ids = []

def date_range(start_date, end_date, delta=None):
if delta is None:
@@ -332,8 +333,10 @@ def generate_rows(self):
def generate_crashid(self, timestamp):
crashid = str(uuid.UUID(int=random.getrandbits(128)))
depth = 0
return "%s%d%02d%02d%02d" % (crashid[:-7], depth, timestamp.year % 100,
final_crashid = "%s%d%02d%02d%02d" % (crashid[:-7], depth, timestamp.year % 100,
timestamp.month, timestamp.day)
crash_ids.append( (final_crashid, timestamp) )
return final_crashid

def buildid(self, fragment, format='%Y%m%d', days=None):
days = days or self.days
@@ -676,10 +679,21 @@ class Skiplist(BaseTable):
rows = [['ignore','everything'],
['prefix','SocketShutdown']]

class RawCrashes(BaseTable):
table = 'raw_crashes'
columns = ['uuid', 'raw_crash', 'date_processed']

def generate_rows(self):
for crashid, date_processed, in crash_ids:
raw_crash = '{ "uuid": "%s", "IsGarbageCollecting": "1" }' % crashid
row = [crashid, raw_crash, date_processed]
yield row


# the order that tables are loaded is important.
tables = [OSNames, OSNameMatches, ProcessTypes, Products, ReleaseChannels,
ProductReleaseChannels, RawADU, ReleaseChannelMatches,
ReleasesRaw, UptimeLevels, WindowsVersions, Reports, OSVersions,
ReleasesRaw, UptimeLevels, WindowsVersions, Reports, RawCrashes, OSVersions,
ProductProductidMap, ReleaseRepositories, CrontabberState,
CrashTypes, ReportPartitionInfo, Skiplist]

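The new RawCrashes generator emits one raw_crashes row per crash id collected in generate_crashid, with a minimal JSON document that always reports IsGarbageCollecting. A hypothetical example of the resulting row (the uuid and date below are made up):

    -- Illustrative only: one row as RawCrashes.generate_rows() would yield it.
    INSERT INTO raw_crashes (uuid, raw_crash, date_processed)
    VALUES ('00000000-0000-4000-8000-000000130611',
            '{ "uuid": "00000000-0000-4000-8000-000000130611", "IsGarbageCollecting": "1" }',
            '2013-06-11');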
5 changes: 5 additions & 0 deletions socorro/external/postgresql/models.py
@@ -194,6 +194,7 @@ class Tcbs(DeclarativeBase):
lin_count = Column(u'lin_count', INTEGER(), nullable=False, server_default=text('0'))
hang_count = Column(u'hang_count', INTEGER(), nullable=False, server_default=text('0'))
startup_count = Column(u'startup_count', INTEGER())
is_gc_count = Column(u'is_gc_count', INTEGER(), nullable=False, server_default=text('0'))

idx_tcbs_product_version = Index('idx_tcbs_product_version', product_version_id, report_date)
tcbs_report_date = Index('tcbs_report_date', report_date)
@@ -1231,6 +1232,7 @@ class SignatureProduct(DeclarativeBase):
#relationship definitions
signatures = relationship('Signature', primaryjoin='SignatureProduct.signature_id==Signature.signature_id')


class SignatureProductsRollup(DeclarativeBase):
__tablename__ = 'signature_products_rollup'

@@ -1243,12 +1245,14 @@ class SignatureProductsRollup(DeclarativeBase):
products = relationship('Product', primaryjoin='SignatureProductsRollup.product_name==Product.product_name')
signatures = relationship('Signature', primaryjoin='SignatureProductsRollup.signature_id==Signature.signature_id')


class Skiplist(DeclarativeBase):
__tablename__ = 'skiplist'

category = Column(u'category', TEXT(), primary_key=True, nullable=False)
rule = Column(u'rule', TEXT(), primary_key=True, nullable=False)


class SocorroDbVersion(DeclarativeBase):
__tablename__ = 'socorro_db_version'

@@ -1308,6 +1312,7 @@ class TcbsBuild(DeclarativeBase):
signature_id = Column(u'signature_id', INTEGER(), primary_key=True, nullable=False)
startup_count = Column(u'startup_count', INTEGER())
win_count = Column(u'win_count', INTEGER(), nullable=False, server_default=text('0'))
is_gc_count = Column(u'is_gc_count', INTEGER(), nullable=False, server_default=text('0'))

#relationship definitions

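With the new column mapped on both Tcbs and TcbsBuild, garbage-collection counts can be read straight from the matview. A hypothetical query against the updated schema (date is illustrative):

    -- Top signatures by garbage-collecting crash count for one day.
    SELECT signature_id, report_count, is_gc_count
    FROM tcbs
    WHERE report_date = '2013-06-11'
    ORDER BY is_gc_count DESC
    LIMIT 10;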
socorro/external/postgresql/raw_sql/procs/backfill_matviews.sql
@@ -1,4 +1,4 @@
CREATE OR REPLACE FUNCTION backfill_matviews(firstday date, lastday date DEFAULT NULL::date, reportsclean boolean DEFAULT true) RETURNS boolean
CREATE OR REPLACE FUNCTION backfill_matviews(firstday date, lastday date DEFAULT NULL::date, reportsclean boolean DEFAULT true, check_period interval DEFAULT '01:00:00'::interval) RETURNS boolean
LANGUAGE plpgsql
SET "TimeZone" TO 'UTC'
AS $$
@@ -65,7 +65,7 @@ WHILE thisday <= lastday LOOP
RAISE INFO 'signatures';
PERFORM update_signatures(thisday, FALSE);
RAISE INFO 'tcbs';
PERFORM backfill_tcbs(thisday);
PERFORM backfill_tcbs(thisday, check_period);
PERFORM backfill_tcbs_build(thisday);
DROP TABLE IF EXISTS new_tcbs;
RAISE INFO 'crashes by user';
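backfill_matviews gains a check_period argument, defaulting to one hour, which it forwards to backfill_tcbs; update_tcbs uses the same value in its reports_clean_done check. A hedged usage sketch, dates and interval illustrative:

    -- Backfill one day, allowing a two-hour window for the reports_clean check.
    SELECT backfill_matviews('2013-06-11', '2013-06-11', true, '02:00:00'::interval);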
104 changes: 63 additions & 41 deletions socorro/external/postgresql/raw_sql/procs/update_tcbs.sql
@@ -11,70 +11,92 @@ BEGIN
-- check that it hasn't already been run

IF checkdata THEN
PERFORM 1 FROM tcbs
WHERE report_date = updateday LIMIT 1;
IF FOUND THEN
RAISE NOTICE 'TCBS has already been run for the day %.',updateday;
RETURN FALSE;
END IF;
PERFORM 1 FROM tcbs
WHERE report_date = updateday LIMIT 1;
IF FOUND THEN
RAISE NOTICE 'TCBS has already been run for the day %.',updateday;
RETURN FALSE;
END IF;
END IF;

-- check if reports_clean is complete
IF NOT reports_clean_done(updateday, check_period) THEN
IF checkdata THEN
RAISE NOTICE 'Reports_clean has not been updated to the end of %',updateday;
IF checkdata THEN
RAISE NOTICE 'Reports_clean has not been updated to the end of %',updateday;
RETURN FALSE;
ELSE
RETURN FALSE;
ELSE
RETURN FALSE;
END IF;
END IF;
END IF;

-- populate the matview for regular releases

INSERT INTO tcbs (
signature_id, report_date, product_version_id,
process_type, release_channel,
report_count, win_count, mac_count, lin_count, hang_count,
startup_count
signature_id, report_date, product_version_id,
process_type, release_channel,
report_count, win_count, mac_count, lin_count, hang_count,
startup_count, is_gc_count
)
SELECT signature_id, updateday,
product_version_id,
process_type, release_channel,
count(*),
sum(case when os_name = 'Windows' THEN 1 else 0 END),
sum(case when os_name = 'Mac OS X' THEN 1 else 0 END),
sum(case when os_name = 'Linux' THEN 1 else 0 END),
count(hang_id),
sum(case when uptime < INTERVAL '1 minute' THEN 1 else 0 END)
WITH raw_crash_filtered AS (
SELECT
uuid
, json_object_field_text(r.raw_crash, 'IsGarbageCollecting') as is_garbage_collecting
FROM
raw_crashes r
WHERE
date_processed::date = updateday
)
SELECT signature_id
, updateday
, product_version_id
, process_type
, release_channel
, count(*)
, sum(case when os_name = 'Windows' THEN 1 else 0 END)
, sum(case when os_name = 'Mac OS X' THEN 1 else 0 END)
, sum(case when os_name = 'Linux' THEN 1 else 0 END)
, count(hang_id)
, sum(case when uptime < INTERVAL '1 minute' THEN 1 else 0 END)
, sum(CASE WHEN r.is_garbage_collecting = '1' THEN 1 ELSE 0 END) as gc_count
FROM reports_clean
JOIN product_versions USING (product_version_id)
WHERE utc_day_is(date_processed, updateday)
AND tstz_between(date_processed, build_date, sunset_date)
JOIN product_versions USING (product_version_id)
JOIN signatures USING (signature_id)
LEFT JOIN raw_crash_filtered r ON r.uuid::text = reports_clean.uuid
WHERE utc_day_is(date_processed, updateday)
AND tstz_between(date_processed, build_date, sunset_date)
GROUP BY signature_id, updateday, product_version_id,
process_type, release_channel;
process_type, release_channel;

-- populate summary statistics for rapid beta parent records

INSERT INTO tcbs (
signature_id, report_date, product_version_id,
process_type, release_channel,
report_count, win_count, mac_count, lin_count, hang_count,
startup_count )
SELECT signature_id, updateday, rapid_beta_id,
process_type, release_channel,
sum(report_count), sum(win_count), sum(mac_count), sum(lin_count),
sum(hang_count), sum(startup_count)
signature_id, report_date, product_version_id,
process_type, release_channel,
report_count, win_count, mac_count, lin_count, hang_count,
startup_count, is_gc_count )
SELECT signature_id
, updateday
, rapid_beta_id
, process_type
, release_channel
, sum(report_count)
, sum(win_count)
, sum(mac_count)
, sum(lin_count)
, sum(hang_count)
, sum(startup_count)
, sum(is_gc_count)
FROM tcbs
JOIN product_versions USING (product_version_id)
JOIN product_versions USING (product_version_id)
WHERE report_date = updateday
AND build_type = 'beta'
AND rapid_beta_id is not null
AND build_type = 'beta'
AND rapid_beta_id is not null
GROUP BY signature_id, updateday, rapid_beta_id,
process_type, release_channel;
process_type, release_channel;

-- tcbs_ranking removed until it's being used

-- done

RETURN TRUE;
END;
$$;
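The substantive change in update_tcbs is the raw_crash_filtered CTE: the day's raw_crashes rows are scanned once, json_object_field_text extracts IsGarbageCollecting from the stored JSON, and the LEFT JOIN lets the aggregate count garbage-collecting crashes without disturbing the other counts. A small self-contained sketch of the aggregate's NULL handling (the JSON literals are illustrative):

    -- Crashes without the annotation produce NULL, which the CASE counts as 0.
    SELECT sum(CASE WHEN json_object_field_text(j, 'IsGarbageCollecting') = '1'
                    THEN 1 ELSE 0 END) AS is_gc_count
    FROM (VALUES ('{"IsGarbageCollecting": "1"}'::json),
                 ('{"IsGarbageCollecting": "0"}'::json),
                 ('{}'::json)) AS t(j);
    -- expected result: 1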
4 changes: 2 additions & 2 deletions socorro/external/postgresql/setupdb_app.py
@@ -44,6 +44,8 @@ def __init__(self, sa_url, logger, autocommit=False):
def setup_admin(self):
self.session.execute('SET check_function_bodies = false')
self.session.execute('CREATE EXTENSION IF NOT EXISTS citext')
self.session.execute('CREATE EXTENSION IF NOT EXISTS hstore')
self.session.execute('CREATE EXTENSION IF NOT EXISTS json_enhancements')
self.session.execute('CREATE SCHEMA bixie')
self.session.execute('GRANT ALL ON SCHEMA bixie, public TO breakpad_rw')

@@ -459,7 +461,6 @@ def connection_url():
url_template = connection_url()
sa_url = url_template + '/%s' % 'postgres'

# Using the old connection manager style
with PostgreSQLAlchemyManager(sa_url, self.config.logger,
autocommit=False) as db:
db_version = db.version()
@@ -501,7 +502,6 @@ def connection_url():
return 0
raise

connection.execute('CREATE EXTENSION IF NOT EXISTS citext')
connection.close()

if self.no_schema:
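After setup_admin runs, the three extensions it creates should be visible in the database catalog. A quick check against the standard pg_extension catalog:

    -- Confirm the extensions created by setupdb_app.py are present.
    SELECT extname, extversion
    FROM pg_extension
    WHERE extname IN ('citext', 'hstore', 'json_enhancements')
    ORDER BY extname;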