Skip to content

Commit

Permalink
[ec] A very simple tracking stats export.
Browse files Browse the repository at this point in the history
Paster command to export some very simple tracking stats to a csv file.

(cherry picked from commits 18ca569 and
90d1ef6)
  • Loading branch information
icmurray committed Dec 4, 2012
1 parent 8ccc414 commit 5fcd88a
Showing 1 changed file with 84 additions and 7 deletions.
91 changes: 84 additions & 7 deletions ckan/lib/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import collections
import csv
import os
import datetime
import sys
Expand Down Expand Up @@ -942,30 +944,50 @@ def clean(self, user_ratings=True):
rating.purge()
model.repo.commit_and_remove()

## Used by the Tracking class
_ViewCount = collections.namedtuple("ViewCount", "id name count")

class Tracking(CkanCommand):
'''Update tracking statistics
Usage:
tracking - update tracking stats
tracking update [start-date] - update tracking stats
tracking export <file> [start-date] - export tracking stats to a csv file
'''

summary = __doc__.split('\n')[0]
usage = __doc__
max_args = 1
min_args = 0
max_args = 3
min_args = 1

def command(self):
self._load_config()
import ckan.model as model
engine = model.meta.engine

if len(self.args) == 1:
# Get summeries from specified date
start_date = datetime.datetime.strptime(self.args[0], '%Y-%m-%d')
cmd = self.args[0]
if cmd == 'update':
start_date = self.args[1] if len(self.args) > 1 else None
self.update_all(engine, start_date)
elif cmd == 'export':
if len(self.args) <= 1:
print self.__class__.__doc__
exit(1)
output_file = self.args[1]
start_date = self.args[2] if len(self.args) > 2 else None
self.update_all(engine, start_date)
self.export_tracking(engine, output_file)
else:
print self.__class__.__doc__
exit(1)

def update_all(self, engine, start_date=None):
if start_date:
start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d')
else:
# No date given. See when we last have data for and get data
# from 2 days before then in case new data is available.
# If no date here then use 2010-01-01 as the start date
# If no date here then use 2011-01-01 as the start date
sql = '''SELECT tracking_date from tracking_summary
ORDER BY tracking_date DESC LIMIT 1;'''
result = engine.execute(sql).fetchall()
Expand All @@ -985,6 +1007,61 @@ def command(self):
print 'tracking updated for %s' % start_date
start_date = stop_date

def _total_views(self, engine):
sql = '''
SELECT p.id,
p.name,
COALESCE(SUM(s.count), 0) AS total_views
FROM package AS p
LEFT OUTER JOIN tracking_summary AS s ON s.package_id = p.id
GROUP BY p.id, p.name
ORDER BY total_views DESC
'''

return [ _ViewCount(*t) for t in engine.execute(sql).fetchall() ]

def _recent_views(self, engine, measure_from):

sql = '''
SELECT p.id,
p.name,
COALESCE(SUM(s.count), 0) AS total_views
FROM package AS p
LEFT OUTER JOIN tracking_summary AS s ON s.package_id = p.id
WHERE s.tracking_date >= %(measure_from)s
GROUP BY p.id, p.name
ORDER BY total_views DESC
'''

return [ _ViewCount(*t) for t in engine.execute(
sql,
measure_from=str(measure_from)
).fetchall() ]

def export_tracking(self, engine, output_filename):
'''Write tracking summary to a csv file.'''

HEADINGS = [
"dataset id",
"dataset name",
"total views",
"recent views (last 2 weeks)",
]

measure_from = datetime.date.today() - datetime.timedelta(days=14)
recent_views = self._recent_views(engine, measure_from)
total_views = self._total_views(engine)

with open(output_filename, 'w') as fh:
f_out = csv.writer(fh)
f_out.writerow(HEADINGS)
recent_views_for_id = dict( (r.id, r.count) for r in recent_views )
f_out.writerows([ (r.id,
r.name,
r.count,
recent_views_for_id.get(r.id, 0))
for r in total_views ])

def update_tracking(self, engine, summary_date):
PACKAGE_URL = '/dataset/'
# clear out existing data before adding new
Expand Down

0 comments on commit 5fcd88a

Please sign in to comment.