century-arcade · saulpw · Sep 2, 2016 · Aug 18, 2016 · Aug 19, 2016 · Aug 19, 2016
diff --git a/scripts/05-sql-import-receipts.sh b/scripts/05-sql-import-receipts.sh
@@ -7,4 +7,8 @@ METADB=meta.db
 if [ ! -f $METADB ] ; then
     sqlite3 $METADB < ./scripts/meta.sql
     ./scripts/tsv2sqlite.py ${DEBUG} -o ${METADB} gxd/receipts.tsv
+    ./scripts/tsv2sqlite.py ${DEBUG} -o ${METADB} gxd/publications.tsv
+    ./scripts/tsv2sqlite.py ${DEBUG} -o ${METADB} gxd/similar.tsv
 fi
+
+
diff --git a/scripts/22-pubyears.py b/scripts/22-pubyears.py
@@ -12,9 +12,21 @@ def main():
     args = utils.get_args('generate pub-years data')
 
     pubyears = [ (utils.parse_pubid(r.xdid), year_from_date(r.Date), dow_from_date(r.Date)) 
-					for r in metadb.xd_puzzles().values() ]
-
-    weekdays = [ 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun' ] 
+                    for r in metadb.xd_puzzles().values() ]
+
+    weekdays = [ 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun' ]
+
+    metasql.execute("INSERT INTO stats VALUES (?,?,?, ?,?, ?, ?,?,?, ?,?, ?,?)",
+            (pubid, year, weekday,
+                maineditor, maincopyright,
+                nexisting, nxd, npublic,
+                reprints, touchups, redones,
+                copies, themecopies))
+
+
+
+
+
     pubs = defaultdict(dict)
     for pubid, year, dow  in pubyears:
         if pubid not in pubs or int(year) not in pubs[pubid]:
@@ -29,12 +41,13 @@ def main():
             if y < 1900 or y > 2100:
                 continue
             # Preserve weekday order
-            dow_list = []
             for d in weekdays:
-                dow_list.append(str(years_dow[y][d]))
-
-            metadb.append_row('pub/pubyears.tsv', "pubid year total " + " ".join(weekdays), 
-                    [ pubid, y, sum(years_dow[y].values()), "\t".join(dow_list) ])
+                ndup = 0
+                nsusp = 0
+                for r in metasql.xd_similar():
+                    "SELECT * FROM similar WHERE pubid = ? AND year = ?", pubid, y)
+                ("INSERT INTO pubyears VALUES (?, ?, ?, ?, ?)metadb.append_row('pub/pubyears.tsv', "pubid year weekday total ndup nsusp",
+                    [ pubid, y, d, years_dow[y][d], ndup, nsusp])
 
 if __name__ == "__main__":
     main()
diff --git a/scripts/37-pubyear-svg.py b/scripts/37-pubyear-svg.py
@@ -0,0 +1,112 @@
+#!/usr/bin/python3
+
+from datetime import date
+from xdfile import utils
+import xdfile
+
+pys = '''
+<svg class="year_widget" width="30" height="30">
+  <g transform="translate(0,0)">
+    <rect class="%s" width="30" height="30"></rect>
+  </g>
+%s
+</svg>
+'''
+
+
+def rect(x, y, w, h, *classes):
+    """Return a <rect> element translated to (x, y), sized w by h, with space-separated CSS classes."""
+    return '<rect transform="translate({x},{y})" class="{classes}" width="{w}" height="{h}"></rect>'.format(x=x, y=y, w=w, h=h, classes=' '.join(classes))
+
+def year_from(dt):  # text before the first '-' in dt, as an int ("2016-07-18" -> 2016)
+    return int(dt.partition('-')[0])
+
+def weekdays_between(dta, dtb, weekday=None):
+    """Stub that always returns 0; `weekday` is optional so both two- and three-argument calls work."""
+    return 0  # TODO: count how often `weekday` falls between dta and dtb
+
+def pubyear_svg(corpus, nsusp, ndup, npub, npriv):
+    """Return SVG markup for one pub-year widget: a row of coloured segments per weekday.
+
+    corpus -- indexable collection of puzzles, read for size and Date (placeholder indexing)
+    nsusp, ndup, npub, npriv -- widths of the suspxd, dupxd, pubxd and privxd segments
+    Returns the module-level `pys` template filled with the background class and the rows.
+    """
+    bgclass = "notexists"
+#    if bgclass not in publications.tsv:
+#       bgclass = "exists"
+
+    # One id per row; a single hard-coded id would be repeated seven times in the output.
+    day_ids = ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun')
+    rects = ''
+
+    for i in range(0, 7):
+        y = i*3
+
+        # TODO: find first xd of weekday i
+        firstxd = corpus[i]
+        # NOTE(review): placeholder index -- 1-i runs 1, 0, -1, ..., -5; confirm what "last" should be
+        lastxd = corpus[1-i]
+
+        sz = firstxd.width() * firstxd.height()
+        h = 3 if sz > 17*17 else 2  # taller bar when the grid has more cells than 17x17
+
+        npre = weekdays_between(date(year_from(firstxd.Date), 1, 1), firstxd.Date, i)
+        npost = weekdays_between(lastxd.Date, date(year_from(lastxd.Date), 12, 31), i)
+
+        # (width, css class) pairs, drawn left to right
+        segments = (
+            (npre, 'prexd'),
+            (nsusp, 'suspxd'),
+            (ndup, 'dupxd'),
+            (npriv, 'privxd'),
+            (npub, 'pubxd'),
+            (npost, 'postxd'),
+        )
+
+        rects += '''<g id="{day}" transform="translate(0,{y})">'''.format(day=day_ids[i], y=y)
+        x = 0
+        for w, cls in segments:
+            # The enclosing <g> already moves the row down by y, so each rect sits at y=0
+            # inside it; translating by y again would offset every row twice.
+            rects += rect(x, 0, w, h, cls)
+            x += w
+        rects += '</g>'
+
+    return pys % (bgclass, rects)
+
+
+def main():
+    """Print an HTML page that embeds the SVG widget for the hard-coded 'up2011' pub-year."""
+    p = utils.args_parser(desc="generate an HTML page with the pub-year SVG widget")
+    p.add_argument('-a', '--all', action='store_true', default=False, help='analyze all puzzles, even those already in similar.tsv')
+    args = utils.get_args(parser=p)
+    outf = utils.open_output()
+
+    prev_similar = utils.parse_tsv('gxd/similar.tsv', "similar")
+    pubyears = {}  # publication id + year (e.g. 'up2011') -> list of its puzzles
+    for xd in xdfile.corpus():
+        pubyears.setdefault(xd.publication_id() + str(xd.year()), []).append(xd)
+
+    # TODO: compute the real segment counts; zeros are placeholders so the five-argument call is well-formed.
+    counts = dict(nsusp=0, ndup=0, npub=0, npriv=0)
+    print('''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html>
+<head>
+    <meta charset="utf-8">
+    <meta name="description" content="">
+    <meta name="keywords" content="">
+    <meta name="author" content="">
+    <title></title>
+    <link href="style.css" rel="stylesheet" type="text/css">
+    <script src="script.js"></script>
+</head>
+<body>
+%s
+</body>
+</html>''' % pubyear_svg(pubyears['up2011'], **counts))
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/scripts/meta.sql b/scripts/meta.sql
@@ -1,11 +1,79 @@
-CREATE TABLE "receipts" (
-    "CaptureTime" TEXT,
-    "ReceivedTime" TEXT,
-    "ExternalSource" TEXT,
-    "InternalSource" TEXT,
-    "SourceFilename" TEXT,
-    "xdid" TEXT
+-- xd
+
+CREATE TABLE receipts (
+    CaptureTime TEXT,
+    ReceivedTime TEXT,
+    ExternalSource TEXT,
+    InternalSource TEXT,
+    SourceFilename TEXT,
+    xdid CHAR(16),  -- looked up through the XDID index created below
+    PRIMARY KEY (ExternalSource, SourceFilename)  -- composite key: one row per source + filename pair
+);
+
+CREATE INDEX XDID on receipts (xdid ASC);
+
+
+CREATE TABLE similar_grids (
+    xdid CHAR(16),
+    xdidMatch CHAR(16),  -- presumably the xdid of the puzzle whose grid matched; confirm against the importer
+    GridMatchPct INTEGER  -- presumably a 0-100 percentage; confirm against the importer
+);
+
+
+CREATE TABLE similar_clues (
+    xdid CHAR(16),
+    reused_clues INTEGER,
+    reused_answers INTEGER,
+    total_clues INTEGER  -- last column: no trailing comma, SQLite rejects "," before ")"
+);
+
+
+CREATE TABLE publications (
+    PublicationAbbr CHAR(8),
+    PublisherAbbr CHAR(8),
+    PublicationName TEXT,
+    PublisherName TEXT,
+    FirstIssueDate CHAR(10),
+    LastIssueDate CHAR(10),
+    NumberIssued INTEGER,
+    Contact TEXT,
+    Sources TEXT
 );
 
-CREATE INDEX "XDID" on receipts (xdid ASC);
+
+CREATE TABLE puzzles (
+    xdid CHAR(16),  -- "eltana-001"
+    Date CHAR(10),  -- "2016-07-18"
+    Size CHAR(8),   -- "15x15RS" (Rebus/Special)
+
+    Title TEXT,
+    Author TEXT,
+    Editor TEXT,
+    Copyright TEXT,
+    A1_D1 TEXT      -- last column: no trailing comma, SQLite rejects "," before ")"
+);
+
+
+-- grouped by pub-year-weekday
+CREATE TABLE pubyears (
+    pubid CHAR(6),   -- "nyt"
+    year CHAR(4),    -- "2006"
+    weekday CHAR(3), -- "Mon"
+
+    Editor TEXT, -- most common entry
+    Copyright TEXT, -- most common, after removing Date/Author
+
+    NumExisting INTEGER, -- known or assumed to be in existence (0 means unknown)
+    NumXd INTEGER,       -- total number in xd
+    NumPublic INTEGER,   -- available for public download
+
+    -- duplicate grids, same author
+    NumReprints INTEGER, -- 100% grid match
+    NumTouchups INTEGER, -- 75-99% grid match
+    NumRedone INTEGER,   -- 30-75% grid match
+
+    -- duplicate grids, different author
+    NumSuspicious INTEGER, -- >50% similar grid
+    MaxSuspiciousPct INTEGER -- highest grid match of all suspicious (last column: no trailing comma)
+);
 
diff --git a/xdfile/pubyear.py b/xdfile/pubyear.py
@@ -1,10 +1,8 @@
-import cgi
-from collections import Counter, defaultdict
+from collections import defaultdict
 
-from xdfile.html import th, td, mkhref, mktag, tr_empty, td_with_class, year_widget, decade_widget
+from xdfile.html import mkhref, mktag, tr_empty, td_with_class
 from xdfile import utils, metadatabase as metadb
 from xdfile.utils import space_with_nbsp
-import xdfile
 from datetime import date
 
 
@@ -18,13 +16,13 @@ def mkcell(text, href="", title=""):
 def split_year(y):
     lsy = str(y)[2:]
     if y[3] != '0':
-        #msy = ' '  # unicode M space
-        msy = '&nbsp;' # Changed to &nbsp;
+        msy = '&nbsp;'  # Changed to &nbsp;
     else:
         msy = str(y)[:2]
 
     return "%s<br/>%s" % (msy, lsy)
 
+
 def get_pubheader_classes(*years):
     """
     Assign classes to years header
@@ -34,9 +32,34 @@ def get_pubheader_classes(*years):
         if "&nbsp" in str(y):
             classes.append("ord-year")
         else:
-            classes.append("zero-year")    
+            classes.append("zero-year")
     return classes
-
+
+
+def year_widget(dow_dict, total, fill_class=None):
+    """Return SVG markup with one bar per weekday, as wide as that weekday's count (`total` is not read)."""
+    parts = ['<svg class="year_widget" width="30" height="30">']
+    parts.append('<g transform="translate(0,0)"><rect class="%s" width="30" height="30"></rect></g>' % (fill_class or 'white'))
+    for row, day in enumerate(utils.WEEKDAYS):
+        entry = dow_dict[day]
+        bar_class = entry.get('class', '')
+        bar_width = str(entry.get('count', '0'))
+        if int(bar_width) >= 26:  # 26 = 52/2: such rows are drawn fully filled
+            bar_width = '30'
+        parts.append(''.join(['<g transform="translate(0,', str(row * 4), ')"><rect class="', bar_class, '" width="', bar_width, '" height="3"></rect></g>']))
+    parts.append('</svg>')
+    return ' '.join(parts)
+
+def decade_widget(total, fill_class=None):
+    """Return SVG markup for a collapsed decade cell that shows `total` as text."""
+    background = fill_class or 'green'  # default fill when the caller gives none
+    return ' '.join([
+        '<svg class="year_widget" width="30" height="30">',
+        '<g transform="translate(0,0)"><rect class="%s" width="30" height="30"></rect></g>' % background,
+        '<text x="25" y="18">' + str(total) + '</text>',
+        '</svg>',
+    ])
+
 
 g_all_pubyears = None
 def pubyear_html(pubyears=[], skip_decades=None):
@@ -47,9 +70,9 @@ def pubyear_html(pubyears=[], skip_decades=None):
     if not g_all_pubyears:
         g_all_pubyears = utils.parse_tsv_data(open("pub/pubyears.tsv").read(), "pubyear")
 
-    
+
     # Read similars to make background of widgets
-    similar_d = defaultdict(dict) 
+    similar_d = defaultdict(dict)
     for xdid, v in utils.parse_tsv('gxd/similar.tsv', "similar").items():
         xd_split = utils.split_xdid(xdid)
         if xd_split:
@@ -61,7 +84,7 @@ def pubyear_html(pubyears=[], skip_decades=None):
 
     b = [] # Body
 
-    # Making collapsed decaded depends on args
+    # Making collapsed decades depends on args
     skip_decades = skip_decades if skip_decades else { 'start': 1910, 'end': 1970 } 
     allyears = []
     for i in range(skip_decades['start']//10, skip_decades['end']//10 + 1):
@@ -108,25 +131,25 @@ def pubyear_html(pubyears=[], skip_decades=None):
                     'hint': hint,
                     'total': int(total),
                     }
-    
+
     # main table
     b.append('<table class="pubyears">')
     yhdr = [ '&nbsp;' ] + [ split_year(y) for y in allyears ]
     yhdr.append("all")
     b.append(td_with_class(*yhdr, classes=get_pubheader_classes(*yhdr),
             rowclass="pubyearhead",tag="th"))
-    b.append(tr_empty()) 
-   
-    # Process each pubid sorted by earliest year 
+    b.append(tr_empty())
+
+    # Process each pubid sorted by earliest year
     for pubid in sorted(pubs, key=lambda x:min(pubs[x])):
         pub = metadb.xd_publications().get(pubid)
         pubname = pub.PublicationName if pub else ''
-        # Pub id to first column 
+        # Pub id to first column
         b.append(mktag('tr'))
         b.append(mktag('td','pub'))
         b.append(mkcell(space_with_nbsp(pubname or pubid), "/pub/" + pubid, ))
         b.append(mktag('/td'))
-       
+
         # Process each year not collapsed into decade
         for yi in allyears:
             if yi in pubs[pubid] and pubs[pubid][yi]['total'] > 0:
@@ -140,11 +163,11 @@ def pubyear_html(pubyears=[], skip_decades=None):
                 b.append(mktag('td', 'block'))
                 b.append('&nbsp;')
                 b.append(mktag('/td'))
-                
+
         b.append(mktag('td'))
         b.append(str(sum([ pubs[pubid][x]['total'] for x in pubs[pubid].keys() ])))
         b.append(mktag('/td'))
         b.append(mktag('/tr'))
-   
+
     b.append(mktag('/table'))
     return (" ".join(b))