Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Az error verbosity #27

Merged
merged 4 commits into from
Aug 16, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions queries/remix.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
#!/usr/bin/env python3

from xdfile.utils import get_args, open_output, find_files, log, debug, get_log, COLUMN_SEPARATOR, EOL, parse_tsv, progress, parse_pathname
from xdfile.utils import get_args, open_output, find_files, log, debug, info, error, get_log, COLUMN_SEPARATOR, EOL
from xdfile.utils import parse_tsv, progress, parse_pathname
from xdfile import corpus, xdfile, BLOCK_CHAR


# for a given grid
# for all words,
# for all words,
# show how many distinct clues there are per publication

# for each pub that has clues for all words,
Expand Down Expand Up @@ -87,7 +88,7 @@ def mutate(xd, words, chance=1):
if random.random() < chance:
nmutations += 1
xd.grid[r] = splice(xd.grid[r], c, best_replacement)
log("-> %s/%s (%s)" % (new_hwd, new_vwd, "".join(br for h, v, br in mutations_this_square)))
info("-> %s/%s (%s)" % (new_hwd, new_vwd, "".join(br for h, v, br in mutations_this_square)))
return nmutations


Expand Down Expand Up @@ -179,7 +180,7 @@ def main():
while nmutated < 100:
nmutated += mutate(xd, pub_clues)
nmissing = reclue(xd, pub_clues)
log("%s missing %d clues after %d mutations" % (outfn, nmissing, nmutated))
info("%s missing %d clues after %d mutations" % (outfn, nmissing, nmutated))

remixed.add(pubid)
outf.write_file(outfn, xd.to_unicode())
Expand All @@ -189,14 +190,14 @@ def main():
missing_tsv += COLUMN_SEPARATOR.join([ xd.xdid(), pubid, str(nmissing) ]) + EOL

except Exception as e:
log("remix error %s" % str(e))
error("remix error %s" % str(e))

if remixed:
log("%d remixed: %s" % (len(remixed), " ".join(remixed)))
info("%d remixed: %s" % (len(remixed), " ".join(remixed)))
try:
outf.write_file(parse_pathname(fn).base + ".xd", contents.encode("utf-8"))
except Exception as e:
log("couldn't write: " + str(e))
error("couldn't write: " + str(e))

outf.write_file("remix.log", get_log().encode("utf-8"))
outf.write_file("remix.tsv", missing_tsv)
Expand Down
8 changes: 6 additions & 2 deletions scripts/00-aws-bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ chmod 600 $SSHHOME/.ssh/gxd_rsa
cat src/aws/ssh_config >> $SSHHOME/.ssh/config
ssh-agent bash -c "ssh-add $SSHHOME/.ssh/gxd_rsa; git clone ${GXD_GIT}"

# Import all .tsv to sql
echo "Import all .tsv to sql"
scripts/05-sql-import-receipts.sh

echo "Run deploy script"
Expand All @@ -59,11 +59,15 @@ echo 'SUMMARY: End time '`date +'%Y-%m-%d %H:%M'`
egrep -i 'ERROR|WARNING|SUMMARY' ${LOGFILE} > ${SUMLOGFILE}
echo -e '\n' >> ${SUMLOGFILE}

echo "Getting summary"
scripts/48-stats.sh >> ${SUMLOGFILE}
echo -e '\n' >> ${SUMLOGFILE}

echo "SUMMARY: Full log file http://$BUCKET/logs/`basename ${LOGFILE}`"
echo "SUMMARY: Full log file http://$BUCKET/logs/`basename ${LOGFILE}`" >> ${SUMLOGFILE}

echo "Sending email"
scripts/send-email.py $ADMIN_EMAIL "execution logs for $TODAY" ${SUMLOGFILE}

echo "Copy logs to AWS"
aws s3 cp --region ${REGION} ${LOGFILE} s3://${BUCKET}/logs/ --acl public-read
aws s3 cp --region ${REGION} ${SUMLOGFILE} s3://${BUCKET}/logs/ --acl public-read
9 changes: 5 additions & 4 deletions scripts/09-collection2zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
import zipfile

from xdfile.metadatabase import xd_sources_row, xd_sources_header
from xdfile.utils import find_files_with_time, get_log, get_args, filetime, args_parser, parse_pathname, log, iso8601, open_output, strip_toplevel
from xdfile.utils import find_files_with_time, get_log, get_args, filetime, args_parser, parse_pathname
from xdfile.utils import log, info, iso8601, open_output, strip_toplevel


def main():
p = args_parser('catalog source files and create source.tsv')
p.add_argument('-s', '--source', default=None, help='ExternalSource')
args = get_args(parser=p)

log("importing from %s" % args.source)
info("importing from %s" % args.source)

outf = open_output()

Expand All @@ -25,14 +26,14 @@ def main():
for input_source in args.inputs:
for fn, contents, dt in find_files_with_time(input_source):
if len(contents) == 0:
log("ignoring empty file")
info("ignoring empty file")
continue

outf.write_file(strip_toplevel(fn), contents, dt)

sources.append(xd_sources_row(fn, args.source or input_source, iso8601(dt)))

log("%s files cataloged" % len(sources))
info("%s files cataloged" % len(sources))

outbase = parse_pathname(args.output).base

Expand Down
4 changes: 2 additions & 2 deletions scripts/12-parse-email.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

from xdfile.utils import open_output, log, find_files, get_args, parse_pathname, generate_zip_files, iso8601, to_timet
from xdfile.utils import open_output, info, log, find_files, get_args, parse_pathname, generate_zip_files, iso8601, to_timet
from xdfile.metadatabase import xd_sources_header, xd_sources_row
from xdfile.cloud import xd_send_email

Expand Down Expand Up @@ -53,7 +53,7 @@ def main():
for puzfn, puzdata, puzdt in email_files:
# a basic sanity check of filesize
# accommodate small puzzles and .pdf
log("%s: %s from %s" % (puzfn, iso8601(puzdt), upload_src))
info("%s: %s from %s" % (puzfn, iso8601(puzdt), upload_src))

summary("%s puzzles from %s" % (len(email_files), upload_src))

Expand Down
4 changes: 2 additions & 2 deletions scripts/19-reshelve.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ def main():
seqnum = utils.parse_seqnum(r.xdid or r.SourceFilename)
if seqnum:
newxdid = newpubid + seqnum
utils.log("changing xdid from '%s' to '%s'" % (r.xdid, newxdid))
utils.info("changing xdid from '%s' to '%s'" % (r.xdid, newxdid))
d["xdid"] = newxdid
else:
utils.log("no date or number in xdid, not reshelving")
utils.info("no date or number in xdid, not reshelving")

all_receipts += metadb.xd_receipts_row(**d)

Expand Down
9 changes: 4 additions & 5 deletions scripts/21-clean-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Usage: $0 [-o <puzzles.tsv>] <input>
#
# Generates puzzles.tsv with cleaned metadata for each .xd in <input>.
# Generates puzzles.tsv with cleaned metadata for each .xd in <input>.
#

from xdfile import utils, metadatabase as metadb
Expand Down Expand Up @@ -96,7 +96,7 @@ def clean_headers(xd):
xd.set_header(hdr, None)
else:
if hdr.lower() not in xdfile.HEADER_ORDER:
utils.log("%s: '%s' header not known: '%s'" % (xd.filename, hdr, xd.headers[hdr]))
utils.warn("%s: '%s' header not known: '%s'" % (xd.filename, hdr, xd.headers[hdr]))

# clean Author and Editor headers
author = xd.get_header("Author") or ""
Expand All @@ -122,8 +122,7 @@ def clean_headers(xd):

if newtitle != title:
xd.set_header("Title" + CLEAN_SUFFIX, newtitle)

# create Date header
# create Date header
dt = xd.get_header("Date")

## try getting Date from filename
Expand All @@ -133,7 +132,7 @@ def clean_headers(xd):
if d:
dt = d.strftime("%Y-%m-%d")
except Exception as e:
utils.log(str(e))
utils.error(str(e))
if args.debug:
raise

Expand Down
14 changes: 6 additions & 8 deletions scripts/25-analyze-puzzle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#

from queries.similarity import find_similar_to, find_clue_variants, load_clues, load_answers, grid_similarity
from xdfile.utils import get_args, open_output, find_files, log, debug, get_log, COLUMN_SEPARATOR, EOL, parse_tsv, progress, parse_pathname
from xdfile.utils import get_args, open_output, find_files, log, info, debug, get_log, COLUMN_SEPARATOR, EOL, parse_tsv, progress, parse_pathname
from xdfile import xdfile, corpus, ClueAnswer, BLOCK_CHAR
import time
from xdfile import utils, metadatabase
Expand All @@ -27,25 +27,24 @@ def main():
if mainxd.xdid() in prev_similar:
continue # skip reprocessing .xd that are already in similar.tsv

""" find similar grids (pct, xd) for the mainxd in the corpus.
""" find similar grids (pct, xd) for the mainxd in the corpus.
Takes about 1 second per xd. sorted by pct.
"""
similar_grids = sorted(find_similar_to(mainxd, corpus(), min_pct=0.20),
similar_grids = sorted(find_similar_to(mainxd, corpus(), min_pct=0.20),
key=lambda x: x[0], reverse=True)

if similar_grids:
log("similar: " + " ".join(("%s=%s" % (xd2.xdid(), pct))
info("similar: " + " ".join(("%s=%s" % (xd2.xdid(), pct))
for pct, xd1, xd2 in similar_grids))

mainpubid = mainxd.publication_id()
maindate = mainxd.date()

# go over each clue/answer, find all other uses, other answers, other possibilities.
# go over each clue/answer, find all other uses, other answers, other possibilities.
# these are added directly to similar.tsv
nstaleclues = 0
nstaleanswers = 0
ntotalclues = 0

for pos, mainclue, mainanswer in mainxd.iterclues():
progress(mainanswer)

Expand Down Expand Up @@ -77,7 +76,7 @@ def main():
uses = []
for bc, nuses in bclues.items():
# then find all clues besides this one
clue_usages = [ ca for ca in load_clues().get(bc, [])
clue_usages = [ ca for ca in load_clues().get(bc, [])
if ca.answer == mainanswer and ca.date < maindate ]

if clue_usages:
Expand All @@ -89,7 +88,6 @@ def main():
else:
ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1]
uses.append((ca, nuses))

# summary row to similar.tsv
row_header = 'xdid similar_grid_pct reused_clues reused_answers total_clues matches'
metadatabase.append_row('gxd/similar.tsv', row_header, [
Expand Down
6 changes: 2 additions & 4 deletions scripts/31-mkwww-publishers.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def main():
puzzles = metadb.xd_puzzles()
outf.write_html('pub/index.html', pubyear.pubyear_html(), title='The xd crossword puzzle corpus')

utils.log("collating puzzles")
utils.info("collating puzzles")
for puzrow in puzzles.values():
pubid = utils.parse_pubid(puzrow.xdid)
year = xdfile.year_from_date(puzrow.Date)
Expand All @@ -94,11 +94,9 @@ def main():
all_pubs[k].add(puzrow)

pubyear_header = [ 'xdid', 'Date', 'Size', 'Title', 'Author', 'Editor', 'Copyright', 'Grid_1A_1D', 'ReusedCluePct', 'SimilarGrids' ]
utils.log('generating index pages')

utils.info('generating index pages')
# dict to generate pub page with calendars
pub_grids = defaultdict(dict)

for pair, pub in sorted(list(all_pubs.items())):
c_grids = {}
pubid, year = pair
Expand Down
16 changes: 4 additions & 12 deletions scripts/35-mkwww-diffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ def main():

similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')
xdids_todo = args.inputs or [ xdid for xdid, matches in metadb.get_similar_grids().items() if matches ]

for mainxdid in xdids_todo:
progress(mainxdid)

Expand All @@ -102,7 +101,6 @@ def main():
xddates[mainxdid] = mainxd.date() # Dict to store XD dates for further sort
html_grids = {}
html_clues = {}

# Store in list to make further formatting as html table easier
html_grids[mainxdid] = grid_diff_html(xdfile.get_xd(mainxdid))

Expand All @@ -114,7 +112,6 @@ def main():
diff_h += mktag('span', tagclass='main', inner='&nbsp;~&nbsp;' + mainanswer.upper())
diff_l.append(diff_h)
html_clues[mainxdid] = diff_l

# Process for all matches
for xdid in matches:
xd = xdfile.get_xd(xdid)
Expand All @@ -124,11 +121,10 @@ def main():
xddates[xdid] = xd.date()
# output each grid
html_grids[xdid] = grid_diff_html(xd, compare_with=mainxd)

diff_l = []
# output comparison of each set of clues
for pos, clue, answer in xd.iterclues():
diff_h = mktag('div','fullgrid') + '%s.&nbsp;' %pos
diff_h = mktag('div','fullgrid') + '%s.&nbsp;' %pos
# Sometimes can return clue == None
sm = difflib.SequenceMatcher(lambda x: x == ' ', mainxd.get_clue(pos) or '', clue)
if sm.ratio() < 0.50:
Expand All @@ -141,30 +137,26 @@ def main():
diff_h += '<span class="match">%s</span>' % clue[b1:b2]
else:
diff_h += '<span class="diff">%s</span>' % clue[b1:b2]

diff_h += mktag('span', tagclass=(answer == mainxd.get_answer(pos)) and 'match' or 'diff', inner='&nbsp;~&nbsp;' + answer.upper())
diff_h += mktag('/div')
diff_l.append(diff_h)
html_clues[xdid] = diff_l

html_clues[xdid] = diff_l

# Wrap into table
diff_h = mktag('table') + mktag('tr')
# Sort by date
sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
for w, dt in sortedkeys:
# Wrap into table
diff_h += mktag('td') + html_grids[w] + mktag('/td')
diff_h += mktag('/tr')

for i, clue in enumerate(html_clues[sortedkeys[0][0]]):
diff_h += mktag('tr')
for w, dt in sortedkeys:
if i < len(html_clues[w]):
diff_h += mktag('td') + html_clues[w][i] + mktag('/td')
diff_h += mktag('/tr')
diff_h += mktag('/tr')
diff_h += mktag('/table')

outf.write_html('pub/%s/index.html' % mainxdid, diff_h, title='Comparison for ' + mainxdid)


Expand Down
2 changes: 1 addition & 1 deletion scripts/40-deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ aws s3 sync --region $REGION $WWW ${S3WWW}/ --acl public-read
# concatenate all logfiles from working dirs and copy to cloud
ALLLOGS=$WWW/log/$TODAY-logs.txt
scripts/49-cat-logs.py -o $ALLLOGS $PUB $TMP
aws s3 cp --region $REGION $ALLLOGS ${S3WWW}/log/ --acl public-read
aws s3 cp --region $REGION $ALLLOGS ${S3WWW}/logs/ --acl public-read

7 changes: 3 additions & 4 deletions scripts/rewrite_corpus
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python

from xdfile.utils import get_args, get_parser, open_output, log
from xdfile.utils import get_args, get_parser, open_output, log, info, error, warn

import xdfile
import os
Expand All @@ -9,7 +9,6 @@ import os
def collapse_whitespace(s):
    """Return *s* with every line individually stripped and the results
    concatenated (no separators), then stripped once more at the edges."""
    stripped_lines = [line.strip() for line in s.splitlines()]
    return u"".join(stripped_lines).strip()


if __name__ == "__main__":
p = get_parser("rewrite corpus")
p.add_argument('--noclues', nargs='?', help='omit clues')
Expand All @@ -28,10 +27,10 @@ if __name__ == "__main__":
outxdtt = xd.transpose().transpose()
if collapse_whitespace(outxd) != collapse_whitespace(outxdtt):
for a, b in xd.diffs(outxdtt):
log("diff: %s | %s" % (a, b))
info("diff: %s | %s" % (a, b))
raise Exception("differs when double-transposed")
except Exception, e:
log(unicode(e))
error(unicode(e))
if args.debug:
raise

Expand Down
4 changes: 2 additions & 2 deletions scripts/tsv2sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import sqlite3
import xdfile.utils
from xdfile.utils import args_parser, get_args
from xdfile.utils import args_parser, get_args, info
from xdfile import metadatabase as metadb


Expand All @@ -19,7 +19,7 @@ def main():
cur = sqlconn.cursor()

rows = [list(r) for r in xdfile.utils.parse_tsv_rows(args.inputs[0], "Receipt")]
print("Rows to be inserted to sql: %s" % len(rows))
info("Rows to be inserted to sql: %s" % len(rows))
cur.executemany('INSERT INTO receipts VALUES (?,?,?,?,?,?)', rows)
sqlconn.commit()

Expand Down
Loading