Skip to content

Commit

Permalink
Python 3 fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
cclauss committed Oct 16, 2018
1 parent 1697ee1 commit 6a8e87e
Show file tree
Hide file tree
Showing 13 changed files with 140 additions and 116 deletions.
39 changes: 19 additions & 20 deletions py/docex.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@
writing "1+1==2 ==> True" and having it work in versions of Python
where True prints as "1" rather than as "True", and so on,
but doctest has the edge if you want to compare against something
that doesn't have an eval-able output, or if you want to test
that doesn't have an eval-able output, or if you want to test
printed output.
(4) Doctest has many more features, and is better supported.
I wrote docex before doctest was an official part of Python, but
with the refactoring of doctest in Python 2.4, I decided to switch
my code over to doctest, even though I prefer the brevity of docex.
my code over to doctest, even though I prefer the brevity of docex.
I still offer docex for those who want it.
From Python, when you want to test modules m1, m2, ... do:
Expand All @@ -70,8 +70,9 @@
First y is evaled to yield an exception type, then x is execed.
If x doesn't raise the right exception, an error msg is printed.
(5) Of the form 'statement'. Statement is execed for side effect.
(6) Of the form 'expression'. Expression is evaled for side effect.
(6) Of the form 'expression'. Expression is evaled for side effect.
"""
from __future__ import print_function

import re, sys, types

Expand All @@ -96,9 +97,9 @@ def __init__(self, modules=None, html=0, out=None,
if out:
sys.stdout = sys.__stdout__
out.close()

def __repr__(self):
if self.failed:
if self.failed:
return ('<Test: #### failed %d, passed %d>'
% (self.failed, self.passed))
else:
Expand Down Expand Up @@ -141,8 +142,8 @@ def run_docstring(self, object, search=re.compile(r'(?m)^\s*Ex: ').search):
match = search(s)
if match: self.run_string(s[match.end():])
if hasattr(object, '_docex'):
self.run_string(object._docex)
self.run_string(object._docex)

def run_string(self, teststr):
"""Run a test string, printing inputs and results."""
if not teststr: return
Expand All @@ -163,7 +164,7 @@ def run_string(self, teststr):
try:
self.evaluate(teststr)
except SyntaxError:
exec teststr in self.dictionary
exec(teststr, self.dictionary)

def evaluate(self, teststr, resultstr=None):
"Eval teststr and check if resultstr (if given) evals to the same."
Expand All @@ -172,26 +173,26 @@ def evaluate(self, teststr, resultstr=None):
self.dictionary['_'] = result
self.writeln(repr(result))
if resultstr == None:
return
return
elif result == eval(resultstr, self.dictionary):
self.passed += 1
self.passed += 1
else:
self.fail(teststr, resultstr)
self.fail(teststr, resultstr)

def raises(self, teststr, exceptionstr):
    """Exec teststr and check that it raises the exception named by exceptionstr.

    exceptionstr is evaled in self.dictionary to obtain the exception class.
    teststr is stripped, echoed with a '>>> ' prefix, and execed in the same
    dictionary.  If the expected exception is raised, a confirmation line is
    written and self.passed is incremented.  If teststr completes without
    raising, self.fail is called.  Any other exception propagates to the
    caller.
    """
    teststr = teststr.strip()
    self.writeln('>>> ' + teststr)
    except_class = eval(exceptionstr, self.dictionary)
    try:
        # Call form of exec: required on Python 3, and Python 2 accepts the
        # parenthesised (code, globals) form as well.
        exec(teststr, self.dictionary)
    except except_class:
        self.writeln('# raises %s as expected' % exceptionstr)
        self.passed += 1
        return
    self.fail(teststr, exceptionstr)

def fail(self, teststr, resultstr):
self.writeln('###### ERROR, TEST FAILED: expected %s for %s'
self.writeln('###### ERROR, TEST FAILED: expected %s for %s'
% (resultstr, teststr),
'<font color=red><b>', '</b></font>')
self.failed += 1
Expand All @@ -201,9 +202,9 @@ def writeln(self, s, before='', after=''):
s = str(s)
if self.html:
s = s.replace('&','&amp;').replace('<','&lt;').replace('>','&gt;')
print '%s%s%s' % (before, s, after)
print('%s%s%s' % (before, s, after))
else:
print s
print(s)

def seen(self, object):
"""Return true if this object has been seen before.
Expand All @@ -213,7 +214,7 @@ def seen(self, object):
return result

def main(args):
"""Run Docex. args should be a list of python filenames.
"""Run Docex. args should be a list of python filenames.
If the first arg is a non-python filename, it is taken as the
name of a log file to which output is written. If it ends in
".htm" or ".html", then the output is written as html. If the
Expand All @@ -230,9 +231,7 @@ def main(args):
for file in glob.glob(arg):
if file.endswith('.py'):
modules.append(__import__(file[:-3]))
print Docex(modules, html=html, out=out)
print(Docex(modules, html=html, out=out))

if __name__ == '__main__':
main(sys.argv[1:])


59 changes: 30 additions & 29 deletions py/ibol.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import print_function
from collections import defaultdict

def get_genomes(fname="byronbayseqs.fas.txt"):
"Return a list of genomes, and a list of their corresponding names."
import re
names, species, genomes = [], [], []
for name, g in re.findall('>(.*?)\r([^\r]*)\r*', file(fname).read()):
for name, g in re.findall('>(.*?)\r([^\r]*)\r*', open(fname).read()):
names.append(name)
species.append(name.split('|')[-1])
genomes.append(g)
Expand All @@ -14,7 +15,7 @@ def get_neighbors(fname="editdistances.txt"):
"Return dict: neighbors[i][j] = neighbors[j][i] = d means i,j are d apart."
## Read the data pre-computed from the Java program
neighbors = dict((i, {}) for i in range(n))
for line in file(fname):
for line in open(fname):
i,j,d = map(int, line.split())
neighbors[i][j] = neighbors[j][i] = d
return neighbors
Expand Down Expand Up @@ -75,15 +76,15 @@ def showh(d):
return ' '.join('%s:%s' % i for i in sorted(d.items()))

def greport(genomes):
    """Print summary statistics about the list of genomes.

    Prints the total and distinct genome counts, the number of genomes that
    carry more than one species name, the minimum and maximum genome length,
    and a character tally rendered by showh.  Uses the names n, species and
    showh, which are defined outside this function.
    """
    print("Number of genomes: %d (%d distinct)" % (len(genomes), len(set(genomes))))
    # Map each distinct genome to the set of species names attached to it.
    G = dict((g, set()) for g in genomes)
    for i in range(n):
        G[genomes[i]].add(species[i])
    print("Multi-named genomes:", (
        len([s for s in G.values() if len(s) > 1])))
    # Build a real list: on Python 3 map() returns a one-shot iterator, so
    # min() would exhaust it and max() would then see an empty sequence.
    lens = [len(g) for g in genomes]
    print("Genome lengths: min=%d, max=%d" % (min(lens), max(lens)))
    print("Character counts: ", showh(c for g in genomes for c in g))

def nreport(neighbors):
NN, NumN = defaultdict(int), defaultdict(int) ## Nearest, Number of neighbors
Expand All @@ -92,9 +93,9 @@ def nreport(neighbors):
NN[nn] += 1
for d2 in neighbors[n].values():
NumN[d2] += 1
print
print "Nearest neighbor counts:", showh(NN)
print "Number of neighbors at each distance:", showh(NumN)
print()
print("Nearest neighbor counts:", showh(NN))
print("Number of neighbors at each distance:", showh(NumN))

def nspecies(c):
    "Return how many distinct values species[g] takes over the members g of c."
    names = set()
    for g in c:
        names.add(species[g])
    return len(names)

Expand All @@ -104,34 +105,34 @@ def showc(c):

def creport(drange, dcrange):
def table(what, fn):
print "\n" + what
print ' '*8, ' '.join([' '+pct(dc, glen) for dc in dcrange])
print("\n" + what)
print(' '*8, ' '.join([' '+pct(dc, glen) for dc in dcrange]))
for d in drange:
print '%s (%2d)' % (pct(d, glen), d),
print('%s (%2d)' % (pct(d, glen), d), end=' ')
for dc in dcrange:
print '%5s' % fn(cluster(neighbors, d, dc)),
print
print '\nNearest neighbor must be closer than this percentage (places). '
print 'Each column: all genomes in cluster within this percentage of each other.'
print('%5s' % fn(cluster(neighbors, d, dc)), end=' ')
print()
print('\nNearest neighbor must be closer than this percentage (places). ')
print('Each column: all genomes in cluster within this percentage of each other.')
table("Number of clusters", len)
cluster1 = cluster(neighbors, 8, 15) ## splits Cleora
print '\nNumber of clusters of different sizes:', showh(len(c) for c in cluster1)
print('\nNumber of clusters of different sizes:', showh(len(c) for c in cluster1))
M, T = defaultdict(int), defaultdict(int)
for c in cluster1:
M[margin(c)] += 1; T[margin(c)] += len(c)
for x in M: print '%d\t%d\t%d'% (x,M[x],T[x])
print '\nMargins', showh(M)
for x in M: print('%d\t%d\t%d'% (x,M[x],T[x]))
print('\nMargins', showh(M))
for c in cluster1:
if margin(c) <= 16:
print showc(c)
print '\nScatter plot of cluster diameter vs. margin.'
print(showc(c))
print('\nScatter plot of cluster diameter vs. margin.')
for c in cluster1:
if diameter(c) > 0:
pass
#print '%d\t%d' % (diameter(c), margin(c))
print '\nDifference from cluster(neighbors, 11, 14):'
print('\nDifference from cluster(neighbors, 11, 14):')
#table(lambda cl: pct(len(cluster1)-compare(cluster1, cl),max(len(cluster1),len(cl))))
print '\nNumber of clusters witth more than one species name:'
print('\nNumber of clusters witth more than one species name:')
#table(lambda cl: sum(nspecies(c) > 1 for c in cl))
def pct_near_another(clusters, P=1.25):
total = 0
Expand All @@ -143,21 +144,21 @@ def pct_near_another(clusters, P=1.25):
total += 1
return pct(total, n)
def f(P):
print '\nPercent of individuals within %.2f*diameter of another cluster.'%P
print('\nPercent of individuals within %.2f*diameter of another cluster.'%P)
table(lambda cl: pct_near_another(cl, P))
#map(f, [1.2, 1.33, 1.5])

# Species report: prints a blank line, then for every distinct name in
# `species` gathers the indices g in range(n) that carry that name and takes
# diameter() of that group.  Diameters above 14 are presumably the ones
# printed (nesting is not recoverable from this flattened view), with a
# diameter equal to glen shown as '>25'.  SS tallies how often each diameter
# value occurs and is printed through showh() at the end.
# n, glen, diameter and showh are defined outside this function.
# The paired `print ...` / `print(...)` lines are the before and after lines
# of the Python 3 change shown in this diff view.
# NOTE(review): SS can end up with both integer diameters and the string
# '>25' as keys; showh() sorts d.items(), and Python 3 cannot order int
# against str, so the final line may raise TypeError there -- confirm.
def sreport(species):
SS = defaultdict(int)
print
print()
for s in set(species):
c = [g for g in range(n) if species[g] == s]
d = diameter(c)
if d > 14:
if d==glen: d = '>25'
print 'diameter %s for %s (%d elements)' % (d, s, len(c))
print('diameter %s for %s (%d elements)' % (d, s, len(c)))
SS[d] += 1
print 'Diameters of %d labeled clusters: %s' % (len(set(species)), showh(SS))
print('Diameters of %d labeled clusters: %s' % (len(set(species)), showh(SS)))

def compare(cl1, cl2):
"Compare two lists of clusters"
Expand All @@ -174,7 +175,7 @@ def unit_tests():
assert diameter(set()) == 0
assert diameter([17, 42]) == dist(17, 42)
assert pct(1, 2) == '50.0%'
print '\nAll tests pass.\n'
print('\nAll tests pass.\n')



Expand Down
31 changes: 16 additions & 15 deletions py/lettercount.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"""

from __future__ import division
from __future__ import print_function
from collections import Counter, defaultdict

#### Read files in Books-Ngram format; convert to a dict
Expand All @@ -31,7 +32,7 @@ def read_year_file(filename, dic=None):
"""Read a file of 'word year word_count book_count' lines and convert to a dict
{WORD: totalcount}. Uppercase all words, and only include all-alphabetic words."""
if dic is None: dic = {}
for line in file(filename):
for line in open(filename):
word, year, c1, c2 = line.split('\t')
if '_' in word:
word = word[:word.index('_')]
Expand All @@ -44,14 +45,14 @@ def read_year_file(filename, dic=None):

def write_dict(dic, filename):
    """Write a {word: count} dict to filename, one 'word<TAB>count' line per key.

    Keys are written in sorted order.  Returns None.  The file is closed
    even if a write fails part-way through.
    """
    with open(filename, 'w') as out:
        for key in sorted(dic):
            out.write('%s\t%s\n' % (key, dic[key]))

def read_dict(filename, sep='\t'):
    """Read 'word<sep>count' lines from filename into a {word: count} dict.

    Each line is split on sep and must yield exactly two fields; the second
    is converted with int().  The file is closed before returning.
    """
    with open(filename) as lines:
        pairs = (line.split(sep) for line in lines)
        return {word: int(count) for (word, count) in pairs}

#### Convert a bunch of year files into dict file format.
Expand All @@ -61,9 +62,9 @@ def report(filename, D, adj):
import time
N = len(D)
W = sum(v for v in D.itervalues())
print '%s: %s %s words (%s tokens) at %s' % (
print('%s: %s %s words (%s tokens) at %s' % (
filename, adj, format(W, ',d'), format(N, ',d'),
time.strftime("%H:%M:%S", time.gmtime()))
time.strftime("%H:%M:%S", time.gmtime())))
for f in filenames:
report(f, {}, 'starting')
D = read_year_file(f)
Expand Down Expand Up @@ -155,10 +156,10 @@ def getcount(counts, s, pos, length):
return counts[s, pos, length]


print 'start'
print('start')
#wc = word_counts('count_100K.txt')
#counts = letter_counts(wc)
print 'end'
print('end')



Expand All @@ -172,18 +173,18 @@ def num(ch):

def stats(D, NS = (1, 2, 3, 4, 5, 6)):
    """Print a table of how many distinct n-grams have been seen as words accumulate.

    Words are taken from sortedby(D).  For each n in NS every n-gram returned
    by ngrams(word, n) is counted.  After every 5000 words, and after the
    last word, one row is printed: the number of words so far in thousands,
    then for each n the count of distinct n-grams and that count as a
    percentage of 26**n.  sortedby and ngrams are defined outside this
    function.
    """
    counts = {n: Counter() for n in NS}
    print('words ' + ' '.join(' %d-grams ' % n for n in NS))
    for (i, word) in enumerate(sortedby(D), 1):
        for n in NS:
            for ng in ngrams(word, n):
                counts[n][ng] += 1
        if i % 5000 == 0 or i == len(D):
            # Whole thousands; same value %d produced from the true-division result.
            print("%4dK" % (i // 1000), end=' ')
            for n in NS:
                c = len(counts[n])
                field = "%5d (%d%%)" % (c, int(round(c*100/(26**n))))
                print('%12s' % field, end=' ')
            print()

letters = 'ETAOINSRHLDCUMFPGWYBVKXJQZ'
alphabet = ''.join(sorted(letters))
Expand Down Expand Up @@ -224,7 +225,7 @@ def substr(word, pos, length):
def lettercount(D, pos):
LC = histogram((substr(w, pos, 1), D[w]) for w in D)
del LC[None]
print LC
print(LC)
pos_name = (str(pos)+'+' if isinstance(pos, tuple) else
pos if pos < 0 else
pos+1)
Expand Down Expand Up @@ -293,7 +294,7 @@ def csvline(first, rest):
return '\t'.join([first] + map(str, rest))

def makecsv(n, D=D):
out = file('ngrams%d.csv' % n, 'w')
out = open('ngrams%d.csv' % n, 'w')
cols = columns(n)
Dng = defaultdict(lambda: defaultdict(int))
for w in D:
Expand All @@ -310,9 +311,9 @@ def makecsv(n, D=D):
if from_end <= 9:
entry[ANY, -from_end, -from_end+n-1] += N
# enumerate ngrams from word and increment counts for each one
print >> out, csvline('%d-gram' % n, map(colname, cols))
print(csvline('%d-gram' % n, map(colname, cols)), file=out)
for ng in sorted(Dng, key=lambda ng: -Dng[ng][(ANY, ANY)]):
print >> out, csvline(ng, [Dng[ng].get(col, 0) for col in cols])
print(csvline(ng, [Dng[ng].get(col, 0) for col in cols]), file=out)
out.close()
return Dng

Expand Down
7 changes: 6 additions & 1 deletion py/lis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
import math
import operator as op

# Make the name raw_input available on both Python 2 and Python 3.
try:
    raw_input          # Python 2
except NameError:
    raw_input = input  # Python 3

################ Types

Symbol = str # A Lisp Symbol is implemented as a Python str
Expand Down Expand Up @@ -96,7 +101,7 @@ def repl(prompt='lis.py> '):
"A prompt-read-eval-print loop."
while True:
val = eval(parse(raw_input(prompt)))
if val is not None:
if val is not None:
print(lispstr(val))

def lispstr(exp):
Expand Down
Loading

0 comments on commit 6a8e87e

Please sign in to comment.