Skip to content

Commit

Permalink
CLI added to audfprint.py, nicer messages
Browse files Browse the repository at this point in the history
  • Loading branch information
dpwe committed May 27, 2014
1 parent 0f082f7 commit 843420e
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 12 deletions.
85 changes: 83 additions & 2 deletions audfprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ def ingest(ht, filename):
"""
targetsr = 11025
d, sr = librosa.load(filename, sr=targetsr)
# librosa.load on mp3 files prepends 396 samples compared
# to Matlab audioread
hashes = landmarks2hashes(peaks2landmarks(find_peaks(d, sr)))
ht.store(filename, hashes)
return (len(d)/float(sr), len(hashes))
Expand All @@ -235,8 +237,87 @@ def glob2hashtable(pattern):
if test:
fn = '/Users/dpwe/Downloads/carol11k.wav'
ht = hash_table.HashTable()

ingest(ht, fn)
ht.save('httest.pklz')


# Command line interface

import audfprint_match
import docopt
import time

# docopt usage text.  It is handed to docopt.docopt() in main(), which
# parses it to build the dictionary of commands, options and positional
# arguments, so its wording and layout are part of the program's behavior.
usage = """
Audio landmark-based fingerprinting.
Create a new fingerprint dbase with new,
append new files to an existing database with add,
or identify noisy query excerpts with match.
Usage: audfprint (new | add | match) (-d <dbase> | --dbase <dbase>) [options] <file>...
Options:
-n <dens>, --density <dens> Target hashes per second [default: 7.0]
-h <bits>, --hashbits <bits> How many bits in each hash [default: 20]
-b <val>, --bucketsize <val> Number of entries per bucket [default: 100]
-t <val>, --maxtime <val> Largest time value stored [default: 16384]
-l, --list Input files are lists, not audio
"""

# Passed to docopt as the version value in main().
# NOTE(review): looks like a YYYYMMDD date stamp; the year 2013 may be a
# typo for 2014 (the hash table versions use 2014 dates) -- confirm.
__version__ = 20130527

def filenames(filelist, listflag):
    """ Iterator to yield all the filenames, possibly interpreting them as list files.

    filelist -- iterable of names.  When <listflag> is false these are the
                filenames themselves and are yielded unchanged.
    listflag -- when true, each entry of <filelist> is instead a text file
                holding one filename per line; every non-empty line of every
                list file is yielded in order.

    Line terminators ('\\n' or '\\r\\n') are removed and blank lines are
    skipped, so a trailing empty line in a list file does not produce an
    empty filename.  Other whitespace is preserved, since it may be part
    of a filename.
    """
    if not listflag:
        for filename in filelist:
            yield filename
        return

    for listfilename in filelist:
        with open(listfilename, 'r') as f:
            for line in f:
                filename = line.rstrip('\r\n')
                # Blank lines cannot name a file; skip rather than pass '' on.
                if filename:
                    yield filename


def main(argv):
    """ Command-line entry point: create, extend, or query a fingerprint database.

    argv -- full argument vector including the program name in argv[0]
            (i.e. sys.argv).  If None, docopt reads sys.argv itself.

    Behavior, driven by the docopt usage text:
      new   -- allocate a fresh hash table from --hashbits, --bucketsize
               and --maxtime, ingest the input files, save to <dbase>.
      add   -- load the hash table from <dbase>, ingest the input files,
               save back to <dbase>.
      match -- load the hash table from <dbase> and print the top match
               (or NOMATCH) for each query file.
    With --list, each <file> is read as a list of filenames.
    """
    # Pass the caller's arguments (minus the program name) to docopt instead
    # of letting it silently fall back to sys.argv.
    cmdline = argv[1:] if argv is not None else None
    args = docopt.docopt(usage, argv=cmdline, version=__version__)
    # NOTE(review): --density is parsed but is not used anywhere in this
    # function; ingest() is called without it.

    if args['new']:
        # Create a new hash table
        ht = hash_table.HashTable(hashbits=int(args['--hashbits']),
                                  depth=int(args['--bucketsize']),
                                  maxtime=int(args['--maxtime']))
    else:
        # Load existing
        ht = hash_table.HashTable(filename=args['<dbase>'])

    if args['match']:
        # Running query
        # Scales the matched time value into seconds for display.
        # NOTE(review): presumably the analysis hop, 256 samples at
        # 11025 Hz (256/11025 ~= 0.02322) -- confirm.
        t_hop = 0.02322
        for qry in filenames(args['<file>'], args['--list']):
            rslts = audfprint_match.match_file(ht, qry)
            # An empty result set means no match; don't index into it.
            if len(rslts) == 0:
                print("NOMATCH %s" % qry)
                continue
            best = rslts[0]
            print("Matched %s as %s at %.3f s with %s of %s hashes"
                  % (qry, ht.names[best[0]], t_hop * float(best[2]),
                     best[1], best[3]))

    else:
        # Adding files - command was 'new' or 'add'
        initticks = time.clock()
        totdur = 0
        tothashes = 0
        for ix, audiofile in enumerate(filenames(args['<file>'],
                                                 args['--list'])):
            print("%s ingesting # %s : %s  ..."
                  % (time.ctime(), ix, audiofile))
            dur, nhash = ingest(ht, audiofile)
            totdur += dur
            tothashes += nhash
        elapsedtime = time.clock() - initticks
        if totdur > 0:
            print("Added %s (%.1f hashes/sec) at %.3f x RT"
                  % (tothashes, tothashes / float(totdur),
                     elapsedtime / totdur))
        else:
            # No audio was ingested; the per-second rates are undefined.
            print("Added %s hashes (no audio duration ingested)" % tothashes)
        if ht.dirty:
            ht.save(args['<dbase>'])

ht.save('httest.pickle', {'version': 20140525})

# Run the main function if called from the command line
if __name__ == "__main__":
import sys
main(sys.argv)
38 changes: 28 additions & 10 deletions hash_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import random
import cPickle as pickle
import gzip
import os, gzip
import scipy.io

class HashTable:
Expand All @@ -27,15 +27,16 @@ class HashTable:
# Earliest acceptable version
HT_COMPAT_VERSION = 20140525

def __init__(self, size=1048576, depth=100, maxtime=16384, name=None):
def __init__(self, hashbits=20, depth=100, maxtime=16384, filename=None):
""" allocate an empty hash table of the specified size """
if name is not None:
self.load(name)
if filename is not None:
self.load(filename)
else:
self.size = size
self.hashbits = hashbits
self.depth = depth
self.maxtime = maxtime
# allocate the big table
size = 2**hashbits
self.table = np.zeros( (size, depth), dtype=np.uint32 )
# keep track of number of entries in each list
self.counts = np.zeros( size, dtype=np.int32 )
Expand All @@ -58,9 +59,12 @@ def store(self, name, timehashpairs):
# we were passed in a numerical id
id = name
# Now insert the hashes
hashmask = (1 << self.hashbits) - 1
for time, hash in timehashpairs:
# Keep only the bottom part of the time value
time %= self.maxtime
# Keep only the bottom part of the hash value
hash &= hashmask
# Mixin with ID
val = (id * self.maxtime + time) #.astype(np.uint32)
# increment count of vals in this hash bucket
Expand Down Expand Up @@ -102,19 +106,33 @@ def get_hits(self, hashes):
def save(self, name, params=None):
    """ Save hash table to file <name>, including optional addition params.

    name   -- path of the gzip-compressed pickle file to write.
    params -- extra values to store with the table as self.params;
              defaults to an empty list.

    Stamps the table with the class's HT_VERSION, pickles the whole
    object, clears the dirty flag, and prints a one-line summary.
    """
    # Use a fresh list per call rather than a shared mutable default,
    # since the value is stored on the instance and pickled.
    if params is None:
        params = []
    self.params = params
    # HT_VERSION is a class attribute, so it must be reached through self.
    self.version = self.HT_VERSION
    with gzip.open(name, 'wb') as f:
        pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
    self.dirty = False
    print("Saved fprints for %s files ( %s hashes) to %s"
          % (len(self.names), np.sum(self.counts), name))

def load(self, name):
    """ Read either pklz or mat-format hash table file <name>, return params.

    name -- path of the file to read.  A '.mat' extension (any case)
            selects load_matlab(); anything else goes to load_pkl().

    Prints a one-line summary of what was read and returns whatever
    params value the chosen loader returned.
    """
    ext = os.path.splitext(name)[1]
    if ext.lower() == '.mat':
        params = self.load_matlab(name)
    else:
        params = self.load_pkl(name)
    # np.sum avoids a Python-level loop over every bucket count.
    print("Read fprints for %s files ( %s hashes) from %s"
          % (len(self.names), np.sum(self.counts), name))
    return params

def load_pkl(self, name):
""" Read hash table values from file <name>, return params """
with gzip.open(name, 'rb') as f:
temp = pickle.load(f)
assert(temp.version >= HT_COMPAT_VERSION)
assert(temp.version >= self.HT_COMPAT_VERSION)
params = temp.params
self.size = temp.size
self.hashbits = temp.hashbits
self.depth = temp.depth
self.maxtime = temp.maxtime
self.table = temp.table
Expand Down Expand Up @@ -143,7 +161,7 @@ def load_matlab(self, name):
params = {}
params['mat_version'] = mht['HT_params'][0][0][-1][0][0]
assert(params['mat_version'] >= 0.9)
self.size = mht['HT_params'][0][0][0][0][0]
self.hashbits = int(np.log(mht['HT_params'][0][0][0][0][0])/np.log(2.0))
self.depth = mht['HT_params'][0][0][1][0][0]
self.maxtime = mht['HT_params'][0][0][2][0][0]
params['hoptime'] = mht['HT_params'][0][0][3][0][0]
Expand Down

0 comments on commit 843420e

Please sign in to comment.