Skip to content

Commit

Permalink
implemented audfprint_match, see dotest block therein
Browse files Browse the repository at this point in the history
  • Loading branch information
dpwe committed May 26, 2014
1 parent 5194126 commit eefa119
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 14 deletions.
50 changes: 41 additions & 9 deletions audfprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,19 +192,51 @@ def landmarks2hashes(landmarks):
| (dtime & dtmask)) )
for time, bin1, bin2, dtime in landmarks ]

import hash_table

# NOTE(review): leftover debug flag -- it is re-assigned to False further
# down before the inline test block, so this assignment is dead.
test = True
def ingest(ht, filename):
    """ Read an audio file and add it to the database
    :params:
      ht : HashTable object
        the hash table to add to
      filename : str
        name of the soundfile to add
    :returns:
      dur : float
        the duration of the track
      nhashes : int
        the number of hashes it mapped into
    """
    # Fingerprinting operates at a fixed low sample rate.
    target_sr = 11025
    waveform, sr = librosa.load(filename, sr=target_sr)
    # peaks -> landmarks -> packed hashes, then store under this filename
    peaks = find_peaks(waveform, sr)
    hashes = landmarks2hashes(peaks2landmarks(peaks))
    ht.store(filename, hashes)
    duration = len(waveform) / float(sr)
    return (duration, len(hashes))

import glob, time

def glob2hashtable(pattern):
""" Build a hash table from the files matching a glob pattern """
ht = hash_table.HashTable()
filelist = glob.glob(pattern)
initticks = time.clock()
totdur = 0.0
tothashes = 0
for file in filelist:
print "ingesting ", file, " ..."
dur, nhash = ingest(ht, file)
totdur += dur
tothashes += nhash
elapsedtime = time.clock() - initticks
print "Added",tothashes,"(",tothashes/float(totdur),"hashes/sec) at ", elapsedtime/totdur, "x RT"
return ht

# Ad-hoc smoke test for the fingerprint pipeline; disabled by default.
test = False
if test:
# dev-machine-local test file -- TODO confirm path before enabling
fn = '/Users/dpwe/Downloads/carol11k.wav'
d, sr = librosa.load(fn, sr=11025)
# fingerprint only the first 30 s of audio
hashes = landmarks2hashes(peaks2landmarks(find_peaks(d[:30*sr], sr)))
ht = hash_table.HashTable()

print len(hashes)
for hash in hashes[:40]:
print hash
ingest(ht, fn)

# NOTE(review): the remainder duplicates work done by ingest() above --
# looks like leftover code from before ingest() existed.
import hash_table
ht = hash_table.HashTable()
ht.store(fn, hashes)
ht.save('httest.pickle', {'version': 20140525})

67 changes: 67 additions & 0 deletions audfprint_match.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
audfprint_match.py
Fingerprint matching code for audfprint
2014-05-26 Dan Ellis [email protected]
"""
import librosa
import audfprint
import numpy as np

def find_mode(data, window=0):
    """ Find the (mode, count) of a set of data, counting every point
        within +/- window of a candidate value if window > 0.
        Ties go to the smallest candidate value.
    """
    candidates = np.unique(data)
    # tally, for each candidate, how many points fall inside its window
    tallies = [sum(1 for point in data if abs(point - candidate) <= window)
               for candidate in candidates]
    # np.argmax returns the FIRST maximum, i.e. the smallest tied value
    winner = int(np.argmax(tallies))
    return (candidates[winner], tallies[winner])

def match_hashes(ht, hashes):
    """ Match audio against fingerprint hash table.
    Return top N matches as (id, filteredmatches, timoffs, rawmatches)
    """
    # every (track id, time delta) pair implicated by the query hashes
    hits = ht.get_hits(hashes)
    # sorted id list with -1 sentinels so every run has two boundaries
    sorted_ids = np.r_[-1, sorted([trackid for trackid, dtime in hits]), -1]
    # positions where the sorted id changes -> run boundaries
    boundaries = np.nonzero(sorted_ids[:-1] != sorted_ids[1:])[0]
    # run lengths = raw hit counts per id; the id of each run sits just
    # after its cumulative end
    run_lengths = np.diff(boundaries)
    run_ids = sorted_ids[np.cumsum(run_lengths)]

    # rank ids by raw hit count, most popular first
    ranked = sorted(zip(run_lengths, run_ids), reverse=True)
    # refine the top 100 candidates by their most consistent time offset
    results = []
    for nraw, trackid in ranked[:100]:
        offsets = [dtime for candid, dtime in hits if candid == trackid]
        mode, nfiltered = find_mode(offsets, window=1)
        results.append((trackid, nfiltered, mode, nraw))
    # order by filtered (time-consistent) match count
    results.sort(key=lambda r: r[1], reverse=True)
    return results

def match_file(ht, filename):
""" Read in an audio file, calculate its landmarks, query against hash table. Return top N matches as (id, filterdmatchcount, timeoffs, rawmatchcount)
"""
d, sr = librosa.load(filename, sr=11025)
# Collect landmarks offset by 0..3 quarter-windows
t_win = 0.04644
win = int(np.round(sr * t_win))
qwin = win/4
hq = audfprint.landmarks2hashes(audfprint.peaks2landmarks(audfprint.find_peaks(d, sr)))
hq += audfprint.landmarks2hashes(audfprint.peaks2landmarks(audfprint.find_peaks(d[qwin:], sr)))
hq += audfprint.landmarks2hashes(audfprint.peaks2landmarks(audfprint.find_peaks(d[2*qwin:], sr)))
hq += audfprint.landmarks2hashes(audfprint.peaks2landmarks(audfprint.find_peaks(d[3*qwin:], sr)))
print "Analyzed",filename,"to",len(hq),"hashes"
# Run query
return match_hashes(ht, hq)


# Module-level smoke test: builds a database from a local album, then
# matches a query file against it.  NOTE(review): runs at import time
# while dotest is True, with dev-machine-local paths.
dotest = True
if dotest:
pat = '/Users/dpwe/projects/shazam/Nine_Lives/*mp3'
qry = 'query.mp3'
# build the reference fingerprint database from all matching mp3s
ht = audfprint.glob2hashtable(pat)
rslts = match_file(ht, qry)
# analysis hop in seconds, to convert the frame offset to a time
t_hop = 0.02322
print "Matched", qry, "as", ht.names[rslts[0][0]], "at", t_hop*float(rslts[0][2]), "with", rslts[0][1], "of", rslts[0][3], "hashes"
17 changes: 12 additions & 5 deletions hash_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,18 @@ def get_entry(self, hash):
return idtimelist

def get_hits(self, hashes):
    """ Return a list of (id, delta_time) pairs associated with each
        element in hashes list of (time, hash)
    :params:
      hashes : list of (time, hash) pairs for the query audio
    :returns:
      list of (id, rtime - time) for every stored entry matching a query
      hash; a true match makes this time delta near-constant.
    """
    # NOTE(review): dropped the superseded pre-delta implementation that
    # the diff left as dead code after its return statement.
    iddtimelist = []
    for time, hash in hashes:
        iddtimelist += [(id, rtime - time)
                        for id, rtime in self.get_entry(hash)]
    return iddtimelist

def save(self, name, params=[]):
    """ Save hash table to file <name>, including optional additional params
    :params:
      name : str
        filename to write the pickle to
      params : dict
        arbitrary extra metadata stored on the table before pickling
    """
    # NOTE(review): mutable default arg kept for interface compatibility;
    # it is only assigned, never mutated, so it is harmless here.
    self.params = params
    self.version = 20140525
    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be
    # opened in binary mode ('w' corrupts the stream on Windows and
    # fails outright under Python 3).
    with open(name, 'wb') as f:
        pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
    self.dirty = False
Expand All @@ -112,3 +115,7 @@ def load(self, name):
self.hashesperid = temp.hashesperid
self.dirty = False
return params

def totalhashes(self):
    """ Return the total count of hashes stored in the table
    :returns:
      total number of entries, summed over self.counts
    """
    # self.counts presumably holds per-hash-bucket occupancy counts
    # maintained by store() -- TODO confirm against the full class
    return np.sum(self.counts)

0 comments on commit eefa119

Please sign in to comment.