# voices.py

import numpy as np
import scipy as sp
import re

panphoneme = "The beige hue on the waters of the loch impressed all, including the French queen, before she heard that symphony again, just as young Arthur wanted."

from g2p_en import G2p
g2p = G2p()


#phones_arpabet = "IY IH EY EH AE AA AO OW UH UW ER AX AH AY AW OY IX B D G P T K V DH Z ZH F TH S SH L EL R W WH Y M N NX EM EN CH JH HH DX Q".split(" ")
# Phone inventory, split into one list per category.
vowels = "IY IH EY EH AE AA AO OW UH UW ER AX AH".split(" ")
# One position label per entry of `vowels`, in the same order (5 front, 5 back, 3 mid).
v_pos = ["Front"]*5+["Back"]*5+["Mid"]*3
dipthongs = "AY AW OY IX".split(" ")
stops = "B D G P T K".split(" ")
fricatives = "V DH Z ZH F TH S SH".split(" ")
semivowels = "L EL R W WH Y".split(" ")
liquids = "L EL R".split(" ")
nasals = "M N NX EM EN".split(" ")
affricates = "CH JH".split(" ")
others = "HH DX Q".split(" ")
# Phones that get a glottal (voiced) source; used for membership tests only.
voiced = "B D G V DH Z ZH EM EN DX".split(" ") + vowels + dipthongs + semivowels    + nasals
phones = vowels+dipthongs+stops+fricatives+semivowels+nasals+affricates+others
# phone -> category name. Later updates win where categories overlap, so
# L/EL/R end up as "liquid" rather than "semivowel".
types = {v:"vowel" for v in vowels}
types.update({v:"dipthong" for v in dipthongs})
# stops were previously never registered, leaving their type as None
types.update({v:"stop" for v in stops})
types.update({v:"fricative" for v in fricatives})
types.update({v:"semivowel" for v in semivowels})
types.update({v:"liquid" for v in liquids})
types.update({v:"nasal" for v in nasals})
types.update({v:"affricate" for v in affricates})
types.update({v:"other" for v in others})

def fails(func,*a,**ka):
    """Report whether calling ``func(*a, **ka)`` raises.

    Returns True if the call raises an ``Exception`` and False if it returns
    normally (the return value is discarded).  ``SystemExit`` and
    ``KeyboardInterrupt`` are deliberately not swallowed, so Ctrl-C and
    interpreter shutdown still propagate through the probe.
    """
    try:
        func(*a,**ka)
    except Exception:
        return True
    return False
def broadcast_op(op,prim=lambda x: fails(lambda y:iter(y),x)):
    """Lift ``op`` so it is applied to every leaf of a nested container.

    op:   function applied to each leaf value.
    prim: predicate deciding whether a value is a leaf; by default anything
          that cannot be iterated.

    The returned function rebuilds containers with the same type as the input:
    dicts keep their keys and have their values mapped, every other iterable
    is rebuilt by calling ``type(v)`` on the mapped items.  ``str`` and
    ``bytes`` are always treated as leaves, because iterating a string yields
    strings again and the recursion would never terminate.
    """
    def do(v,op=op,prim=prim):
        if prim(v) or isinstance(v,(str,bytes)):
            return op(v)
        kind = type(v)
        if kind is dict:
            return {key:do(v[key]) for key in v}
        # tuple, list, set and any other iterable constructible from an iterator
        return kind(do(item) for item in v)
    return do

# Raw vowel positions as complex numbers (real = horizontal, imag = vertical).
# A single value is a stationary vowel; a tuple lists successive points of a
# moving one.  The values are rescaled by unproject() and then turned into
# polynomials further down in this file.
# NOTE(review): presumably coordinates measured on a vowel-chart image — confirm.
# NOTE(review): "YUW" does not appear in any of the phone lists above.
places = {
    "IY":314.482+186.721j,
    "UW":497.122+192.070j,
    "IH":348.106+218.817j,
    "UH":494.066+221.873j,
    "AX":463.498+252.441j,
    "EH":390.900+273.074j,
    "AO":533.039+286.065j,
    "AH":467.701+295.235j,
    "AE":401.599+325.803j,
    "AA":516.609+330.006j,
    "EY":(373.704+271.161j,352.803+236.325j),
    "OW":(520.428+268.868j,526.595+234.334j),
    "OY":(527.305+264.283j,447.830+263.519j,395.044+256.997j),
    "AW":(458.528+340.702j,487.950+304.021j,507.391+250.031j),
    "AY":(468.463+339.174j,429.871+321.597j,401.597+296.379j,366.541+238.632j),
    "YUW":(336.259+197.799j,396.629+197.035j,431.874+201.236j),
          }
# Corner points (top-left, top-right, bottom-left, bottom-right) in the same
# coordinate system as the table above; unproject() normalises against them.
# bl is only referenced by the commented-out variant inside unproject().
tl = 290.407+175.256j
tr = 544.117+175.256j
bl = 412.677+346.815j
br = 544.117+346.815j
def unproject(p):
    """Rescale a complex chart point relative to the corner constants.

    The real part of the result is 0 at ``tr``'s real coordinate and 1 at
    ``tl``'s; the imaginary part is 0 at ``tr``'s imaginary coordinate and 1
    at ``br``'s.
    """
    #x <- is f2, y v is f1
    right_edge = tr.real
    top_edge = tr.imag
    vertical = (p.imag-top_edge)/(br.imag-top_edge)
    horizontal = (p.real-right_edge)/(tl.real-right_edge)
    # abandoned variant that also used bl to account for the slanted left edge:
    #x = (p.real-br.real)/(bl.real-br.real) * y + (p.real-tr.real)/(tl.real-tr.real) * (1-y)
    return horizontal+1j*vertical

# Normalise every raw point (tuples are mapped element-wise).
places = broadcast_op(unproject)(places)

# Replace each entry by a polynomial: a tuple of n points becomes the Lagrange
# interpolant through them at x = 0..n-1, a single point becomes a constant.
places = {
    name: (
        sp.interpolate.lagrange(np.arange(len(path)), path)
        if type(path) is tuple
        else sp.interpolate.lagrange([0], [path])
    )
    for name, path in places.items()
}

# Phoneme spec grammar: PHONE[stress][@(freq[unit] | note)][:dur[unit]]
#   e.g. "AH0", "AA1@220.5Hz:0.25s", "OW1@C#3:6000"
# Raw string so the regex escapes are not interpreted (and warned about) as
# string escapes; the frequency accepts any number of decimals, like the duration.
phoneme_re = re.compile(r"(?P<phone>[a-zA-Z]+)(?P<stress>\d)?(@(((?P<freq>[\+\-]?\d+(\.\d*)?)(?P<freq_unit>[nµmkMG]?Hz)?)|(?P<pitch>[A-G][b#]?[\+\-]?\d+)))?(:(?P<dur>\d+(\.\d*)?)(?P<dur_unit>[nµmkMG]?s)?)?")

def si_prefix_to_factor(u):
    """Return the multiplier implied by the first character of unit string ``u``.

    Recognised prefixes map to powers of 1000 ('k' -> 10**3, 'm' -> 10**-3, ...).
    An empty string or an unrecognised first character gives 1.
    """
    thousands = {'p':-4,'n':-3,'µ':-2,'m':-1,'k':1,'M':2,'G':3,'T':4}
    if not len(u):
        return 1
    lead = u[0]
    if lead not in thousands:
        return 1
    return 10**(thousands[lead]*3)

class united_number:
    """A number tagged with a unit string (e.g. ``0.25`` and ``"s"``).

    The unit is carried for display and later conversion only.  Unary
    operations (abs, negation, conjugate) keep the unit; binary arithmetic
    operates on the bare value and returns a plain number, dropping the unit.
    """
    def __init__(self,v,unit):
        self.v = v          # numeric value
        self.unit = unit    # unit text, printed directly after the value
    def __repr__(self):
        return f"{self.v}{self.unit}"

    # --- conversions to builtin numeric types ---
    def __complex__(self):
        return complex(self.v)
    def __float__(self):
        return float(self.v)
    def __int__(self):
        return int(self.v)
    def __round__(self,ndigits=None):
        # ndigits makes round(x, n) work; round(x) behaves as before
        if ndigits is None:
            return round(self.v)
        return round(self.v,ndigits)

    # --- unary operations: unit preserved ---
    def __abs__(self):
        return united_number(abs(self.v),self.unit)
    def conjugate(self):
        return united_number(self.v.conjugate(),self.unit)
    def __neg__(self):
        return united_number(-self.v,self.unit)
    def __pos__(self):
        return united_number(+self.v,self.unit)

    # --- binary arithmetic: result is a plain number ---
    def __add__(self,o):
        return self.v+o
    def __radd__(self,o):
        return o+self.v
    def __sub__(self,o):
        return self.v-o
    def __rsub__(self,o):
        return o-self.v
    def __mul__(self,o):
        return self.v*o
    def __rmul__(self,o):
        return o*self.v
    def __truediv__(self,o):
        return self.v/o
    def __rtruediv__(self,o):
        return o/self.v
    def __floordiv__(self,o):
        return self.v//o
    def __rfloordiv__(self,o):
        return o//self.v
    def __pow__(self,o):
        return self.v**o
    def __rpow__(self,o):
        return o**self.v
    

class phoneme:
    """One phone with optional stress, pitch and duration.

    ``phone`` may be a bare symbol ("AH") or a spec accepted by ``phoneme_re``
    such as "AH0@220Hz:0.25s" or "OW1@C#3".  Values passed explicitly as
    ``stress``, ``duration`` or ``pitch`` override whatever the spec contains.

    After construction:
      phone    -- the phone symbol (letters only when the spec parsed)
      stress   -- int or None
      pitch    -- note-name string, float, ``united_number`` (when a Hz unit
                  was given) or None
      duration -- float, ``united_number`` (when a seconds unit was given) or None
    """
    def __init__(self,phone,stress = None,duration = None,pitch = None):
        m = phoneme_re.match(phone)
        if m is None:
            # the pattern needs a leading letter; anything else is stored verbatim
            self.phone = phone
            self.stress = stress
            self.pitch = pitch
            self.duration = duration
            return
        self.phone = m.group('phone')
        #direct args override parsed args
        if stress is None and m.group('stress') is not None:
            stress = int(m.group('stress'))
        self.stress = stress
        if duration is None and m.group('dur') is not None:
            duration = float(m.group('dur'))
            if m.group('dur_unit') is not None:
                duration = united_number(duration,m.group('dur_unit'))
        self.duration = duration
        if pitch is None:
            if m.group('pitch'):
                # note name such as "C#3", kept as text
                pitch = m.group('pitch')
            elif m.group('freq') is not None:
                pitch = float(m.group('freq'))
                if m.group('freq_unit') is not None:
                    pitch = united_number(pitch,m.group('freq_unit'))
        self.pitch = pitch
    def __repr__(self):
        # same layout as the input spec: PHONE[stress][@pitch][:duration]
        r=self.phone
        if self.stress is not None:
            r += f'{self.stress}'
        if self.pitch is not None:
            r += f'@{self.pitch}'
        if self.duration is not None:
            r += f':{self.duration}'
        return r
    @property
    def type(self):
        """Category name from ``types``, or None for unknown phones."""
        return types.get(self.phone)
    @property
    def voiced(self):
        return self.phone in voiced
    @property
    def voiceless(self):
        return not self.voiced
    @property
    def place(self):
        """Entry of ``places`` for this phone, or None if it has none."""
        return places.get(self.phone)

    # --- category predicates, one per phone list ---
    @property
    def vowel(self):
        return self.phone in vowels
    @property
    def dipthong(self):
        return self.phone in dipthongs
    @property
    def stop(self):
        return self.phone in stops
    @property
    def fricative(self):
        return self.phone in fricatives
    @property
    def semivowel(self):
        return self.phone in semivowels
    @property
    def liquid(self):
        return self.phone in liquids
    @property
    def nasal(self):
        return self.phone in nasals
    @property
    def affricate(self):
        return self.phone in affricates
    @property
    def other(self):
        return self.phone in others

# Note name: letter A-G, optional flat/sharp (b/#), signed octave number.
# Raw string so \+ \- \d reach the regex engine instead of being (invalid) string escapes.
note_re = re.compile(r"(?P<note>[A-G])(?P<accidental>[b#])?(?P<octave>[\+\-]?\d+)")
def str_to_note(n,a440=69):
    """Convert a note name such as "A4", "C#3" or "Bb-1" to a note number.

    n:    note name; must match ``note_re`` at its start.
    a440: number assigned to "A4" (69 by default, so "C4" is 60).

    Returns the integer note number, one per semitone.
    """
    m=note_re.match(n)
    # semitone offsets chosen so that octaves roll over at C (A and B sit above C)
    r = [12,14,3,5,7,8,10]["ABCDEFG".index(m.group("note"))]
    if m.group('accidental'):
        # 'b' -> -1, '#' -> +1
        r += "b#".index(m.group("accidental"))*2-1
    r += 12*int(m.group('octave'))
    return r+a440-(12*5)
import math
def note_to_hz(n,a440=440):
    """Frequency of note number ``n`` in equal temperament.

    Note 69 maps to ``a440``; every 12 notes doubles the frequency.
    """
    semitones_from_a = n-69
    ratio = 2**(semitones_from_a/12)
    return ratio*a440


#https://paginas.fe.up.pt/~voicestudies/artts/doc/reports/msc_dissertations/TESE_SANDRA_DIAS_vf_MEB04006.pdf
# page 55
def rgm(t,te,tc):
    """Two-segment glottal pulse shape evaluated at time ``t``.

    For t < te the value rises as a raised cosine from 0 to 1; for
    te <= t < tc it falls along a quarter cosine from 1 to 0; otherwise 0.
    """
    if t < te:
        opening = math.cos(math.pi*t/te)
        return .5*(1-opening)
    if t < tc:
        return math.cos(math.pi*(t-te)/(2*(tc-te)))
    return 0


def testfilt(phonemes,default_dur=6000,default_pitch=69-17,defaul_stress=4,ext_glot=None,fvs=[.9,.97,.9,.97],sr=48000):
    """Generate samples for a phoneme sequence using a source + two-filter model.

    phonemes:      iterable of ``phoneme`` objects or spec strings.
    default_dur:   duration in samples for phonemes without one.
    default_pitch: note number used for phonemes without a pitch.
    defaul_stress: stress used for phonemes without one (scales the level).
    ext_glot:      optional iterator supplying the voiced source; when None the
                   built-in ``rgm`` pulse is used.
    fvs:           four radii applied to the two filters' coefficient lists.
    sr:            sample rate in Hz.

    Pitch handling per phoneme: a float is a note number, a ``united_number``
    is a frequency with an SI-prefixed unit, a string is a note name, anything
    else is used as Hz directly.  Durations given as ``united_number`` are
    seconds (with SI prefix) and converted to samples.

    Yields one DC-filtered sample per step.
    """
    import random
    import filters as filt
    phase = 0
    eone = math.exp(2*math.pi)
    f1 = filt.biquad_zprl()
    f2 = filt.biquad_zprl()
    # running formant frequencies in Hz, smoothed towards each phoneme's target
    f1f = 400
    f2f = 1400
    time = 0
    dc_cut = filt.iir1l(-.999,1,-1)
    for p in phonemes:
        if type(p) is str:
            p = phoneme(p)
        stress = p.stress if p.stress is not None else defaul_stress
        pitch = p.pitch
        if pitch is None:
            # the default is a note number: convert it to Hz exactly once
            # (it used to be converted twice, giving a wildly wrong frequency)
            pitch = note_to_hz(default_pitch)
        elif type(pitch) is float:
            pitch = note_to_hz(pitch)
        elif type(pitch) is united_number:
            pitch = si_prefix_to_factor(pitch.unit)*pitch.v
        elif type(pitch) is str:
            pitch = note_to_hz(str_to_note(pitch))
        dur = p.duration if p.duration is not None else default_dur
        if type(dur) is united_number:
            dur = si_prefix_to_factor(dur.unit)*dur.v*sr
        # from here on pitch is in cycles per sample
        pitch /= sr
        is_voiced = p.voiced
        is_fricative = p.fricative
        place = p.place
        #now try to do the phoneme
        while time < dur:
            r = 0
            time += 1
            phase += pitch
            phase %= 1
            s = 10/(stress+10)
            if ext_glot is not None:
                r += s*next(ext_glot)*is_voiced
            else:
                if is_voiced:
                    r += s*rgm(phase,.2,.4) #todo stress changing duties
            if is_fricative:
                r += s*(random.random()-.5)
            if place is not None:
                # NOTE(review): multi-point places are polynomials over 0..n-1 but are
                # only evaluated on 0..1 here — confirm that is intended.
                l = complex(place(time/dur))
                #f1 ranges from roughly 280 to 710 hz
                #f2 from 870 to 2250
                f1f = f1f*.999+.001*(l.imag*(710-280)+280)
                f2f = f2f*.999+.001*(l.real*(2250-870)+870)
            # unit-circle points at the two formant frequencies
            f1p = eone**(1j*f1f/sr)
            f2p = eone**(1j*f2f/sr)
            # presumably (sample, zeros, poles) — confirm against filters.biquad_zprl
            r = f1(r,[fvs[0]*f1p],[fvs[1]*f1p])
            r = f2(r,[fvs[2]*f2p],[fvs[3]*f2p])
            yield dc_cut(r)

        # keep the overshoot so phoneme boundaries do not drift
        time -= dur


def testsum(phonemes,default_dur=12000,default_pitch=69-17,defaul_stress=4,ext_glot=None,sr=48000):
    """Generate samples for a phoneme sequence by summing pulse trains.

    Instead of filtering, two extra ``rgm`` pulses running at the current
    formant frequencies are added to the voiced source and restarted at every
    pitch period.

    phonemes:      iterable of ``phoneme`` objects or spec strings.
    default_dur:   duration in samples for phonemes without one.
    default_pitch: note number used for phonemes without a pitch.
    defaul_stress: stress used for phonemes without one (scales the level).
    ext_glot:      optional iterator supplying the voiced source; when None the
                   built-in ``rgm`` pulse is used.
    sr:            sample rate in Hz.

    Pitch and duration specs are interpreted as in ``testfilt``: float = note
    number, ``united_number`` = value with SI-prefixed unit, str = note name.

    Yields one DC-filtered sample per step.
    """
    import random
    import filters as filt
    phase = 0
    # phases (0..1) of the two formant pulse trains
    f1 = 0
    f2 = 0
    # running formant frequencies in Hz, smoothed towards each phoneme's target
    f1f = 400
    f2f = 1400
    time = 0
    dc_cut = filt.iir1l(-.999,1,-1)
    for p in phonemes:
        # accept spec strings, like testfilt does
        if type(p) is str:
            p = phoneme(p)
        stress = p.stress if p.stress is not None else defaul_stress
        pitch = p.pitch
        if pitch is None:
            # the default is a note number: convert it to Hz exactly once
            pitch = note_to_hz(default_pitch)
        elif type(pitch) is float:
            pitch = note_to_hz(pitch)
        elif type(pitch) is united_number:
            pitch = si_prefix_to_factor(pitch.unit)*pitch.v
        elif type(pitch) is str:
            pitch = note_to_hz(str_to_note(pitch))
        dur = p.duration if p.duration is not None else default_dur
        if type(dur) is united_number:
            dur = si_prefix_to_factor(dur.unit)*dur.v*sr
        # from here on pitch is in cycles per sample
        pitch /= sr
        is_voiced = p.voiced
        is_fricative = p.fricative
        place = p.place
        #now try to do the phoneme
        while time < dur:
            r = 0
            time += 1
            phase += pitch
            phase %= 1
            s = 10/(stress+10)
            if ext_glot is not None:
                r += s*next(ext_glot)*is_voiced
            else:
                if is_voiced:
                    r += s*rgm(phase,.2,.4) #todo stress changing duties
            if is_fricative:
                r += s*(random.random()-.5)
            if place is not None:
                l = complex(place(time/dur))
                #f1 ranges from roughly 280 to 710 hz
                #f2 from 870 to 2250
                f1f = f1f*.999+.001*(l.imag*(710-280)+280)
                f2f = f2f*.999+.001*(l.real*(2250-870)+870)
            if is_voiced:
                r += s*rgm(f1,.5,1)/7
                r += s*rgm(f2,.5,1)/9
                f1 = (f1 + f1f/sr)%1
                # second train follows the second formant (was advancing with f1f)
                f2 = (f2 + f2f/sr)%1
                if phase-pitch < 0:
                    # the pitch phase just wrapped: restart both trains
                    f1 = 0
                    f2 = 0
            yield dc_cut(r)

        # keep the overshoot so phoneme boundaries do not drift
        time -= dur
        
            
def difeq_glottal_a():
    """Build a stateful glottis stepper driven by air flow.

    The returned ``do(a, t, dt)`` advances the model one step and returns the
    current gap.  Its state lives in the default list ``o`` = [pressure,
    displacement velocity, displacement], which is created once per call of
    this factory, so every stepper has its own state unless a list is passed.
    """
    #air -+> pressure
    #pressure -+> displacement
    #displacement = gap
    #gap --> pressure
    def do(a,t,dt=.001,o=[0,0,0]):
        pressure = o[0]+a*dt
        velocity = o[1]+(pressure-o[2]*t)*dt #disp vel
        gap = o[2]+velocity*dt #displacement
        if gap < 0:
            # closed: clamp and stop the motion
            gap = 0
            velocity = 0
        # the open gap lets pressure escape
        o[0] = pressure-gap*dt
        o[1] = velocity
        o[2] = gap
        return gap
    return do
def difeq_glottal_p(fr=.01,d0=0.01):
    """Build a stateful glottis stepper driven by pressure.

    fr and d0 become the defaults of the returned ``do(p, t, d0, fr, dt)``.
    Each call moves the internal pressure towards ``p`` at rate ``fr``,
    integrates the displacement against tension ``t`` and returns the opening
    beyond the rest offset ``d0`` (never negative).  State is kept in the
    default list ``o`` = [pressure, displacement velocity, displacement].

    Author's note: this exhibits many cool features like grungy voice,
    harmonicy stuff, whistling and more; a controller for it is still needed.
    """
    #pressure -+> displacement
    #displacement = gap
    #gap --> pressure
    def do(p,t,d0=d0,fr=fr,dt=.001,o=[0,0,0]):
        pressure = o[0]*(1-fr*dt)+p*fr*dt
        velocity = o[1]+(pressure-o[2]*t)*dt #disp vel
        displacement = o[2]+velocity*dt #displacement
        if displacement < 0:
            displacement = 0
            velocity = 0
        opening = max(0,(displacement-d0))
        o[0] = pressure-opening*dt
        o[1] = velocity
        o[2] = displacement
        return opening
    return do


def difeq_glottal_p2(fr=.01,d0=0.01):
    """Build a stateful glottis stepper that tracks a 2-D (complex) position.

    fr and d0 become the defaults of the returned ``do(p, t, d0, fr, dt)``.
    State list ``o`` = [pressure, previous position, position]; the position
    is kept inside the quarter unit disc (real >= 0, imag >= 0, |z| <= 1).
    The return value is the opening derived from the real part, never negative.
    """
    #pressure -+> force ^y
    # 2 springs, tension for towards centerpoint
    #            something intrinsic for radius
    #               _
    #             /
    #            |.--•
    # gap proportional to x displacement
    # probably need to do that old_position, new_position method of velocity
    #  and either just clamp the radius or
    # don't do that and use a spring?
    def do(p,t,d0=d0,fr=fr,dt=.001,o=[0,1,1]):
        pressure = o[0]*(1-fr*dt)+p*fr*dt #pressure
        #o[1],o[2] are prev_pos and pos (complex)
        previous = o[2]
        position = o[2]*2-o[1] #apply vel
        #apply tension
        position += t*(1-position)*dt*dt
        #apply pressure
        position += 1j*dt*dt*pressure
        #apply bounds
        if position.imag < 0:
            position = position.real
        if position.real < 0:
            position = 1j*position.imag
        radius = abs(position)
        if radius > 1:
            position /= radius
        opening = max(0,((1-position.real)-d0)/(1-d0))
        o[0] = pressure-opening*dt
        o[1] = previous
        o[2] = position
        return opening
    return do


#center an array on it's maximum for graphing
def scope_trig(a,lf=.5):
    a = np.array(a)
    l = round(len(a)*lf)
    m = np.argmax(a[l//2:-l//2].real)
    return a[m:m+l]

#import voices as vs
#
#gl2 = vs.difeq_glottal_p2()
#sl3 = slider()
#s2 = mpt.slider2d((-.1,1.8),(-.1,1.8));s2.show()
#live_graph([lambda x: vs.scope_trig(np.array([gl(s2.y,s2.x**4,dt=.1,d0=next(sl3),fr=.1).real for i in range(512)]))],256)
#
#


def dgp_fv_controller(dgp,r=.01,sr=48000):
    """Wrap a glottal stepper in a feedback loop targeting a frequency and volume.

    dgp: stepper called as ``dgp(pressure, tension, d0, fr, dt)``.
    r:   adaptation rate of the controller.
    sr:  sample rate used to normalise the requested frequency.

    The returned ``do(freq, vol, ...)`` advances ``dgp`` one step, nudges its
    internal pressure/tension state ``s`` = [pressure, tension, mean power]
    and returns the stepper's output.
    Author's note: this doesn't work well yet; tables may be used instead.
    """
    import filters as filt
    f = filt.biquad_zprl()
    def do(freq,vol,d0=.01,fr=.01,dt=.001,q=.99,r=r,sr=sr,dgp=dgp,s=[0,0,1],f=f):
        freq /= sr
        # unit-circle point at the target frequency
        p = math.e**(math.pi*2j*freq)
        v = dgp(s[0],s[1],d0,fr,dt)
        filtered = f(v,[p*q*q],[p*q]).real
        power = v*v
        # running mean of the output power
        s[2] = s[2]*(1-r*dt)+power*dt*r
        # second state entry follows the power error against the requested volume
        s[1] += (power-vol*vol)*dt*r
        # first state entry follows the relative filter residual (target .707)
        s[0] -= ((filtered-v)**2/s[2]-.707)*dt*r
        return v
    return do


#https://www.mattmontag.com/projects-page/academic/speech
#https://www.britannica.com/science/sound-physics/Noise


#using the pitch estimation matrix for finding how pitch/volume scales with pressure and tension
# 
# window = np.cos((np.arange(4096)/4096)*2*math.pi)*-.5+.5
# pef = lambda a:(frs[np.argmax(pe@np.log1p(mag2(np.fft.fft(a*window))))],(np.sum(mag2(a*window))**.5)/10)
# gwpv = np.array([[pef(np.array([gl(y/64,x/64,dt=.1,d0=next(sl3),fr=.1).real for i in range(4596)][500:])) for y in range(64)] for x in range(64)])
#
#


#todo: "voice act" find some standard properties of the phonemes
# e.g. position for fricatives
#               and noise filters for fricatives
#       mouth poses for everything
#      formants above 2
#      typical durations
#      typical pitches
#

# finding fricatives:
#s2 = slider2d((0,1),(0,1));s2.show()
#f = filt.biquad_zprl()
#mix.out = (f(v,[1],[next(s2)]) for v in noi)


def collect_impulses(g,pre=128,vmin=.025,tmin=.02,sr=48000,sd=.99):
    """Cut individual impulses out of a sample stream.

    g:    iterable of samples.
    pre:  length of the ring buffer; recorded samples are read from its
          oldest slot, so each capture lags the input.
    vmin: magnitude threshold that arms and sustains a capture.
    tmin: seconds below ``vmin`` after which a capture is closed.
    sr:   sample rate used to turn ``tmin`` into a sample count.
    sd:   coefficient of the slope filter.

    Yields exactly one item per input sample: ``None`` normally, or the list
    of captured samples on the step where a capture completes.
    """
    import filters as filt
    IDLE, ARMED, RECORDING = 0, 1, 2
    quiet_limit = round(tmin*sr)
    slope = filt.iir2l(-sd*2,sd*sd,1,-1)
    ring = [0]*pre
    head = 0
    captured = []
    state = IDLE
    trigger_slope = 0
    remaining = 0
    for sample in g:
        ring[head] = sample
        current_slope = slope(sample)
        head = (head+1)%pre
        if state == IDLE:
            if abs(sample) > vmin:
                state = ARMED
                trigger_slope = current_slope
        elif state == ARMED:
            # start recording once the slope changes sign
            if current_slope * trigger_slope < 0:
                state = RECORDING
                remaining = quiet_limit
        elif state == RECORDING:
            # ring[head] is the oldest sample still in the buffer
            captured.append(ring[head])
            if abs(sample) > vmin:
                remaining = quiet_limit
            remaining -= 1
            if remaining <= 0:
                yield captured
                captured = []
                state = IDLE
                continue
        yield None
            
            
def fold(a,l):
    """Sum consecutive length-``l`` chunks of array ``a`` into one array.

    a: 1-D numpy array.
    l: chunk length; the result has this length and ``a``'s dtype.

    A final partial chunk is added to the start of the result.  Raises
    ValueError when ``l`` is 0 and ``a`` is non-empty (this used to loop
    forever).
    """
    r = np.zeros(l,dtype=a.dtype)
    if l == 0:
        if len(a):
            raise ValueError("fold length must be positive for non-empty input")
        return r
    for start in range(0,len(a),l):
        chunk = a[start:start+l]
        r[:len(chunk)] += chunk
    return r

#https://sal.arizona.edu/sites/sal.lab.arizona.edu/files/2021-02/BStory_HandbookSinging_ChapterReprint.pdf
# has some poses of the vocal tract for modeling with the pipes model


#trying to derive transmission line frequency response from impedance function
#   dL/dl   dR/dl
# ---ꕊꕊꕊ--.-^.^.--
#         1 dC/dl
# --------T-------
#
#       a  1+k
# ---[]--> -> +--[]--->
#       kv    ^-k
# <--[]--+ <- <--[]---
#    z     1-k  a z`
# k = (z`-z)/(z`+z)
# z` = z+dz/dl
# k = (dz/dl)/(2z)
#     z'/2z
#
# f(x) = A(x)e^(ixw)?
# A(0) = 1
#
# f(x),b(x) forward and backwards waves
# f' = f*(z'/2z+(a-1))-b*(z'/2z)
# b' = -f*(z'/2z)+b*(z'/2z-(a-1))
# so f' = -b'
# 
# 
# f = (-b+C)?
# 
# f' = (A'+iwA)e^(ixw)
# 
##annoying diffeqs
# 
## note: after searching around for a while I finally found online that this has no closed form solution.
#

# https://www.researchgate.net/publication/3333429_A_Fast_Algorithm_for_Computing_the_Vocal-Tract_Impulse_Response_from_the_Transfer_Function
#


def prod_polydict(a,b):
    """Multiply two polynomials stored as {exponent: coefficient} dicts.

    Exponents add and coefficients multiply; terms landing on the same
    exponent are summed.  Returns a new dict.
    """
    product = {}
    for exp_a in a:
        for exp_b in b:
            exponent = exp_a+exp_b
            product.setdefault(exponent,0)
            product[exponent] += a[exp_a]*b[exp_b]
    return product
def sum_polydict(a,b):
    """Add two polynomials stored as {exponent: coefficient} dicts.

    Returns a new dict holding every exponent of ``a`` followed by the
    exponents only present in ``b``.
    """
    total = {exponent:a[exponent] for exponent in a}
    for exponent in b:
        if exponent not in total:
            total[exponent] = b[exponent]
            continue
        total[exponent] += b[exponent]
    return total


#estimator for the impulse response of a tube from an area function
def est_tube_resp(area,attenuation=lambda x:.9,max_x=1,dx=33000/48000/17.5):
    """Sample an area function along a tube and hand the sections to tube_resp.

    area:        function of position returning the cross-section area.
    attenuation: function of position returning the attenuation per unit
                 length; it is raised to the power ``dx`` for each section.
    max_x:       length of the tube.
    dx:          spacing between samples.

    Returns whatever ``tube_resp`` returns for the sampled sections.
    """
    #z~1/A
    sample_count = int(max_x/dx)+1
    areas = [area(step*dx) for step in range(sample_count)]
    # one attenuation per junction between neighbouring sections
    junctions = len(areas)-1
    atns = [attenuation(step*dx)**dx for step in range(junctions)]
    return tube_resp(areas,atns)
def tube_resp(areas,atns=.9):
    """Build forward/backward polynomials for a chain of tube sections.

    areas: sequence of cross-section areas, one per section.
    atns:  attenuation per junction; either a sequence or a single number
           that is used for every junction.

    Returns a pair of {exponent: coefficient} dicts: the forward polynomial
    and the backward polynomial plus 1, both shifted by ``len(ks)+1``.
    """
    # reflection coefficient at each junction between neighbouring sections
    ks = [(areas[i]-areas[i+1])/(areas[i]+areas[i+1]) for i in range(len(areas)-1)]
    try:
        atns[0]
    except (TypeError, IndexError, KeyError):
        # not indexable (plain number) or empty: use the same value everywhere
        atns = [atns]*len(ks)
    #          a  1+k
    # ---[z-1]--> -> +--[z-1]--->
    #          kv    ^-k
    # <--[z-1]--+ <- <--[z-1]---
    #             1-k  a 
    # =?
    #         a  1+k
    # ---[z-1]--> -> + -->
    #          kv    ^-k
    # ---[z+1]->+ ->  --->
    #            /(1-k) /a
    f = {0:1}
    b = dict()
    for i in range(len(ks)):
        # delay + attenuate the forward wave, advance + amplify the backward one
        f = prod_polydict(f,{-1:atns[i]})
        b = prod_polydict(b,{1:1/atns[i]})
        # scattering at the junction
        b = sum_polydict(b,prod_polydict(f,{0:ks[i]}))
        f = prod_polydict(f,{0:1+ks[i]})
        b = prod_polydict(b,{0:1/(1-ks[i])})
        f = sum_polydict(f,prod_polydict(b,{0:-ks[i]}))
    shift = {len(ks)+1:1}
    return prod_polydict(f,shift),sum_polydict(prod_polydict(b,shift),{0:1})
    
    
def filt_resp_dict(b,a={0:1}):
    """Return a function evaluating the rational function b(z)/a(z).

    b, a: polynomials as {exponent: coefficient} dicts (``a`` defaults to 1).
    """
    def response(z,b=b,a=a):
        numerator = sum(b[power]*(z**power) for power in b)
        denominator = sum(a[power]*(z**power) for power in a)
        return numerator/denominator
    return response


def mouth_pipe_model():
    """Build the transmission-line mesh used as the mouth/nose pipe model.

    Returns a ``filters.tlmesh`` built from a dict keyed by (node, node) edges.
    Nodes 0,2..10 form one chain and 20..25 a second chain branching off at
    node 5; both chains end at node 1, which connects on to node 70.
    NOTE(review): the meaning of each 3-tuple is defined by filters.tlmesh and
    is not visible here; the third entry is ``a`` for all chain edges.
    NOTE(review): the second chain is presumably the nasal branch — confirm.
    """
    a = .99
    import filters as filt
    return filt.tlmesh({(0,2):(2,1,a),
                        (2,3):(2,1,a),
                        (3,4):(2,1,a),
                        (4,5):(2,1,a),
                        (5,6):(2,1,a),(5,20):(1,.01,a),
                        (6,7):(2,1,a),(20,21):(2,1,a),
                        (7,8):(4,1,a),(21,22):(2,1,a),
                        (8,9):(2,1,a),(22,23):(2,1,a),
                        (9,10):(2,1,a),(23,24):(2,1,a),
                                       (24,25):(2,1,a),

                        # both chain ends join node 1, which leads to node 70
                        (10,1):(1,1000,.1),
                        (25,1):(1,1000,.1),
                        (1,70):(1,2000,0)
                        })

def interactive_mouth_pipe(f=None):
    """Create a matplotlib slider panel controlling the edges of a pipe mesh.

    f: mesh object offering ``set_admittance(edge, value)`` and
       ``get_admittance(edge)``; defaults to ``mouth_pipe_model()``.

    Returns a small ``mouth`` object with:
      f      -- the mesh
      show() -- display the figure without blocking
      vec()  -- current admittance of every controlled edge as a dict
      set(v) -- apply a dict of edge -> admittance to the mesh
    """
    if f is None:
        f = mouth_pipe_model()
    import matplotlib.pyplot as plt
    from matplotlib.widgets import Slider
    fig, ax = plt.subplots()
    # bottom row: 9 vertical sliders; top-right row: 5 vertical sliders;
    # one horizontal slider sits between the two rows
    axs = [plt.axes(b) for b in [[0.1+(.01+.07)*i, 0.1, 0.07, 0.4] for i in range(9)]]
    naxs = [plt.axes(b) for b in [[0.1+(.01+.07)*(i+4), 0.5, 0.07, 0.45] for i in range(5)]]
    vax = plt.axes([0.1+(.01+.07)*2,.425,.07*4,.05])
    alx = axs+[vax]+naxs
    # mesh edge controlled by each slider, in the same order as `sliders`
    # (the horizontal slider controls edge (5,20))
    ks = [(0,2)]+[(i,i+1) for i in range(2,9)]+[(9,10),(5,20)]+[(i,i+1) for i in range(20,24)]+[(24,25)]
    sliders = [Slider(ax=a,valmin=1e-6,valmax=5,orientation='vertical',valinit=1,label='') for a in axs]+\
        [Slider(ax=vax,valmin=1e-6,valmax=3,orientation='horizontal',valinit=.1,label='')]+\
        [Slider(ax=a,valmin=1e-6,valmax=5,orientation='vertical',valinit=1,label='') for a in naxs]
    def updates(i,us=[]):
        # the edge key is bound as a default so each callback keeps its own edge;
        # `us` is a shared default list that keeps every callback referenced
        def u(v,k=ks[i]):
            # the slider value is squared before being used as admittance
            f.set_admittance(k,v*v)
        us += [u]
        sliders[i].on_changed(u)
    for i in range(len(sliders)):
        updates(i)
    # held by the returned object so widgets and callbacks are not garbage collected
    nogc = [sliders,updates]
    class mouth:
        def __init__(self,f,p,nogc):
            self.f = f        # the mesh
            self.p = p        # the pyplot module
            self.nogc = nogc
        def show(self):
            self.p.show(block=0)
        def vec(self):
            return {k:self.f.get_admittance(k) for k in ks}
        def set(self,v):
            # values are applied as given (not squared, unlike slider moves)
            for k in v:
                self.f.set_admittance(k,v[k])
        
    return mouth(f,plt,nogc)

#the model takes too long to process so this is what I used to hear it
# f = vs.interactive_mouth_pipe()
# f.show()
# mix.out = compress(pitch.flat(extent(cycle([complex(f.f(i/400-.5)*10000) for i in range(400)]),12000) for j in mute))

# Poses found from the singing paper: for each vowel label, a dict of
# mesh edge -> value in the format produced by mouth.vec() / accepted by mouth.set().
# NOTE(review): the key 'EE' is not one of the phone symbols used elsewhere in this file.
pvecs = {'UW': {(0, 2): 1.0, (2, 3): 1.3129357942714264, (3, 4): 0.6308674928799389, (4, 5): 0.19599147542400772, (5, 6): 0.15258861084069333, (6, 7): 1.005216718140287, (7, 8): 1.8692036468511124, (8, 9): 1.4980758504237464, (9, 10): 0.19599147542400772, (5, 20): 1e-12, (20, 21): 0.12056391782493926, (21, 22): 0.6564013921046102, (22, 23): 1.1338322659471203, (23, 24): 0.7942455062056635, (24, 25): 0.10502461162638868}, 'AH': {(0, 2): 1.0, (2, 3): 0.761075930379913, (3, 4): 0.2990732397468867, (4, 5): 0.42385638427809985, (5, 6): 0.8546632366949996, (6, 7): 1.6616840423589478, (7, 8): 2.5234667944340576, (8, 9): 2.7345467555749914, (9, 10): 1.7986741090907037, (5, 20): 1e-12, (20, 21): 0.12056391782493926, (21, 22): 0.6564013921046102, (22, 23): 1.1338322659471203, (23, 24): 0.7942455062056635, (24, 25): 0.10502461162638868}, 'EE': {(0, 2): 1.0, (2, 3): 1.2832659124761838, (3, 4): 1.4980758504237464, (4, 5): 1.3429447601324254, (5, 6): 0.8546632366949996, (6, 7): 0.3587521394864515, (7, 8): 0.15258861084069333, (8, 9): 0.1629307008878892, (9, 10): 0.5316852734382291, (5, 20): 1e-12, (20, 21): 0.12056391782493926, (21, 22): 0.6564013921046102, (22, 23): 1.1338322659471203, (23, 24): 0.7942455062056635, (24, 25): 0.10502461162638868}}

#https://www.tandfonline.com/doi/pdf/10.1080/00335633209379886 has more poses, but they are not in as easily translatable a format


def mouth_hello(duration=3000):
    """Generate samples of the pipe model saying "hello".

    duration: number of steps per unit of the `times` weights below.

    Opens the interactive mouth pipe (a matplotlib figure) and drives it with
    a glottal model while blending between five key poses.  Yields one output
    value of the mesh per step.
    """
    import random
    import filters as filt
    f = interactive_mouth_pipe()
    gl = difeq_glottal_p()
    
    # five key poses: mesh edge -> admittance, same keys in every pose
    phoneme_poses = [{(0, 2): 1.058113884481424, (2, 3): 1.112367387085583, (3, 4): 1.085071093750626, (4, 5): 1.1962907714849849, (5, 6): 0.8546632366949996, (6, 7): 0.8789077734381596, (7, 8): 1.0314957592779783, (8, 9): 0.9284140991217447, (9, 10): 1.0314957592779783, (5, 20): 1e-12, (20, 21): 0.15485754951216527, (21, 22): 0.4663131219464751, (22, 23): 0.9452176157413882, (23, 24): 0.5149403581540242, (24, 25): 0.12056391782493926},
                     {(0, 2): 1.058113884481424, (2, 3): 1.112367387085583, (3, 4): 1.085071093750626, (4, 5): 1.1962907714849849, (5, 6): 0.8546632366949996, (6, 7): 0.8789077734381596, (7, 8): 1.0314957592779783, (8, 9): 0.9284140991217447, (9, 10): 1.0314957592779783, (5, 20): 1e-12, (20, 21): 0.15485754951216527, (21, 22): 0.4663131219464751, (22, 23): 0.9452176157413882, (23, 24): 0.5149403581540242, (24, 25): 0.12056391782493926},
                     {(0, 2): 1.058113884481424, (2, 3): 1.112367387085583, (3, 4): 1.085071093750626, (4, 5): 1.1962907714849849, (5, 6): 0.8546632366949996, (6, 7): 1.3732928100591795, (7, 8): 1.6282842358403977, (8, 9): 0.20768990173422425, (9, 10): 0.476244950562266, (5, 20): 1e-12, (20, 21): 0.15485754951216527, (21, 22): 0.4663131219464751, (22, 23): 0.9452176157413882, (23, 24): 0.5149403581540242, (24, 25): 0.12056391782493926},
                     {(0, 2): 1.058113884481424, (2, 3): 1.112367387085583, (3, 4): 1.085071093750626, (4, 5): 1.1962907714849849, (5, 6): 0.8546632366949996, (6, 7): 1.3732928100591795, (7, 8): 1.6282842358403977, (8, 9): 1.3129357942714264, (9, 10): 0.37451957458573065, (5, 20): 1e-12, (20, 21): 0.15485754951216527, (21, 22): 0.4663131219464751, (22, 23): 0.9452176157413882, (23, 24): 0.5149403581540242, (24, 25): 0.12056391782493926},
                     {(0, 2): 1.058113884481424, (2, 3): 1.112367387085583, (3, 4): 1.085071093750626, (4, 5): 1.1962907714849849, (5, 6): 0.8307577840175955, (6, 7): 1.3732928100591795, (7, 8): 1.3429447601324254, (8, 9): 0.5703409090176474, (9, 10): 0.13292168294356768, (5, 20): 1e-12, (20, 21): 0.15485754951216527, (21, 22): 0.4663131219464751, (22, 23): 0.9452176157413882, (23, 24): 0.5149403581540242, (24, 25): 0.12056391782493926}]
    # per key pose: (pressure, tension, d0) handed to the glottal model below
    glottal_params = [(-0.028273809523809534, -0.015322580645161343, 0.4951171875),
                      (1.3956980519480522, 0.273891129032258, 0.4951171875),
                      (1.4136904761904763, 0.16088709677419355, 0.4951171875),
                      (1.2491883116883116, -0.009576612903225756, 0.4951171875),
                      (-0.02570346320346384, -0.02106854838709671, 0.4951171875)]
    # relative length of each segment; a segment runs int(ts*duration) steps
    times = [4,1,1,1,2]
    # the edge keys shared by all poses
    pd = list(phoneme_poses[0])

    h_noise_point = 0
    # noise is only added while t < nd
    nd = duration*2
    nfilt = filt.iir2l(-1.9,.93,.02)
    
    # Lagrange basis polynomials over the nodes 0..4: polys[i] is 1 at node i
    # and 0 at the other nodes, so they act as blending weights for the poses
    polys = [sp.interpolate.lagrange(np.arange(len(glottal_params)),[i==j for j in range(len(glottal_params))]) for i in range(len(glottal_params))]
    t = 0
    for ts in times:
        for i in range(int(ts*duration)):
            # t advances by about `duration` over each segment regardless of ts
            t += 1/ts
            v = [p(t/duration) for p in polys]
            #yield v
            #continue
            # blended pose, clamped so no admittance goes negative
            pose = {k:max(0,sum(phoneme_poses[i][k]*v[i] for i in range(len(v)))) for k in pd}
            gp = tuple(sum(glottal_params[i][k]*v[i] for i in range(len(v))) for k in range(3))
            if t < nd:
                #h noise
                # envelope: ramps in quickly, then fades out as t approaches nd
                nf = ((nd-t)/nd)
                nf *= min(1,(1-nf)*10)
                nf **= 2
                n = nfilt(random.random())*nf
                n /= 3
            else:
                n = 0
            f.set(pose)
            # NOTE(review): indexing semantics of the mesh are defined in filters.tlmesh — confirm
            f.f[h_noise_point] += n
            yield f.f(gl(gp[0],gp[1],dt=.1,d0=gp[2],fr=.1).real*(1+n)) #.03
    # phonemes of "hello":
    #['HH', 'AH0', 'L', 'OW1']


# presentation demos:
# setup:
# from a import *
# import samvoice as sv
# import voices as vs
# pe,frs = pitch.pitch_est_matrix(1024,4096,fargs=(3,4),args=(1,5,6,.5,8,1))
# w = fm.idbwlPolyl(fm.psaw,4,4)
# ga_pitches_vols = [(frs[np.argmax(pe@np.log1p(mag2(np.fft.fft(a))))],(np.sum(mag2(a))**.5)/10) for a in pitch.wchunk(iter(ga),4096,4)]
# g = iter(ga);gres = [w(o)*v+1j*next(g).real for o,v in ((frs[np.argmax(pe@np.log1p(mag2(np.fft.fft(a))))],(np.sum(mag2(a))**.5)/10) for a in pitch.wchunk(iter(ga),4096,4)) for j in range(1<<10)]
# 
# phs = [vs.phoneme(i) for i in vs.g2p(vs.panphoneme)]
#
# vo = pitch.vocode()
#


# #sam
# mix.out = ((i/896-.5)*(1+1j) for i in sv.untime(sv.tstf(" ".join(sv.g2p("Hello, my name is SAM.")))))
#
# #glottal pulse v1
# gl = vs.difeq_glottal_p()
# live_graph([lambda x: scope_trig([gl(s2.y,s2.x**4*40,dt=.1,d0=next(sl3),fr=.1).real for i in range(2048)])])
# 
# #live audio
# mix.out = (gl(s2.y,s2.x**4*40,dt=.1,d0=next(sl3),fr=.1).real*(1+1j) for i in mute)
#
# #pitch estimation
# mix.out = gres
#
# mix.out = (i.real*(1+1j) for i in gres)
#
#
# #phonemes
# mix.out = (i*(1+1j)/4 for i in vs.testfilt(phs,3000,29))
# mix.out = (i*(1+1j)/4 for i in vs.testfilt(vs.g2p("Four score and seven years ago"),3000,29))
#
# #filters
# vo.show()
# mix.out = (i for i in (vo.p(i) for i in ga) for j in range(1))
#
# mix.out = (vo.p(i) for i in compress(mix.mic.seek(2048),1))
#