Skip to content

Commit

Permalink
bug fix in gamma optimization, non-tree child nets
Browse files Browse the repository at this point in the history
  • Loading branch information
cecileane committed May 28, 2020
1 parent dcb64f3 commit 96cf717
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 32 deletions.
79 changes: 47 additions & 32 deletions src/phyLiNCoptimization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@ struct CacheGammaLiNC
clike::Vector{Float64}
"conditional likelihood under partner edge, length nsites"
clikp::Vector{Float64}
"""conditional likelihood under displayed trees that have
Neither the focus hybrid edge Nor its partner. Length: nsites"""
clikn::Vector{Float64}
"""whether displayed trees have the focus hybrid edge (true)
the partner edge (false) or none of them (missing),
which can happen on non-tree-child networks"""
hase::Vector{Union{Missing, Bool}}
end
function CacheGammaLiNC(obj::SSM)
CacheGammaLiNC(
Vector{Float64}(undef, obj.nsites),
Vector{Float64}(undef, obj.nsites),
Vector{Float64}(undef, obj.nsites),
Vector{Union{Missing, Bool}}(undef, length(obj.displayedtree))
Expand Down Expand Up @@ -394,7 +398,7 @@ end
ftolRel::Float64, ftolAbs::Float64,
xtolRel::Float64, xtolAbs::Float64,
alphamin::Float64, alphamax::Float64,
γcache::CacheGammaLiNC
γcache::CacheGammaLiNC)
Estimate one phylogenetic network (or tree) from concatenated DNA data,
like [`phyLiNC!`](@ref), but doing one run only, and taking as input an
Expand Down Expand Up @@ -816,8 +820,13 @@ Deletes a random hybrid edge and updates SSM object as part of
PhyLiNC optimization.
Return true if the move is accepted, false if not.
Note that if `net` has no hybrid ladders, then deleting an existing
reticulation cannot create a hybrid ladder. (triple-check!)
Note: if net is tree-child and a hybrid edge is deleted, then the
resulting network is still tree-child.
But: if `net` has no hybrid ladders, deleting an existing reticulation
may create a hybrid ladder. It happens if there is a reticulation above a
W structure (tree node whose children are both hybrid nodes).
This creates a problem if the user asked for `nohybridladder`:
this request may not be met.
For a description of arguments, see [`phyLiNC!`](@ref).
Called by [`optimizestructure!`](@ref), which does some checks.
Expand Down Expand Up @@ -1097,18 +1106,6 @@ function CacheLengthLiNC(obj::SSM,
)
end

function updatecache_tolerance!(lcache::CacheLengthLiNC,
ftolRel::Float64, ftolAbs::Float64, xtolRel::Float64, xtolAbs::Float64,
maxeval::Integer)
opt = lcache.opt
NLopt.ftol_rel!(opt,ftolRel)
NLopt.ftol_abs!(opt,ftolAbs)
NLopt.xtol_rel!(opt,xtolRel)
NLopt.xtol_abs!(opt,xtolAbs)
NLopt.maxeval!(opt, maxeval)
return nothing
end

"""
updatecache_edge!(lcache::CacheLengthLiNC, obj::SSM, focusedge)
Expand Down Expand Up @@ -1565,9 +1562,11 @@ function optimizegamma_LiNC!(obj::SSM, focusedge::Edge,
partnernum = partner.number
clike = cache.clike # conditional likelihood under focus edge
clikp = cache.clikp # conditional likelihood under partner edge
clikn = cache.clikn # conditional lik under no focus edge nor partner edge
ulik = obj._sitecache # unconditional likelihood and more
llca = obj._loglikcache
fill!(clike, 0.0); fill!(clikp, 0.0)
fill!(clikn, 0.0)
nt, hase = updatecache_hase!(cache, obj, edgenum, partnernum)
γ0 = focusedge.gamma
if γ0<1e-7 # then prior weight and loglikcachetoo small (-Inf if γ0=0)
Expand All @@ -1588,13 +1587,20 @@ function optimizegamma_LiNC!(obj::SSM, focusedge::Edge,
# obj._loglikcache[site,rate,tree] = log P(site | tree,rate) + log gamma(tree)
cadjust = maximum(view(llca, :,:,1:nt)) # to avoid underflows
nr = length(obj.ratemodel.ratemultiplier)
visib = true # whether the reticulation is visible in *all* displayed trees
for it in 1:nt # sum likelihood over all displayed trees
@inbounds h = hase[it]
ismissing(h) && continue # skip below if tree doesn't have e or partner
for ir in 1:nr # sum over all rate categories
if h
if ismissing(h) # tree doesn't have e nor partner
visib = false
for ir in 1:nr # sum over all rate categories
clikn .+= exp.(llca[:,ir,it] .- cadjust)
end
elseif h # tree has e
for ir in 1:nr # sum over all rate categories
clike .+= exp.(llca[:,ir,it] .- cadjust)
else
end
else # tree has partner edge
for ir in 1:nr
clikp .+= exp.(llca[:,ir,it] .- cadjust)
end
end
Expand All @@ -1604,37 +1610,44 @@ function optimizegamma_LiNC!(obj::SSM, focusedge::Edge,
clikp ./= 1.0 - γ0
adjustment = obj.totalsiteweight * (log(nr) - cadjust)
ll = obj.loglik + adjustment
noweights = obj.siteweight === nothing
wsum = (obj.siteweight === nothing ? sum : x -> sum(obj.siteweight .* x))
# evaluate if best γ is at the boundary: 0 or 1
ulik .= (clike .- clikp) ./ clikp # at γ=0, get derivative of loglik
llg0 = (noweights ? sum(ulik) : sum(obj.siteweight .* ulik))
if visib # true most of the time (all the time if tree-child)
ulik .= (clike .- clikp) ./ clikp # avoids adding extra long vector of zeros
else ulik .= (clike .- clikp) ./ (clikp .+ clikn); end
# derivative of loglik at γ=0
llg0 = wsum(ulik)
inside01 = true
if llg0 < 0
γ = 0.0
inside01 = false
ll = (noweights ? sum(log.(clikp)) : sum(obj.siteweight .* log.(clikp)))
ll = wsum((visib ? log.(clikp) : log.(clikp .+ clikn)))
@debug "γ = 0 best, skip Newton-Raphson"
else
ulik .= (clike .- clikp) ./ clike # at γ=1
llg1 = (noweights ? sum(ulik) : sum(obj.siteweight .* ulik))
else # at γ=1
if visib
ulik .= (clike .- clikp) ./ clike
else ulik .= (clike .- clikp) ./ (clike .+ clikn); end
llg1 = wsum(ulik)
if llg1 > 0
γ = 1.0
inside01 = false
ll = (noweights ? sum(log.(clike)) : sum(obj.siteweight .* log.(clike)))
ll = wsum((visib ? log.(clike) : log.(clike .+ clikn)))
@debug "γ = 1 best, skip Newton-Raphson"
end
end
if inside01
# use interpolation to get a good starting point? γ = llg0 / (llg0 - llg1) in [0,1]
γ = γ0
ulik .= γ .* clike + (1.0-γ) .* clikp
if visib
ulik .= γ .* clike + (1.0-γ) .* clikp
else ulik .= γ .* clike + (1.0-γ) .* clikp .+ clikn; end
# ll: rescaled log likelihood. llg = gradient, llh = hessian below
for istep in 1:maxNR
# ll from above is also: sum(obj.siteweight .* log.(ulik)))
ulik .= (clike .- clikp) ./ ulik # gradient of unconditional lik
llg = (noweights ? sum(ulik) : sum(obj.siteweight .* ulik))
llg = wsum(ulik)
map!(x -> x^2, ulik, ulik) # now ulik = -hessian contributions
llh = (noweights ? -sum(ulik) : -sum(obj.siteweight .* ulik))
llh = - wsum(ulik)
= γ - llg/llh # candidate γ: will be new γ if inside (0,1)
if>= 1.0
γ = γ/2 + 0.5
Expand All @@ -1643,8 +1656,10 @@ function optimizegamma_LiNC!(obj::SSM, focusedge::Edge,
else
γ =
end
ulik .= γ .* clike + (1.0-γ) .* clikp
ll_new = (noweights ? sum(log.(ulik)) : sum(obj.siteweight .* log.(ulik)))
if visib
ulik .= γ .* clike + (1.0-γ) .* clikp
else ulik .= γ .* clike + (1.0-γ) .* clikp + clikn; end
ll_new = wsum(log.(ulik))
lldiff = ll_new - ll
ll = ll_new
lldiff < ftolAbs && break
Expand Down
20 changes: 20 additions & 0 deletions test/test_phyLiNCoptimization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,26 @@ lengthe = obj.net.edge[44].length
@test_nowarn PhyloNetworks.optimizelocalgammas_LiNC!(obj, obj.net.edge[4], 1e-6,γcache)
@test obj.net.edge[4].gamma == 0.0
@test PhyloNetworks.getMajorParentEdge(obj.net.hybrid[1]).gamma == 1.0

# gamma at a hybrid ladder: when some displayed trees don't have the focus edge
# 2 unzipped reticulations in a hybrid ladder, reasonable (small) branch lengths
net = readTopology("(#H2:0.0001::0.0244,((C:0.0262,((B:0.0)#H1:0.0::0.6)#H2:0.03::0.9756):0.4812,(#H1:0.0001::0.4,A:0.1274):0.0001):0.0274,D:0.151);")
obj = PhyloNetworks.StatisticalSubstitutionModel(net, fastasimple, :JC69)
obj.trait[1][3] = 3 # to create discordance across sites
obj.trait[4][4] = 2 # for estimated γ to be within (0,1)
γcache = PhyloNetworks.CacheGammaLiNC(obj)
PhyloNetworks.discrete_corelikelihood!(obj) # -29.82754754416619
ll = PhyloNetworks.optimizegamma_LiNC!(obj, obj.net.edge[4], .001, γcache, 3)
@test ll -29.655467369763585
@test obj.net.edge[4].gamma 0.8090635871910823
@test γcache.hase == [true, true, false]
@test all(γcache.clikn .== 0.0)
ll = PhyloNetworks.optimizegamma_LiNC!(obj, obj.net.edge[1], .001, γcache, 3)
@test ll -29.468517891012983
@test obj.net.edge[1].gamma 0.19975558937688737
@test γcache.hase[[1,2]] == [false,true]
@test γcache.hase[3] === missing
@test γcache.clikn[[6,7]] [0.25, 0.0007] rtol=0.01
end #of local branch length and gamma optimization with localgamma! localBL! with 8 sites

@testset "global branch length and gamma optimization" begin
Expand Down

0 comments on commit 96cf717

Please sign in to comment.