-
Notifications
You must be signed in to change notification settings - Fork 16
/
prune.go
154 lines (135 loc) · 4.04 KB
/
prune.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
package cmd
import (
goio "io"
"math/rand"
"os"
"github.com/evolbioinfo/gotree/io"
"github.com/evolbioinfo/gotree/tree"
"github.com/spf13/cobra"
)
func specificTips(ref *tree.Tree, comp *tree.Tree) []string {
compmap := make(map[string]*tree.Node)
spectips := make([]string, 0)
for _, n := range comp.Nodes() {
if n.Nneigh() == 1 {
compmap[n.Name()] = n
}
}
for _, n := range ref.Nodes() {
if n.Nneigh() == 1 {
_, ok := compmap[n.Name()]
if !ok {
spectips = append(spectips, n.Name())
}
}
}
return spectips
}
// returns a random list of n tips from the tree
// uses reservoir sampling to select random tips
// if n>nb tips: returns the tips
func randomTips(tr *tree.Tree, n int) (sampled []string) {
sampled = make([]string, n)
total := 0
for i, tip := range tr.Tips() {
if i < n {
sampled[i] = tip.Name()
} else {
j := rand.Intn(i)
if j < n {
sampled[j] = tip.Name()
}
}
total++
}
if total < n {
sampled = sampled[:total]
}
return
}
var randomtips int
// pruneCmd represents the prune command
var pruneCmd = &cobra.Command{
Use: "prune",
Short: "Remove tips of the input tree that are not in the compared tree",
Long: `This tool removes tips of the input reference tree that :
1) Are not present in the compared tree (--comp <other tree>) if any or
2) Are present in the given tip file (--tipfile <file>) if any or
3) Are randomly sampled (--random <num tips>) or
4) Are given on the command line
If several trees are present in the file given by -i, they are all analyzed and
written in the output.
If -c and -f are not given, this command will take taxa names on command line, for example:
gotree prune -i reftree.nw -o outtree.nw t1 t2 t3
By order of priority:
1) -f --tipfile <tip file>
2) -c --comp <other tree>
3) --random <number of tips to randomly sample>
4) tips given on commandline
5) Nothing is done
If -r is given, behavior is reversed, it keep given tips instead of removing them.
`,
RunE: func(cmd *cobra.Command, args []string) (err error) {
var f *os.File
var comptree *tree.Tree
var treefile goio.Closer
var treechan <-chan tree.Trees
var specificTipNames []string
if f, err = openWriteFile(outtreefile); err != nil {
io.LogError(err)
return
}
defer closeWriteFile(f, outtreefile)
if intree2file != "none" {
if comptree, err = readTree(intree2file); err != nil {
io.LogError(err)
return
}
}
// Read ref Trees
if treefile, treechan, err = readTrees(intreefile); err != nil {
io.LogError(err)
return
}
defer treefile.Close()
var tips []string
if tipfile != "none" {
if tips, err = parseTipsFile(tipfile); err != nil {
io.LogError(err)
return
}
}
for reftree := range treechan {
if reftree.Err != nil {
io.LogError(reftree.Err)
return reftree.Err
}
if tipfile != "none" {
err = reftree.Tree.RemoveTips(revert, tips...)
} else if comptree != nil {
specificTipNames = specificTips(reftree.Tree, comptree)
err = reftree.Tree.RemoveTips(revert, specificTipNames...)
} else if randomtips > 0 {
sampled := randomTips(reftree.Tree, randomtips)
err = reftree.Tree.RemoveTips(revert, sampled...)
} else {
err = reftree.Tree.RemoveTips(revert, args...)
}
if err != nil {
io.LogError(err)
return
}
f.WriteString(reftree.Tree.Newick() + "\n")
}
return
},
}
func init() {
RootCmd.AddCommand(pruneCmd)
pruneCmd.Flags().StringVarP(&intreefile, "ref", "i", "stdin", "Input reference tree")
pruneCmd.Flags().StringVarP(&intree2file, "comp", "c", "none", "Input compared tree ")
pruneCmd.Flags().StringVarP(&outtreefile, "output", "o", "stdout", "Output tree")
pruneCmd.Flags().StringVarP(&tipfile, "tipfile", "f", "none", "Tip file")
pruneCmd.Flags().BoolVarP(&revert, "revert", "r", false, "If true, then revert the behavior: will keep only species given in the command line, or keep only the species that are specific to the input tree, or keep only randomly selected taxa")
pruneCmd.PersistentFlags().IntVar(&randomtips, "random", 0, "Number of tips to randomly sample")
}