-
Notifications
You must be signed in to change notification settings - Fork 6
/
xtest
executable file
·196 lines (175 loc) · 11.4 KB
/
xtest
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/bin/bash
set -uo pipefail
# This script exercises many of the options SRAssembler accepts and reports whether each run
# produces the expected output.
#
# Usage: xtest [defaultpath]
#   (no argument)  use the SRAssembler binaries in ../bin/ (the version compiled from the git checkout)
#   defaultpath    look for the SRAssembler binaries in the local PATH instead

#######################################
# Chooses the prefix that is prepended to every SRAssembler command name.
# Globals:   bindir (written) - "../bin/" with no argument, "" for 'defaultpath'
# Arguments: $1 - optional; only the literal word 'defaultpath' is accepted
# Outputs:   the usage message on stderr when the argument is not recognised
# Returns:   0 on success; on an unrecognised argument it does not return but
#            exits the script with status 1
#######################################
resolve_bindir() {
  if (( $# < 1 )); then
    bindir=../bin/
    return 0
  fi
  # [[ ]] with a quoted operand: an empty or multi-word argument is simply "not defaultpath"
  # rather than a syntax error inside the test.
  if [[ "$1" == "defaultpath" ]]; then
    bindir=""
    return 0
  fi
  echo "Usage: $0 [defaultpath]" >&2
  # A bad invocation must be distinguishable from a successful run.
  exit 1
}
resolve_bindir "$@"
#######################################
# Prints the heading for one test: a rule, the description lines, a closing rule.
# Arguments: $@ - description lines, printed one per line exactly as given
# Outputs:   the heading on stdout
#######################################
banner() {
  local rule="============================================================================================================"
  printf '%s\n' "$rule" "$@" "$rule"
}

#######################################
# Compares a run's summary page with the stored standard for that test.
# Arguments: $1 - test name; standards/$1.html is compared with $1/summary.html
# Outputs:   a progress line, then diff's verdict (-q: brief, -s: also report identical files)
# Returns:   the exit status of diff
#######################################
check_summary() {
  echo "Checking results against standards..."
  diff -qs "standards/$1.html" "$1/summary.html"
}

#######################################
# Reports whether the pre-processing-only run left the expected chunk files behind
# for both libraries.
# Outputs:   one line on stdout stating success or failure
# Returns:   0 if both files exist, 1 otherwise
#######################################
check_preprocessed() {
  if [[ -e processed_reads/reads1_200library/reads2_200_chunk10.al1 \
    && -e processed_reads/1kb_lib/reads2_1000_chunk10.al1 ]]; then
    printf "Pre-processing successful.\n"
    return 0
  fi
  # Say so explicitly; a silent failure is easy to miss in the middle of the log.
  printf "Pre-processing unsuccessful.\n"
  return 1
}

banner " Test1: Use the -1, -2 options to assign reads files."
out=testout1
# Start clean: remove both the old results and any previously processed reads.
rm -rf -- "$out" processed_reads
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -1 input/reads1_200.fq -2 input/reads2_200.fq -Z 200 -r ./processed_reads -R 5000 -o "$out"
check_summary "$out"

banner " Test2: Use a cDNA sequence as query input."
out=testout2
rm -rf -- "$out"
# Because we have existing pre-processed reads, we do not need to specify the number of reads per split file with -R.
"${bindir}SRAssembler" -q input/AT1G01950.cdna -t dna -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o "$out"
check_summary "$out"

banner " Test3: Use two libraries with a library definition file. Only the second library still needs to be processed."
out=testout3
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp_1kb.conf -r ./processed_reads -R 5000 -o "$out"
check_summary "$out"

banner " Test4: Previous example, but using the MPI implementation and require an extra 2 rounds of assembly."
out=testout4
rm -rf -- "$out"
mpirun -np 4 "${bindir}SRAssembler_MPI" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp_1kb.conf -r ./processed_reads -o "$out" -E 2
check_summary "$out"

# Tests 5.1-5.3 all write to the shared directory testout5; a snapshot of it is kept
# under its own name after each step so every step can be compared with its standard.
banner " Test5.1: Stop after 3 rounds of assembly. Then continue the current assembly with a new SRAssembler run."
out=testout5.1
rm -rf -- testout5 testout5.1 testout5.2 testout5.3
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o testout5 -n 3
cp -r testout5 "$out"
check_summary "$out"

banner " Test5.2: Re-run the previous command, but set -n to 7. The assembly will start with round 4."
out=testout5.2
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o testout5 -n 7
cp -r testout5 "$out"
check_summary "$out"

banner " Test5.3: Re-run the previous command, but use -y to force SRAssembler to start from scratch."
out=testout5.3
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o testout5 -n 7 -y
# Last step of the series: move rather than copy, so testout5 itself does not linger.
mv testout5 "$out"
check_summary "$out"

banner " Test6: Use GeneSeqer as the spliced alignment program (-S 0)."
out=testout6
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o "$out" -S 0
check_summary "$out"

banner " Test7: Use Abyss as the assembly program (-A 1)."
out=testout7
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp_1kb.conf -r ./processed_reads -o "$out" -A 1
check_summary "$out"

banner " Test8: Use Abyss as the assembly program with the MPI implementation."
out=testout8
rm -rf -- "$out"
mpirun -np 4 "${bindir}SRAssembler_MPI" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp_1kb.conf -r ./processed_reads -o "$out" -A 1
check_summary "$out"

banner \
  " Test9: Looking for potentially better alignments ..." \
  " Use higher score (-e 0.6) and coverage (-c 0.9) values to declare spliced alignment hits." \
  " Only report contigs of at least 7000 nucleotides (-m 7000)." \
  " Run the Snap ab initio gene prediction program on the final contigs (-G 1)."
out=testout9
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o "$out" -e 0.6 -c 0.9 -m 7000 -G 1
check_summary "$out"
# This run also has a stored standard for the annotation output, so compare that too.
diff -q "standards/${out}.ano" "${out}/output.ano"

banner " Test10: Remove unrelated contigs and reads every 2 rounds (-b 2)."
out=testout10
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o "$out" -b 2
check_summary "$out"

banner " Test11: Assemble contigs only from round 3 onward (-a 3)."
out=testout11
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o "$out" -a 3
check_summary "$out"

banner " Test12: Do not check if the query genes are assembled until the last round (-f)."
out=testout12
rm -rf -- "$out"
# Run for 13 rounds (-n 13) because that makes the assembly hit the maximum contig length
# (-M 10000, which is also the default).
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -o "$out" -n 13 -f -M 10000
check_summary "$out"

banner " Test13: Use a DNA taboo sequence to restrict certain reads from being collected or used."
out=testout13
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/AT1G01950.cdna -t dna -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -j input/taboo.dna -J dna -o "$out"
check_summary "$out"

banner " Test14: Use a peptide taboo sequence to restrict certain reads from being collected or used."
out=testout14
rm -rf -- "$out"
"${bindir}SRAssembler" -q input/LOC_Os06g04560.pep -p SRAssembler.conf \
  -l libraries_200bp.conf -r ./processed_reads -j input/taboo.pep -J protein -o "$out"
check_summary "$out"

banner " Test15: Pre-process reads only (-P)."
out=testout15
# The processed reads are removed as well, so this run has to regenerate them.
rm -rf -- "$out" processed_reads
"${bindir}SRAssembler" -l libraries_200bp_1kb.conf -r ./processed_reads -R 5000 -o "$out" -P
echo "Checking for preprocessed reads..."
check_preprocessed
# Final pass: repeat every comparison in one place so the verdicts can be read together
# at the end of the log instead of being scattered between the assembler's own output.
printf '%s\n' \
  "============================================================================================================" \
  " Confirm results of all tests." \
  "============================================================================================================"
for file in \
  testout1 testout2 testout3 testout4 \
  testout5.1 testout5.2 testout5.3 \
  testout6 testout7 testout8 testout9 \
  testout10 testout11 testout12 testout13 testout14; do
  diff -qs "standards/${file}.html" "${file}/summary.html"
  case "$file" in
    # Test9 is the only run that also has a stored annotation standard.
    testout9) diff -q "standards/${file}.ano" "${file}/output.ano" ;;
  esac
done
# Test15 produces no summary page; it is judged by the chunk files it leaves behind.
if [[ ! -e processed_reads/reads1_200library/reads2_200_chunk10.al1 \
  || ! -e processed_reads/1kb_lib/reads2_1000_chunk10.al1 ]]; then
  printf "Preprocessing-only test unsuccessful.\n"
else
  printf "Preprocessing-only test successful.\n"
fi