-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_bwa.sh
executable file
·170 lines (155 loc) · 4.23 KB
/
run_bwa.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/bin/csh -f
# Extract unmapped reads and re-map them to the exon-exon junctions
set BWA_EXE = "bwa"
set BWA_OPTIONS = "aln -q 15 -n 0.04 -o 0 -t 8"
set SAMTOOLS_EXE = "samtools"
set GZIP_EXE = "gzip"
set PERL_EXE = "perl"
set XARGS_EXE = "xargs"
set TMP_DIR = "/tmp"
set change_path = "Please change the path to execulate in this script."
if (`which $BWA_EXE >& /dev/null; echo $?` == 1) then
echo "Can't run bwa!"
echo $change_path
exit;
endif
if (`which $SAMTOOLS_EXE >& /dev/null; echo $?` == 1) then
echo "Can't run samtools!"
echo $change_path
exit;
endif
if (`which $GZIP_EXE >& /dev/null; echo $?` == 1) then
echo "Can't run gzip!"
echo $change_path
exit;
endif
if (`which $PERL_EXE >& /dev/null; echo $?` == 1) then
echo "Can't run perl!"
echo $change_path
exit;
endif
if (`which $XARGS_EXE >& /dev/null; echo $?` == 1) then
echo "Can't run xargs!"
echo $change_path
exit;
endif
# Usage
set usage = "Usage: \
"$0" -o output_suffix \
-i genome_index_prefix1 [-i genome_index_prefix2 -i ...] \
file1.bam [file2.bam ...]"
# Input check
set n_args = $#argv
if ($n_args == 0) then
echo $usage:q
exit;
endif
# Parsing input
set genomes = ()
set files = ()
set i = 1
while ($i <= $n_args)
set arg = $argv[$i]
if ("$arg" == "-i" || "$arg" == "-o") then
@ i = $i + 1
if ("$arg" == "-i" && $i <= $n_args) then
set genomes = ($genomes $argv[$i])
endif
if ("$arg" == "-o" && $i <= $n_args) then
set suff = $argv[$i]
endif
else
set files = ($files $arg)
endif
@ i = $i + 1
end
# Checking suffix
if ($?suff == 0) then
echo "No output suffix provide!"
echo $usage:q
exit;
endif
# Checking genome indices
if ($#genomes <= 0) then
echo "No genome indices provide!"
echo $usage:q
exit;
endif
foreach g ($genomes)
if (`ls $g.* >& /dev/null; echo $?` == 1) then
echo "No files resembling index "$g" found!"
echo "Please check that provided prefix is correct."
exit
endif
end
# Checking input files
if ($#files <= 0) then
echo "No input files provide!"
echo $usage:q
exit;
endif
foreach f ($files)
if (! -e $f) then
echo "File "$f" doesn't exist ..."
exit
else
if (! -r $f) then
echo "File "$f" isn't readable ..."
exit
endif
endif
end
# Generating name for temporary file
set fq = $TMP_DIR"/rdup_"`date '+%S'``date '+%N'`".fastq.gz"
if (-e $fq) then
echo "File "$fq" exists ..."
set fq = $TMP_DIR"/rdup_"`date '+%S'``date '+%N'`".fastq.gz"
endif
if (-e $fq) then
echo "File "$fq" exists!"
echo "Can't generate unique name for .fastq file."
echo "Aborting ..."
echo "Please restart the script!"
exit
endif
# Selcting secondary reads
echo "Creating file "$fq" with unmapped reads from the following files: "
foreach f ($files)
echo " "$f
end
ls $files | $XARGS_EXE -i $SAMTOOLS_EXE view -f 0x4 {} | \
$PERL_EXE -ane ' ($flag,$seq,$qual) = ($F[1],$F[9],$F[10]); \
if ($flag & 0x10) { \
$seq = reverse($seq); \
$seq =~ tr/[ACTGactg]/[TGACtgac]/; \
$qual = reverse($qual); \
} \
print "\@",$F[0],"\n"; \
print $seq,"\n"; \
print "+\n"; \
print $qual,"\n";' | \
$GZIP_EXE > $fq
# Remapping
set alis = ()
foreach g ($genomes)
if (`ls $g.* >& /dev/null; echo $?` == 1) then
echo "No files resembling index "$g" found!"
echo "Please check that provided prefix is correct."
else
echo "Aligning to genome index "$g" ..."
set prefix = `echo $g | rev | cut -f 1 -d "/" | rev`
set out = $prefix"."$suff".sam.gz"
$BWA_EXE $BWA_OPTIONS $g $fq | $BWA_EXE samse $g - $fq | \
$SAMTOOLS_EXE view -q 20 -F 0x4 -S - | \
$GZIP_EXE > $out
set alis = ($alis $out)
endif
end
echo "Created the following files with alignments:"
foreach o ($out)
echo " "$o
end
# Deleting temporary file
echo "Deleting file "$fq" ..."
rm $fq
exit;