-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgenerate_count_dataframe.nf
58 lines (52 loc) · 1.91 KB
/
generate_count_dataframe.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env nextflow
/*==========================
==========================*/
/*
 * GENERATE_COUNT_DATAFRAME
 *
 * Merges the staged per-sample count files into two feature-by-sample tables:
 *   - All-Features_normalised-counts.tsv  from files ending in rpm_collapsed.count
 *   - All-Features_raw-counts.tsv         from files ending in counts_collapsed.count
 *
 * For each input file, whitespace-separated column 1 is read as the feature
 * name and column 2 as the value. The column header for a sample is its file
 * name up to the first dot. Columns are joined positionally with `paste`, so
 * this assumes every file of a set lists the same features in the same order
 * (NOTE(review): not checked here - confirm upstream guarantees it).
 *
 * input : countfiles - the count files to merge (staged into the task dir)
 * output: count_df   - every *.tsv written by the script (the two tables above)
 */
process GENERATE_COUNT_DATAFRAME {

    input:
    path countfiles

    output:
    path("*.tsv"), emit: count_df

    script:
    """
    # Assemble one feature-by-sample table from every staged file whose name
    # ends in the given suffix.
    #   \$1  label used in the scratch file name (rpm | raw)
    #   \$2  file-name suffix selecting the per-sample count files
    #   \$3  name of the TSV to write
    build_count_table() {
        local label="\$1"
        local suffix="\$2"
        local out_tsv="\$3"
        local table="FCount.\${label}.all-features"
        local header="Feature"
        local f sample
        local -a files=( *"\${suffix}" )

        # An unmatched glob stays literal, so check that the first hit exists
        # and fail with a clear message instead of pasting nothing.
        if [[ ! -e "\${files[0]}" ]]; then
            echo "ERROR: no count files matching *\${suffix} were found" >&2
            exit 1
        fi

        # Feature names come from the first file of THIS set, so the name
        # column and the value columns originate from the same kind of file.
        awk '{print \$1}' "\${files[0]}" > "\${table}"

        for f in "\${files[@]}"; do
            echo "\${f}"

            # Sample name = file name up to its first dot.
            sample="\${f##*/}"
            sample="\${sample%%.*}"

            # Keep the header tab-delimited; it is only ever expanded quoted,
            # so word splitting cannot turn the tabs into spaces.
            printf -v header '%s\\t%s' "\${header}" "\${sample}"

            # Append this sample's values as a new right-most column.
            awk '{print \$2}' "\${f}" | paste "\${table}" - > "\${table}.tmp"
            mv "\${table}.tmp" "\${table}"
        done

        { printf '%s\\n' "\${header}"; cat "\${table}"; } > "\${out_tsv}"
    }

    ### RPM count files
    build_count_table rpm rpm_collapsed.count All-Features_normalised-counts.tsv

    ### Raw count files
    build_count_table raw counts_collapsed.count All-Features_raw-counts.tsv
    """
}