forked from nf-core/ampliseq
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
135 additions
and
120 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,29 @@ | ||
#!/usr/bin/env python3 | ||
#@author Daniel Straub | ||
# @author Daniel Straub | ||
# Takes one TSV count table from QIIME2 | ||
# and reports the maximum or minimum counts of all samples. | ||
|
||
import pandas as pd | ||
import sys | ||
|
||
#argument check | ||
if len(sys.argv) != 3 or sys.argv[2] not in ['maximum','minimum']: | ||
# argument check | ||
if len(sys.argv) != 3 or sys.argv[2] not in ["maximum", "minimum"]: | ||
exit("Usage: count_table_max_reads.py <feature-table.tsv> <maximum/minimum>") | ||
|
||
#read tsv and skip first two rows | ||
data = pd.read_csv(sys.argv[1], sep='\t', skiprows=[0,1], header=None) #count table | ||
# read tsv and skip first two rows | ||
data = pd.read_csv(sys.argv[1], sep="\t", skiprows=[0, 1], header=None) # count table | ||
|
||
#drop feature ids | ||
# drop feature ids | ||
df = data.drop(data.columns[0], axis=1) | ||
|
||
#make sums | ||
# make sums | ||
sums = df.sum() | ||
|
||
#determine maximum or minimum | ||
if sys.argv[2] == 'maximum': | ||
# determine maximum or minimum | ||
if sys.argv[2] == "maximum": | ||
out = int(sums.max()) | ||
elif sys.argv[2] == 'minimum': | ||
elif sys.argv[2] == "minimum": | ||
out = int(sums.min()) | ||
|
||
#print value | ||
print(out, end='') | ||
# print value | ||
print(out, end="") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,38 @@ | ||
#!/usr/bin/env python3 | ||
#@author Daniel Straub | ||
# @author Daniel Straub | ||
# Takes two TSV count tables from QIIME2 | ||
# and reports how many counts were filtered. | ||
|
||
import pandas as pd | ||
import sys | ||
|
||
#argument check | ||
# argument check | ||
if len(sys.argv) != 3: | ||
exit("Usage: count_table_max_reads.py <unfiltered_feature-table.tsv> <filtered_feature-table.tsv>") | ||
|
||
#read tsv and skip first two rows | ||
data_unfiltered = pd.read_csv(sys.argv[1], sep='\t', skiprows=None) #count table | ||
data_filtered = pd.read_csv(sys.argv[2], sep='\t', skiprows=[0]) #count table | ||
# read both tsv files; only the filtered table skips its first row | ||
data_unfiltered = pd.read_csv(sys.argv[1], sep="\t", skiprows=None) # count table | ||
data_filtered = pd.read_csv(sys.argv[2], sep="\t", skiprows=[0]) # count table | ||
|
||
#drop feature ids | ||
# drop feature ids | ||
df_unfiltered = data_unfiltered.drop(data_unfiltered.columns[0], axis=1) | ||
df_filtered = data_filtered.drop(data_filtered.columns[0], axis=1) | ||
|
||
#make sample count sums | ||
# make sample count sums | ||
sums_unfiltered = df_unfiltered.sum() | ||
sums_filtered = df_filtered.sum() | ||
|
||
#merge dataframes | ||
out = sums_unfiltered.to_frame(name = 'unfiltered').join(sums_filtered.to_frame(name = 'filtered')) | ||
out['lost'] = out['unfiltered'] - out['filtered'] | ||
out['retained_percent'] = out['filtered'] / out['unfiltered'] *100 | ||
out['lost_percent'] = (100 - out['retained_percent']) | ||
# merge dataframes | ||
out = sums_unfiltered.to_frame(name="unfiltered").join(sums_filtered.to_frame(name="filtered")) | ||
out["lost"] = out["unfiltered"] - out["filtered"] | ||
out["retained_percent"] = out["filtered"] / out["unfiltered"] * 100 | ||
out["lost_percent"] = 100 - out["retained_percent"] | ||
|
||
#add column with sample names at beginning | ||
out = out.rename_axis('sample').reset_index() | ||
# add column with sample names at beginning | ||
out = out.rename_axis("sample").reset_index() | ||
|
||
#rename columns | ||
out = out.rename(columns={'unfiltered': 'input_tax_filter', 'filtered': 'filtered_tax_filter'}) | ||
# rename columns | ||
out = out.rename(columns={"unfiltered": "input_tax_filter", "filtered": "filtered_tax_filter"}) | ||
|
||
#write file | ||
out.to_csv('count_table_filter_stats.tsv', sep='\t', index=False) | ||
# write file | ||
out.to_csv("count_table_filter_stats.tsv", sep="\t", index=False) |
Oops, something went wrong.