-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrna_pipeline.cfg
178 lines (168 loc) · 8.3 KB
/
rna_pipeline.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
; Pipeline configuration documentation
; Note:
; This configuration file has 9 sections. Options in the `global` section apply to all other sections,
; so you need to configure them. Some of these parameters have a default-assignment mechanism,
; so you can leave them empty, e.g. [global.outdir | global.sample | global.thread | global.genomeDir].
; Other parameters carry a [required] annotation
; and must be configured, e.g. [global.fq1 | global.fq2].
; One thing you need to know:
; the program uses the `global.genomeDir` parameter to decide whether to run the STAR index step.
; For a more detailed description, see the annotation on each option.
; Another thing you need to know concerns the barcode section:
; the barcode split step has two modes. One is the parametric mode,
; which requires `barcode.chemistry_config`; in this mode you
; can configure validation of `barcode`, `link`, `polyt` and `qual`. The other
; is the nonparametric mode, in which only `polyt` and `qual` can be validated. For more detailed
; information, see the annotation on each option.
[global]
; Output directory. It must already exist; if left empty, the current directory is used.
outdir=
; Sample name. If left empty, it is extracted from the input fq1 file name:
; the name is split on `_` and the first part is used.
sample=
; Number of threads to run with. If left empty, the default of 4 is used.
thread=
; Whether to gzip the output files. To write plain-text output instead, change the value from `True` to `False`.
gzip=True
; [required] For 10X scRNA-seq. Can take multiple comma-separated values; doing this treats all reads
; from the library, across flow cells, as one sample.
fq1=
; [required] RNA sequencing reads; same format as fq1.
fq2=
; Genome directory:
;   if not provided
;     -> the current directory is used; the STAR index step runs and writes its output there.
;        In this case you must fill in the `[mkref]` options below.
;   if provided
;     -> if a `genome.config` file exists in the given path, the STAR index has already been built
;        and the STAR index step is not run.
;     -> if no `genome.config` file exists in the given path, you must provide
;        `mkref.fasta`, `mkref.gtf` and `mkref.genome_name` below.
genomeDir=
[mkref]
; [takes effect only when the STAR index step needs to run] Genome FASTA file.
fasta=
; [takes effect only when the STAR index step needs to run] Genome GTF file.
gtf=
; STAR index parameter.
genomeSAindexNbases=14
; Controls the `-geneNameAsName2` option of the gtfToGenePred command:
;   - true:  the option is included and the output file uses the gene name as its first column.
;   - false: the option is omitted and the output file uses the gene id as its first column.
gene_name_as_name2=true
; Mitochondrial gene list file: a plain-text file with one gene per line.
; If not provided, `mt-` and `MT-` are used to identify mitochondrial genes.
; If provided, use an absolute path or a path relative to `global.genomeDir`.
mt_gene_list=
; [takes effect only when the STAR index step needs to run] Genome name.
genome_name=
[sample]
; Chemistry version.
chemistry=
[barcode]
; The chemistry library can be specified in one of two ways:
;   1. Set `chemistry_name`. One library is currently preset, named
;      KitVersion1 (naming it `default` also works).
;      When this is used, the `pattern` and `chemistry_config` parameters
;      have no effect.
;   2. Set `pattern` and `chemistry_config` to specify the chemistry library.
;      Read the instructions on those options below to learn how to use them.
chemistry_name=
; [required if chemistry_name is not provided] The pattern of the sequences, e.g. C8L6C8L6C8U8T30.
; The number after each letter is the number of bases;
; `C`, `L`, `U` and `T` represent `barcode`, `linker`, `UMI`
; and `poly T` respectively.
pattern=
; [takes effect only when the barcode split runs in parametric mode]
; If provided, use an absolute file path. The file content should look similar to this:
;-------------------------------------------------------
;[chemistry]
;barcode=Barcode1.list,Barcode2.list,Barcode3.list
;link=Link1.list,Link2.list
;-------------------------------------------------------
; The barcode and link validation files are all plain-text files with one sequence per line.
; Multiple files are separated by commas. Finally, note that these files must be in
; the same directory as the chemistry_config file.
chemistry_config=
; [takes effect only when chemistry_config is provided] Switch for link validation.
; Set it to `True` only when running in parametric mode; in that mode you must provide
; chemistry_config first!
; NOTE(review): the original comment described the default as "no link validation" (`False`),
; but the value set here is `True` — confirm which is intended.
use_link_valid_reads=True
; [takes effect only when link validation is enabled] The number of link differences must be less than this value.
allow_link_diff_num=2
; [takes effect only when chemistry_config is provided] Switch for barcode validation.
; Set it to `True` only when running in parametric mode; in that mode you must provide
; chemistry_config first!
; NOTE(review): the original comment described the default as "no barcode validation" (`False`),
; but the value set here is `True` — confirm which is intended.
use_barcode_valid_reads=True
; [takes effect only when barcode validation is enabled] The number of barcode differences must be less than this value.
allow_barcode_diff_num=1
; Switch for poly T validation; choose from [False, True].
; NOTE(review): the original comment described the default as "no poly T validation",
; but the value set here is `True` — confirm which is intended.
use_polyt_valid_reads=True
; [takes effect only when poly T validation is enabled] The number of T bases in the poly T
; must be greater than this value times the poly T length.
polyt_rate=0.7
; Quality (QUAL) validation threshold. 0 means no quality validation. If you provide a number greater
; than 0, each base of the barcode extracted from the sequence is checked against this value;
; a base whose quality is lower than the value is regarded as a low-quality base.
low_qual=0
; [takes effect only when low_qual is not 0] Maximum allowed number of low-quality bases.
low_num=2
; Whether to output the r1 file; choose from [`False`, `True`]. The default is False, which means the r1 file is not output.
output_r1=False
[cutadapt]
; [Optional] Additional adapter FASTA file.
adapter_fasta=
; Discard processed reads that are shorter than this length.
minimum_length=20
; Quality trimming of reads produced with two-color chemistry (NextSeq).
; Some Illumina instruments use a two-color chemistry to encode the four bases.
; This includes the NextSeq and the NovaSeq.
; On those instruments, a `dark cycle` (with no detected color) encodes a G.
; However, dark cycles also occur when sequencing `falls off` the end of the fragment.
; The read then contains a run of high-quality but incorrect `G` calls at its 3' end.
nextseq_trim=20
; Because Cutadapt allows partial matches between the read and the adapter sequence,
; short matches can occur by chance, leading to erroneously trimmed bases.
; For example, roughly 0.25 of all reads end with a base that is identical to the first base of the adapter.
; To reduce the number of falsely trimmed bases, the alignment algorithm requires that
; at least `overlap` bases match between adapter and read.
overlap=10
; Read2 insert length.
insert=150
; [Optional] Other cutadapt parameters, e.g. `-g XXX`.
cutadapt_param=
[star]
; Whether to output unmapped reads; choose from [`True`, `False`]. The default is False, which means unmapped reads are not output.
out_unmapped=False
; An alignment is output only if its number of matched bases is greater than or equal to this value.
out_filter_match_n_min=0
; Maximum number of places a read is allowed to match.
out_filter_multimap_n_max=1
; Maximum memory that picard can use.
; NOTE(review): the unit is not stated in this file (presumably GB) — confirm.
picard_mem=40
; Maximum memory that STAR can use.
; NOTE(review): the unit is not stated in this file (presumably GB) — confirm.
star_mem=40
; [Optional] Additional parameters for the STAR software, e.g. --param1 value1 --param2 value2
; You should be familiar with the STAR software before using this.
star_param=
[featureCounts]
; Specifies the attribute type in the GTF annotation. Meta-features used for read counting are
; extracted from the annotation using the provided value (gene or exon).
; NOTE(review): `gene`/`exon` look like featureCounts feature types (`-t`) rather than
; attribute types (`-g`) — confirm against the pipeline code.
gtf_type=gene
; [Optional] Additional parameters for the featureCounts software.
feature_counts_param=
[count]
; Expected number of cells.
expected_cell_num=3000
; When correcting barcodes and UMIs, UMI counts less than this value are
; discarded. Setting this parameter speeds up the run, but some data will be discarded.
n_umi_filter=5
; For barcode correction: low_umis_count/high_umis_count must be less than this value.
; If set to 1, low is merged into high for every matching case.
barcode_correct_limit=0.01
; For UMI correction: low count/high count must be less than this value.
; If set to 1, low is merged into high for every matching case.
umi_correct_limit=0.1
; [Optional] Force the cell number to be this number. The default is None; you can provide an integer value.
force_cell_num=
; Choose from [`auto`, `EmptyDrops_CR`].
cell_calling_method=EmptyDrops_CR
[analysis]