forked from YongxinLiu/EasyAmplicon
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtutorial.Rmd
254 lines (188 loc) · 7.87 KB
/
tutorial.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
---
title: "Diversity tutorial多样性分析教程"
author: "Yong-Xin Liu(刘永鑫)"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Vignette Title}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
```{r setup, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```
# 简介 Introduction
## 安装 Installation
The [amplicon R package](https://github.com/microbiota)
facilitates statistics and visualization of amplicon profiling data, in
particular 16S and ITS alpha / beta diversity, and taxonomy composition.
```{r install, eval=FALSE, include=TRUE}
# 基于github安装
# library(devtools)
# install_github("microbiota/amplicon")
#基于bioconductor安装,暂不可用
# library(BiocManager)
# BiocManager::install("amplicon")
```
加载R包 Load the package
```{r loading}
suppressWarnings(suppressMessages(library(amplicon)))
```
## 实验设计 Design/Metadata
设置基本参数:读取实验设计、设置分组列名、图片宽高
Setting the basic parameter
```{r parameter, warning=FALSE}
# Data reading
metadata = read.table("metadata.tsv", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F)
head(metadata, n = 3)
# colnames of group ID in metadata
# 设置实验分组列名
group = "Group"
# Output figure width and height
# Letter纸图片尺寸为单栏89 mm,双栏183 mm,页面最宽为247 mm
# 推荐比例16:10,即半版89 mm x 56 mm; 183 mm x 114 mm
width = 89
height = 59
# 手动指定分组列和顺序,默认为字母顺序
# metadata[[group]] = factor(metadata[[group]], levels = c("WT","KO","OE"))
# 按实验设计中分组出现顺序
# metadata[[group]] = factor(metadata[[group]], levels = unique(metadata[[group]]))
```
# α多样性 Alpha diversity
## 箱线图+统计 Boxplot+Statistics
绘制主要分三步:
Plotting each figure mainly include three step:
1. 读取数据并预览格式Reading data and viewing format;
2. 参数调整和绘图Paramters adjustment and plotting;
3. 保存图片Saving figure.
```{r alpha_boxplot, fig.show='asis', fig.width=4, fig.height=2.5}
# vegan.txt中还有6种常用α多样性,alpha.txt中有11种α多样性
alpha_div = read.table("alpha/vegan.txt", header=T, row.names=1, sep="\t", comment.char="")
head(alpha_div, n = 3)
# capitalize
library(Hmisc)
colnames(alpha_div) = capitalize(colnames(alpha_div))
colnames(alpha_div)
# 选择指数"Richness","Chao1","ACE","Shannon","Simpson","Invsimpson"
alpha_index = "Richness"
# Plotting alpha diversity Richness boxplot and stat
(p = alpha_boxplot(alpha_div, index = alpha_index, metadata, groupID = group))
# Saving figure
# 保存图片,大家可以修改图片名称和位置,长宽单位为毫米
ggsave(paste0("alpha/alpha_boxplot_",alpha_index,".pdf"), p, width = width, height = height, units = "mm")
# 另存图片用于后期拼图
p1 = p
# 尝试探索不同的多样性各类
colnames(alpha_div)
alpha_boxplot(alpha_div, index = "Shannon", metadata, groupID = group)
# 尝试不同的分组方式
colnames(metadata)
alpha_boxplot(alpha_div, index = "Chao1", metadata, groupID = "Site")
```
## 稀释曲线+标准误 Rarefaction curve and standard error
```{r alpha_rare, fig.show='asis', fig.width=4, fig.height=2.5}
# 数据读取 Data reading
alpha_rare = read.table("alpha/alpha_rare.txt", header=T, row.names=1, sep="\t", comment.char="")
alpha_rare[1:3, 1:3]
# 绘制稀释曲线+标准误 Plotting alpha rarefaction curve in group mean with standard error
(p = alpha_rare_curve(alpha_rare, metadata, groupID = group))
# 保存图片 Saving figure
ggsave("alpha/alpha_rarefaction_curve.pdf", p, width = width, height = height, units = "mm")
p2 = p
```
## 各组多样性种组比较 Venn/Upset/Sanky
命令行 sp_vennDiagram.sh 绘制维恩图
推荐使用`otu_group_exist.txt`在线 http://www.ehbio.com/ImageGP 绘制Venn/Upset/Sanky
# β多样性 Beta diversity
## 主坐标轴分析 PCoA
principal coordinate analysis (PCoA)
```{r beta_pcoa, fig.show='asis', fig.width=4, fig.height=2.5}
# 主坐标轴分析,可选距离矩阵bray_curtis、unifrac、unifrac_binary、jaccard、manhatten、euclidean
# 设置距离矩阵类似,常用bray_curtis或unifrac
distance_type = "bray_curtis"
# Data reading
distance_mat = read.table(paste0("beta/",distance_type,".txt"), header=T, row.names=1, sep="\t", comment.char="")
distance_mat[1:3, 1:3]
# Plotting Constrained PCoA based on distance matrix
(p = beta_pcoa(distance_mat, metadata, groupID = group))
# Saving figure
ggsave(paste0("beta/pcoa_",distance_type,".pdf"), p, width = width, height = height, units = "mm")
p3 = p
# statistic each pairwise by adonis
beta_pcoa_stat(distance_mat, metadata, groupID = group)
# 结果文件默认见beta_pcoa_stat.txt
```
## 有监督PCoA/CCA
Constrained PCoA
要求至少分组数量 >= 3时才可用,否则请跳过此步分析
```{r beta_cpcoa, fig.show='asis', fig.width=4, fig.height=2.5}
# Plotting Constrained PCoA
(p = beta_cpcoa_dis(distance_mat, metadata, groupID = group))
# Saving figure
ggsave(paste0("beta/cpcoa_",distance_type,".pdf"), p, width = width, height = height, units = "mm")
p4 = p
```
# 物种组成 Taxonomy stackplot
分别绘制样本和组均值的物种组成
Samples and groups
```{r taxonomy, fig.show='asis', fig.width=4, fig.height=2.5}
# 设置分析级别 门p、纲c、目o、科f、属g
tax_level = "p"
# Data reading
tax_phylum = read.table(paste0("tax/sum_", tax_level, ".txt"), header=T, row.names=1, sep="\t", comment.char="")
tax_phylum[1:3, 1:3]
# Plotting samples taxonomy composition
(p = tax_stackplot(tax_phylum, metadata, topN = 8, groupID = group, style = "sample", sorted = "abundance"))
p5 =p
ggsave(paste0("tax/", tax_level,"_sample.pdf"), p, width = width, height = height, units = "mm")
# Plotting groups taxonomy composition
(p = tax_stackplot(tax_phylum, metadata, topN = 8, groupID = group, style = "group", sorted = "abundance"))
p6 = p
ggsave(paste0("tax/", tax_level,"_group.pdf"), p, width = width, height = height, units = "mm")
# 按字母顺序排列
(p = tax_stackplot(tax_phylum, metadata, topN = 8, groupID = group, style = "group", sorted = "alphabet"))
```
```{r RColorBrewer}
# 修改配色方案
library(RColorBrewer)
display.brewer.all()
p = p + scale_fill_brewer(palette = "Set1")
p
```
## 弦图 circlize plot
```{r}
# 输出结果有当前目录下默认参数的弦图circlize.pdf,每次颜色随机
# 指定颜色和图例的弦图circlize_legend.pdf
tax_circlize(tax_phylum, metadata, topN = 5, groupID = group)
# 查看结果不美观,尝试不同水平,如以纲为例
tax_class = read.table(paste0("tax/sum_c.txt"), header=T, row.names=1, sep="\t", comment.char="")
tax_circlize(tax_class, metadata, topN = 8, groupID = group)
```
## 树图treemap
```{r}
# 读取特征表OTU/ASV
otutab = as.matrix(read.delim("../result/otutab.txt",row.names = 1))
# head(otutab)
# 读取7级物种注释
taxonomy = as.matrix(read.delim("../result/taxonomy.txt",row.names = 1))
# head(taxonomy)
# 指定显示的特征数量,与图中最低级别球形数量相同
topN = 200
# 数据转换为maptree格式
mapdata = format2maptree(otutab, taxonomy, topN)
#按照平均丰度修改大小和按照门水平上色颜色
mapadd = tax_maptree(mapdata)
(p = mapadd[[1]])
ggsave("tax_maptree.pdf", p, width = 183, height = 118, units = "mm" )
```
# 排版 Combo plots
组合多个子图为发表格式
Combo plots to published-ready figure
```{r div_combo, fig.show='asis', fig.width=6, fig.height=4}
library(cowplot)
(p0 = plot_grid(p1, p2, p3, p4, p5, p6, labels = c("A", "B", "C", "D", "E", "F"), ncol = 2))
ggsave("diversity.pdf", p0, width = width * 2, height = height * 3, units = "mm")
```