Skip to content

Commit a8aeeea

Browse files
authored
Add files via upload
main files in gp folder
1 parent 1e034e7 commit a8aeeea

3 files changed

+255
-0
lines changed

gp/KEGG_gene_annotation.R

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#------gene annotat: biomaRt--------
2+
ens_to_name <- function(ens_names) {
3+
require("biomaRt")
4+
ensembl = useMart(biomart="ENSEMBL_MART_ENSEMBL",
5+
#host="asia.ensembl.org",
6+
host="uswest.ensembl.org",
7+
# host="www.ensembl.org",
8+
# host="grch37.ensembl.org",
9+
path="/biomart/martservice",
10+
dataset="hsapiens_gene_ensembl")
11+
mapping <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'hgnc_symbol', 'entrezgene', 'description'),
12+
filters = 'ensembl_gene_id',
13+
values = ens_names,
14+
mart = ensembl)
15+
16+
gene_names <- list(ids = ens_names, names = mapping[match(ens_names, mapping$ensembl_gene_id),]$external_gene_name)
17+
gene_final_names <- ifelse(is.na(as.character(gene_names$names)), gene_names$ids, gene_names$names)
18+
return(gene_final_names)
19+
}
20+
21+
ens_to_geneid <- function(ens_names) {
22+
require("biomaRt")
23+
ensembl = useMart(biomart="ENSEMBL_MART_ENSEMBL",
24+
#host="asia.ensembl.org",
25+
# host="uswest.ensembl.org",
26+
host="www.ensembl.org",
27+
# host="grch37.ensembl.org",
28+
path="/biomart/martservice",
29+
dataset="hsapiens_gene_ensembl")
30+
mapping <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'hgnc_symbol', 'entrezgene', 'description'),
31+
filters = 'ensembl_gene_id',
32+
values = ens_names,
33+
mart = ensembl)
34+
35+
gene_names <- list(ids = ens_names, names = mapping[match(ens_names, mapping$ensembl_gene_id),]$entrezgene)
36+
gene_final_names <- ifelse(is.na(as.character(gene_names$names)), gene_names$ids, gene_names$names)
37+
return(gene_final_names)
38+
}
39+
40+
ens_to_desc <- function(ens_names) {
41+
require("biomaRt")
42+
ensembl = useMart(biomart="ENSEMBL_MART_ENSEMBL",
43+
#host="asia.ensembl.org",
44+
# host="uswest.ensembl.org",
45+
host="www.ensembl.org",
46+
# host="grch37.ensembl.org",
47+
path="/biomart/martservice",
48+
dataset="hsapiens_gene_ensembl")
49+
mapping <- getBM(attributes = c('ensembl_gene_id',
50+
'external_gene_name',
51+
'hgnc_symbol',
52+
'entrezgene',
53+
# 'description',
54+
'wikigene_description'),
55+
filters = 'ensembl_gene_id',
56+
values = ens_names,
57+
mart = ensembl)
58+
59+
gene_names <- list(ids = ens_names, names = mapping[match(ens_names, mapping$ensembl_gene_id),]$wikigene_description)
60+
gene_final_names <- ifelse(is.na(as.character(gene_names$names)), gene_names$ids, gene_names$names)
61+
return(gene_final_names)
62+
}
63+
64+
name_to_geneid <- function(ens_names) {
65+
require("biomaRt")
66+
ensembl = useMart(biomart="ENSEMBL_MART_ENSEMBL",
67+
host="www.ensembl.org",
68+
path="/biomart/martservice",
69+
dataset="hsapiens_gene_ensembl")
70+
mapping <- getBM(attributes = c('ensembl_gene_id', 'external_gene_name', 'hgnc_symbol', 'entrezgene', 'description'),
71+
filters = 'external_gene_name',
72+
values = ens_names,
73+
mart = ensembl)
74+
75+
gene_names <- list(ids = ens_names, names = mapping[match(ens_names, mapping$external_gene_name),]$entrezgene)
76+
gene_final_names <- ifelse(is.na(as.character(gene_names$names)), gene_names$ids, gene_names$names)
77+
return(gene_final_names)
78+
}

gp/gage_pathview_analysis_bb.R

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
prepare_data <- function(input_file){
2+
suppressMessages(require(dplyr))
3+
suppressMessages(require(stringr))
4+
suppressMessages(require(AnnotationDbi))
5+
suppressMessages(require(org.Hs.eg.db))
6+
7+
dge <- read.table(file = input_file,
8+
sep = "\t",
9+
header = T,
10+
check.names = F)
11+
# row.names = 1)
12+
13+
#--------id analysis--------
14+
15+
if ("translated_names" %in% colnames(dge)) {
16+
df <- dge %>% dplyr::select(c(translated_names, log2FoldChange, padj))
17+
df$geneid = mapIds(org.Hs.eg.db,
18+
keys=as.character(df$translated_names),
19+
column="ENTREZID",
20+
keytype="SYMBOL",
21+
multiVals="first")
22+
df2 <- df %>% dplyr::filter(!str_detect(translated_names, "ENSG0")) %>% dplyr::filter(!str_detect(geneid, "NA"))
23+
} else {
24+
df <- dge %>% dplyr::select(c(gene_name, log2FoldChange, padj))
25+
df <- df %>% dplyr::filter(padj < 0.05)
26+
df$geneid = mapIds(org.Hs.eg.db,
27+
keys=as.character(df$gene_name),
28+
column="ENTREZID",
29+
keytype="SYMBOL",
30+
multiVals="first")
31+
df2 <- df %>% dplyr::filter(!str_detect(gene_name, "ENSG0")) %>% dplyr::filter(!str_detect(geneid, "NA"))
32+
}
33+
34+
input_data <- as.data.frame(df2)
35+
return(input_data)
36+
}
37+
38+
# gage_pathview <- function(input, kegg_pathway, pgeomean, output_folder){
39+
gage_pathview <- function(input_file, pgeomean = 0.25, output_folder = output_folder){
40+
41+
message("Gage/Pathview Analysis Start")
42+
source("KEGG_gene_annotation.R")
43+
44+
#--------Libraries--------
45+
message("Loading Libraries")
46+
suppressMessages(require(pathview))
47+
suppressMessages(require(gage))
48+
49+
#--------Output directory--------
50+
message("Creating Output Directory")
51+
52+
mainDir <- setwd(getwd())
53+
outDir <- file.path(mainDir)
54+
subDir <- output_folder
55+
resDir <- file.path(outDir, subDir)
56+
57+
if (file.exists(resDir)){
58+
message ("Results folder for analysis found")
59+
} else {
60+
dir.create(resDir)
61+
message ("Results folder for analysis created")
62+
}
63+
64+
#--------load data--------
65+
message("Loading data and annotating Ids")
66+
67+
input_data <- prepare_data(input_file)
68+
# return(input_data)
69+
70+
#--------load Kegg pathways--------
71+
message("Loading Kegg Pathways")
72+
73+
## pathways from GAGE kegg.gsets
74+
pathways <- kegg.gsets(species = "hsa",
75+
id.type = "kegg",
76+
check.new = T)
77+
# sigmetidx <- pathways$sigmet.idx
78+
# listkegg <- pathways$kg.sets[sigmetidx]
79+
listkegg <- pathways$kg.sets
80+
81+
## prepare vector with geneid + FC
82+
foldchanges = input_data$log2FoldChange
83+
names(foldchanges) = input_data$geneid
84+
# head(foldchanges)
85+
86+
#--------run Gage-------
87+
message("Gage Analysis")
88+
89+
keggres = gage(exprs = foldchanges,
90+
gsets = listkegg,
91+
same.dir = T)
92+
93+
lapply(keggres, head)
94+
95+
keggresp <- as.data.frame(keggres$greater)
96+
keggresp$pathway <- rownames(keggres$greater)
97+
keggresp2 <- keggresp %>% dplyr::filter(p.geomean < pgeomean)
98+
99+
keggrespathways = data.frame(id=keggresp2$pathway, keggresp2) %>%
100+
tbl_df() %>%
101+
# filter(row_number()<=5) %>%
102+
.$id %>%
103+
as.character()
104+
105+
keggresids = substr(keggrespathways, start=1, stop=8)
106+
# avoid hsa01100 kegg pathway (too big)
107+
keggresids <- keggresids[!keggresids %in% "hsa01100"]
108+
109+
#--------Pathview analysis--------
110+
message("Pathview Analysis")
111+
112+
# detach dplyr (in order to avoid issues) as reported here: https://www.biostars.org/p/242157/
113+
detach("package:dplyr", unload=TRUE)
114+
115+
# set output
116+
setwd(output_folder)
117+
118+
# Define plotting function for applying later
119+
plot_pathway = function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="hsa", new.signature=FALSE)
120+
121+
# plot multiple pathways (plots saved to disk and returns a throwaway list object)
122+
tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species="hsa"))
123+
124+
#---------Saving results----------------
125+
message("Saving Results on Disk and as Rds object")
126+
127+
write.table(keggresp2, "GageResults.tab", sep = "\t", row.names = F)
128+
129+
report_data <- list()
130+
131+
# save results (for Rmd report)
132+
report_data[["DataFrame"]] <- as.data.frame(input_data)
133+
report_data[["FoldChanges"]] <- foldchanges
134+
report_data[["Pgeomean"]] <- pgeomean
135+
report_data[["PathviewResults"]] <- tmp
136+
report_data[["GageResults"]] <- keggresp2
137+
138+
# saveRds (tmp)
139+
saveRDS(report_data, "GagePathviewAnalysis.Rds")
140+
message("Gage/Pathview analysis completed")
141+
142+
}

gp/gage_pathview_analysis_cmd.R

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#! /usr/bin/env Rscript
2+
3+
suppressMessages(require(docopt))
4+
5+
#---------------Functions---------------------
6+
7+
source("gage_pathview_analysis_bb.R")
8+
9+
#-------Input parsing----------
10+
'Usage:
11+
gage_pathview_analysis_cmd.R [options]
12+
13+
Options:
14+
-i Name of file with input data/genes
15+
-p Value of p.geomean cutoff in Gage analysis [default: 0.25]
16+
-o Name of output folder where results are saved
17+
' -> doc
18+
19+
opts <- docopt(doc)
20+
21+
input_file <- opts$i
22+
# id <- opts$id
23+
# kegg_pathway <- opt$k
24+
pgeomean <- as.numeric(opts$p)
25+
output_folder <- opts$o
26+
27+
# gage_pathview(input_file = input_file,
28+
# id = id,
29+
# kegg_pathway = kegg_pathway,
30+
# pgeomean = pgeomean,
31+
# output_folder = output_folder)
32+
33+
gage_pathview(input_file = input_file,
34+
pgeomean = pgeomean,
35+
output_folder = output_folder)

0 commit comments

Comments
 (0)