-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathREADME.Rmd
87 lines (62 loc) · 3.67 KB
/
README.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
---
output: github_document
---
```{r setup, include=FALSE}
# Global chunk defaults for this README: echo the code, but keep warnings and
# messages out of the rendered output. Spelled-out TRUE/FALSE is used because
# T and F are ordinary variables that can be reassigned.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
```
# ROMOPOmics
## Installation
```{r installation,eval=F}
# Shown for reference only; this chunk is not evaluated when the README is knit.
# force = TRUE is passed so the install is requested even if a copy is present.
devtools::install_github("AndrewC160/ROMOPomics", force = TRUE)
```
## Description
ROMOPOmics standardizes the metadata of high-throughput assays together with the associated patient clinical data. The package provides a framework for standardizing these datasets and a pipeline for converting them into a SQL-friendly database that users can easily access. After installing the R package from the GitHub repository, users specify a data directory and a mask file that describes how to map their data's fields into a common data model. The resulting standardized data tables are then formatted into a SQLite database, making the dataset easy to share and to use interoperably.
```{r codeFlowChart, fig.align = 'center', out.width = "100%", echo=FALSE,fig.cap = "Flow chart of essential functions for a basic ROMOPOmics implementation using two different input datasets."}
# Embed the flow-chart image stored under man/figures; the caption and sizing
# come from this chunk's header options.
knitr::include_graphics(path = "man/figures/romopomics_code_flow.png")
```
## Package overview
For an overview of the package, see our [ROMOPOmics vignette](https://github.com/AndrewC160/ROMOPOmics/blob/master/vignettes/ROMOPomics.Rmd).
## Use Cases
```{r}
library(ROMOPOmics)
```
### TCGA data
```{r tcga-usecase}
# Load the data model definition file that ships with the package.
dm_file <- system.file("extdata", "OMOP_CDM_v6_0_custom.csv",
                       package = "ROMOPOmics", mustWork = TRUE)
dm <- loadDataModel(master_table_file = dm_file)

# Two TCGA BRCA input tables (clinical and mutation), both bundled in extdata.
tcga_files <- list(
  brca_clinical = system.file("extdata", "brca_clinical.csv",
                              package = "ROMOPOmics", mustWork = TRUE),
  brca_mutation = system.file("extdata", "brca_mutation.csv",
                              package = "ROMOPOmics", mustWork = TRUE)
)

# One mask per input table; the list names match those of tcga_files.
msks <- list(
  brca_clinical = loadModelMasks(
    system.file("extdata", "brca_clinical_mask.csv",
                package = "ROMOPOmics", mustWork = TRUE)
  ),
  brca_mutation = loadModelMasks(
    system.file("extdata", "brca_mutation_mask.csv",
                package = "ROMOPOmics", mustWork = TRUE)
  )
)

# Read each input file using the data model and its corresponding mask.
omop_inputs <- list(
  brca_clinical = readInputFile(input_file = tcga_files$brca_clinical,
                                data_model = dm,
                                mask_table = msks$brca_clinical),
  brca_mutation = readInputFile(input_file = tcga_files$brca_mutation,
                                data_model = dm,
                                mask_table = msks$brca_mutation)
)

# Combine the per-file tables and build the SQLite database in a temp directory.
# The database path is passed by name (sql_db_file), matching the ATAC-seq
# example, instead of relying on positional matching after a named argument.
db_inputs <- combineInputTables(input_table_list = omop_inputs)
omop_db <- buildSQLDBR(omop_tables = db_inputs,
                       sql_db_file = file.path(tempdir(), "TCGA.sqlite"))

# List the tables present in the resulting database.
DBI::dbListTables(omop_db)
```
### ATAC-seq data
```{r atacseq-usecase}
# Locate the bundled files: data model definition, input table, and its mask.
dm_file <- system.file(
  "extdata", "OMOP_CDM_v6_0_custom.csv",
  package = "ROMOPOmics", mustWork = TRUE
)
in_file <- system.file(
  "extdata", "GSE60682_standard.csv",
  package = "ROMOPOmics", mustWork = TRUE
)
msk_file <- system.file(
  "extdata", "GSE60682_standard_mask.csv",
  package = "ROMOPOmics", mustWork = TRUE
)

# Load the data model and the mask.
dm <- loadDataModel(master_table_file = dm_file)
msks <- loadModelMasks(msk_file)

# Read the input table; transpose_input_table = TRUE is requested for this file
# (NOTE(review): presumably because of how the GSE60682 table is oriented;
# confirm against readInputFile's documentation).
omop_inputs <- readInputFile(
  input_file = in_file,
  data_model = dm,
  mask_table = msks,
  transpose_input_table = TRUE
)

# Combine the tables and write them to a SQLite file in the temp directory.
db_inputs <- combineInputTables(input_table_list = omop_inputs)
omop_db <- buildSQLDBR(
  omop_tables = db_inputs,
  sql_db_file = file.path(tempdir(), "GSE60682_sqlDB.sqlite")
)

# List the tables present in the resulting database.
DBI::dbListTables(omop_db)
```
### GEO accessions from Stevens et al. 2013
```{r geo-usecase,eval=TRUE}
library(Biobase)
# Accession IDs to fetch for the Stevens et al. 2013 use case.
gse_ids <- c(
  "GSE9006",
  "GSE26440",
  "GSE11504",
  "TABM666",
  "GSE6011",
  "GSE37721",
  "GSE20307",
  "GSE20436"
)

# Fetch the series, using the session temp directory as the data directory.
stevens_gse_lst <- fetch_geo_series(
  gse_ids,
  data_dir = tempdir()
)

# Print the merged_metadata element of the fetched result.
stevens_gse_lst$merged_metadata
```