Merge pull request #6 from GMOD/use_r_data

Implement track that uses R data frame as source
GMOD · May 14, 2021 · ec916a5 · ec916a5
2 parents 3b44b69 + dc84ef7
commit ec916a5
Show file tree

Hide file tree

Showing 11 changed files with 292 additions and 5 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -2,19 +2,16 @@
 ^\.Rproj\.user$
 ^node_modules$
 ^srcjs$
-^app\.R$
 ^package\.json$
 ^webpack\.config\.js$
 ^yarn\.lock$
 ^LICENSE\.md$
 ^inst/htmlwidgets/JBrowseR\.js\.map$
-^config\.json$
-^custom_app\.R$
-^json_app\.R$
 ^README\.Rmd$
 ^_pkgdown\.yml$
 ^docs$
 ^pkgdown$
 ^\.github$
 ^cran-comments\.md$
 ^CRAN-RELEASE$
+^example_apps$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -30,7 +30,9 @@ Imports:
     jsonlite,
     httpuv,
     mime,
-    cli
+    cli,
+    ids,
+    dplyr
 Suggests: 
     testthat (>= 3.0.0),
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -10,6 +10,7 @@ export(renderJBrowseR)
 export(serve_data)
 export(theme)
 export(track_alignments)
+export(track_data_frame)
 export(track_feature)
 export(track_variant)
 export(track_wiggle)

diff --git a/R/data_frame.R b/R/data_frame.R
@@ -0,0 +1,121 @@
+#' Create a track from an R data frame for a custom JBrowse 2 view
+#'
+#' Creates the necessary configuration string for an R data frame so that it can
+#' be viewed as a track in a JBrowse custom linear genome view.
+#'
+#' @param track_data the data frame with track data. Must have cols:
+#'   \code{chrom}, \code{start}, \code{end}, \code{name}. The column
+#'   \code{additional} can optionally be include with more feature information.
+#'   If a \code{score} column is present, it will be used and the track will
+#'   be rendered to display quantitative features.
+#' @param track_name the name to use for the track
+#' @param assembly the config string generated by \code{assembly}
+#'
+#' @return a character vector of stringified track JSON configuration
+#'
+#' @export
+#'
+#' @examples
+#' assembly <- assembly("https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz", bgzip = TRUE)
+#'
+#' df <- data.frame(
+#'         chrom = c(1, 2),
+#'         start = c(123, 456),
+#'         end = c(789, 101112),
+#'         name = c('feature1', 'feature2')
+#'       )
+#'
+#' track_data_frame(df, "my_features", assembly)
+track_data_frame <- function(track_data, track_name, assembly) {
+  check_df(track_data)
+
+  if (is.element("score", colnames(track_data))) {
+    type <- "QuantitativeTrack"
+  } else {
+    type <- "FeatureTrack"
+  }
+  name <- track_name
+  assembly_name <- get_assembly_name(assembly)
+  track_id <- stringr::str_c(assembly_name, "_", name)
+  adapter <- get_from_config_adapter(track_data)
+
+  as.character(
+    stringr::str_glue(
+      "{{ ",
+      '"type": "{type}", ',
+      '"name": "{name}", ',
+      '"assemblyNames": ["{assembly_name}"], ',
+      '"trackId": "{track_id}", ',
+      "{adapter} ",
+      '}}'
+    )
+  )
+}
+
+check_df <- function(track_data) {
+  if (!is.data.frame(track_data)) {
+    stop("track data must be a data frame.")
+  }
+  if (invalid_cols(track_data)) {
+   stop("data frame must contain columns: chrom, start, end, name.")
+  }
+}
+
+invalid_cols <- function(df) {
+  columns_present <- is.element(c("chrom", "start", "end", "name"), colnames(df))
+  is.element(FALSE, columns_present)
+}
+
+get_from_config_adapter <- function(track_data) {
+  feature_data <- get_feature_data(track_data)
+
+  as.character(
+    stringr::str_glue(
+      '"adapter": {{ ',
+      '"type": "FromConfigAdapter", ',
+      '"features": [{feature_data}] ',
+      "}}"
+    )
+  )
+}
+
+get_feature_data <- function(track_data) {
+  if (!is.element("additional", colnames(track_data))) {
+    track_data[["additional"]] <- ""
+  }
+
+  if (is.element("score", colnames(track_data))) {
+    new_df <- track_data %>%
+      dplyr::mutate(
+        string_val = stringr::str_glue(
+          "{{",
+          '"refName": "{chrom}", ',
+          '"start": {start}, ',
+          '"end": {end}, ',
+          '"uniqueId": "{ids::random_id()}", ',
+          '"name": "{name}", ',
+          '"type": "", ',
+          '"score": {score}, ',
+          '"additional": "{additional}" ',
+          "}}"
+        )
+      )
+  } else {
+    new_df <- track_data %>%
+      dplyr::mutate(
+        string_val = stringr::str_glue(
+          "{{",
+          '"refName": "{chrom}", ',
+          '"start": {start}, ',
+          '"end": {end}, ',
+          '"uniqueId": "{ids::random_id()}", ',
+          '"name": "{name}", ',
+          '"type": "", ',
+          '"additional": "{additional}" ',
+          "}}"
+        )
+      )
+  }
+
+  stringr::str_c(new_df$string_val, collapse = ", ")
+}
diff --git a/app.R → example_apps/app.R b/app.R → example_apps/app.R
diff --git a/config.json → example_apps/config.json b/config.json → example_apps/config.json
diff --git a/custom_app.R → example_apps/custom_app.R b/custom_app.R → example_apps/custom_app.R
diff --git a/example_apps/df_app.R b/example_apps/df_app.R
@@ -0,0 +1,50 @@
+library(shiny)
+library(JBrowseR)
+
+ui <- fluidPage(
+  titlePanel("JBrowseR Example"),
+  JBrowseROutput("widgetOutput")
+)
+
+server <- function(input, output, session) {
+  # create the assembly configuration
+  assembly <- assembly(
+    "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz",
+    bgzip = TRUE,
+    aliases = c("GRCh37"),
+    refname_aliases = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt"
+  )
+
+  df <- data.frame(
+    chrom = c('1', '2'),
+    start = c(123, 456),
+    end = c(789, 101112),
+    name = c('feature1', 'feature2')
+  )
+
+  df_track <- track_data_frame(df, "foo", assembly)
+
+  # set up the final tracks object to be used
+  tracks <- tracks(
+    df_track
+  )
+
+  # determine what the browser displays by default
+  default_session <- default_session(
+    assembly,
+    c(df_track),
+    display_assembly = FALSE
+  )
+
+
+  output$widgetOutput <- renderJBrowseR(
+    JBrowseR("View",
+             assembly = assembly,
+             tracks = tracks,
+             location = "2:456",
+             defaultSession = default_session
+    )
+  )
+}
+
+shinyApp(ui, server)
diff --git a/json_app.R → example_apps/json_app.R b/json_app.R → example_apps/json_app.R
diff --git a/man/track_data_frame.Rd b/man/track_data_frame.Rd
diff --git a/tests/testthat/test-data_frame.R b/tests/testthat/test-data_frame.R
@@ -0,0 +1,78 @@
+test_that("data frame validation works", {
+  assembly <- assembly(
+    "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz",
+    bgzip = TRUE,
+    aliases = c("GRCh37"),
+    refname_aliases = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt"
+  )
+
+  list_input <- list(
+    c("chrom1", "chrom2"),
+    c('123', '456'),
+    c('789', '101112'),
+    c('foo', 'bar')
+  )
+  expect_error(
+    track_data_frame(list_input, assembly),
+    "track data must be a data frame."
+  )
+
+  invalid_df <- data.frame(
+    chroms = c("chrom1", "chrom2"),
+    start = c('123', '456'),
+    end = c('789', '101112'),
+    name = c('foo', 'bar')
+  )
+  expect_error(
+    track_data_frame(invalid_df, assembly),
+    "data frame must contain columns: chrom, start, end, name."
+  )
+})
+
+test_that("creating a data frame track returns the correct string", {
+  assembly <- assembly(
+    "https://jbrowse.org/genomes/hg19/fasta/hg19.fa.gz",
+    bgzip = TRUE,
+    aliases = c("GRCh37"),
+    refname_aliases = "https://s3.amazonaws.com/jbrowse.org/genomes/hg19/hg19_aliases.txt"
+  )
+
+  df <- data.frame(
+    chrom = c(1, 2),
+    start = c(123, 456),
+    end = c(789, 101112),
+    name = c('feature1', 'feature2')
+  )
+
+  remove_random_ids <- function(json_list) {
+    for (i in seq_along(length(json_list$json_list$adapter$features))) {
+      json_list$json_list$adapter$features[[i]]$uniqueId <- NULL
+    }
+  }
+
+  # parse JSON, strip out the unique ID before comparing result
+  df_json <- jsonlite::parse_json(track_data_frame(df, "my_features", assembly))
+  df_json <- remove_random_ids(df_json)
+
+  valid_json <- jsonlite::parse_json("{ \"type\": \"FeatureTrack\", \"name\": \"my_features\", \"assemblyNames\": [\"hg19\"], \"trackId\": \"hg19_my_features\", \"adapter\": { \"type\": \"FromConfigAdapter\", \"features\": [{\"refName\": \"1\", \"start\": 123, \"end\": 789, \"uniqueId\": \"29f30df147fdc145426288bfdda3dd9e\", \"name\": \"feature1\", \"type\": \"\", \"additional\": \"\" }, {\"refName\": \"2\", \"start\": 456, \"end\": 101112, \"uniqueId\": \"29f30df147fdc145426288bfdda3dd9e\", \"name\": \"feature2\", \"type\": \"\", \"additional\": \"\" }] } }")
+  valid_json <- remove_random_ids(valid_json)
+
+  expect_equal(df_json, valid_json)
+
+  # also test for a quantitative track
+  score_df <- df <- data.frame(
+    chrom = c('1', '2'),
+    start = c(123, 456),
+    end = c(789, 101112),
+    name = c('feature1', 'feature2'),
+    score = c(10, 20)
+  )
+
+  score_df_json <- jsonlite::parse_json(track_data_frame(score_df, "quantitative_features", assembly))
+  score_df_json <- remove_random_ids(score_df_json)
+
+  valid_score_json <- jsonlite::parse_json("{ \"type\": \"QuantitativeTrack\", \"name\": \"quantitative_features\", \"assemblyNames\": [\"hg19\"], \"trackId\": \"hg19_quantitative_features\", \"adapter\": { \"type\": \"FromConfigAdapter\", \"features\": [{\"refName\": \"1\", \"start\": 123, \"end\": 789, \"uniqueId\": \"000f0e1c826974d4dcbc3f665337ecb7\", \"name\": \"feature1\", \"type\": \"\", \"score\": 10, \"additional\": \"\" }, {\"refName\": \"2\", \"start\": 456, \"end\": 101112, \"uniqueId\": \"000f0e1c826974d4dcbc3f665337ecb7\", \"name\": \"feature2\", \"type\": \"\", \"score\": 20, \"additional\": \"\" }] } }")
+  valid_score_json <- remove_random_ids(valid_score_json)
+
+  expect_equal(score_df_json, valid_score_json)
+})
-Original file line number
+Diff line change
@@ Expand Up / @@ -30,7 +30,9 @@ Imports: @@
         jsonlite,
         httpuv,
         mime,
-        cli
+        cli,
+        ids,
+        dplyr
     Suggests:
         testthat (>= 3.0.0),
         knitr,
@@ Expand Down @@