diff --git a/DESCRIPTION b/DESCRIPTION index 5cf0b12..1dca79c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: clessnverse Title: Package for Data Domestication, Analysis, and Visualization -Version: 0.5.3 +Version: 0.5.3.9000 Authors@R: c( person("William", "Poirier", , "william.poirier.1@ulaval.ca", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3274-1351")), diff --git a/R/analysis.R b/R/analyse.R similarity index 84% rename from R/analysis.R rename to R/analyse.R index 6ef6740..90e5979 100644 --- a/R/analysis.R +++ b/R/analyse.R @@ -147,54 +147,6 @@ sample_biased <- } } -#' Calculate the proportion of each category from one variable. -#' -#' This function creates a data.frame which includes 3 columns. -#' 1) a column containing the variable's categories; -#' 2) a column containing each category's frequency; -#' 3) a column containing each category's proportion. -#' -#' @param data An object of type data.frame. -#' @param variable The name of the variable from which to calculate -#' the proportions. -#' @return A data.frame which includes 3 columns. -#' 1) `variable`: a column containing the variable's categories; -#' 2) `n`: a column containing each category's frequency; -#' 3) `prop`: a column containing each category's proportion. -#' @export -#' @importFrom magrittr `%>%` -#' @importFrom rlang abort -#' @author CLESSN -#' @examples -#' -#' \dontrun{ -#' -#' # Calculate the proportions of each cylinder configuration -#' # from mtcars. -#' -#' calculate_proportions(mtcars,cyl) -#' } -calculate_proportions <- function(data, variable) { - if (!is.data.frame(data)) { - rlang::abort("Argument `data` must be a data frame.") - } - else { - D <- data %>% - dplyr::group_by({ - { - variable - } - }) %>% - dplyr::summarize(n = dplyr::n()) %>% #category frequencies - stats::na.omit() %>% - dplyr::mutate(prop = n / sum(n)) - } - if (length(table(D[, 1])) == 1) { - warning(paste0("`", names(D[, 1]), "`", " only has one category.")) - } - return(D) -} - #' Calculate dictionary expression mentions in a text. #' #' This function creates a data.frame which includes one column diff --git a/R/transform.R b/R/transform.R deleted file mode 100644 index 0cd9ab1..0000000 --- a/R/transform.R +++ /dev/null @@ -1,21 +0,0 @@ -#' Count NA in a vector -#' -#' @description -#' `r lifecycle::badge("experimental")` -#' -#' -#' @param x a vector -#' -#' @return number of NA or NaN in `x` (integer) -#' @export -#' -#' @examples -#' x <- c(4, 6, NA, 3, NaN, 1) -#' count_na(x) -#' -#' z <- c(NA, NaN, "w", "a", "b", NA) -#' count_na(z) -#' -count_na <- function(x){ - return(sum(is.na(x))) -} diff --git a/R/visualization.R b/R/visualise.R similarity index 100% rename from R/visualization.R rename to R/visualise.R diff --git a/R/domestication.R b/R/wrangle.R similarity index 50% rename from R/domestication.R rename to R/wrangle.R index 6f70061..a00a147 100644 --- a/R/domestication.R +++ b/R/wrangle.R @@ -1,4 +1,23 @@ -# Domestication +#' Count NA in a vector +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' @param x a vector +#' +#' @return number of NA or NaN in `x` (integer) +#' @export +#' +#' @examples +#' x <- c(4, 6, NA, 3, NaN, 1) +#' count_na(x) +#' +#' z <- c(NA, NaN, "w", "a", "b", NA) +#' count_na(z) +#' +count_na <- function(x){ + return(sum(is.na(x))) +} #' Normalize a continuous variable between 0 and 1 #' @@ -22,7 +41,6 @@ #' #' data_output <- data %>% #' mutate(across(c(a, b), normalize_min_max)) - normalize_min_max <- function(x, remove_na = T) { min <- min(x, na.rm = remove_na) max <- max(x, na.rm = remove_na) @@ -58,7 +76,6 @@ normalize_min_max <- function(x, remove_na = T) { #' new_vector <- reduce_outliers(vector) #' new_vector #' hist(new_vector) - reduce_outliers <- function(vector) { q1 <- stats::quantile(vector, 0.25) # identify the first quartile q3 <- stats::quantile(vector, 0.75) # identify the first quartile @@ -69,3 +86,52 @@ reduce_outliers <- function(vector) { vector[vector < lim_min] <- lim_min # same thing with the lower limit return(vector) } + +#' Calculate the proportion of each category from one variable. +#' +#' This function creates a data.frame which includes 3 columns. +#' 1) a column containing the variable's categories; +#' 2) a column containing each category's frequency; +#' 3) a column containing each category's proportion. +#' +#' @param data An object of type data.frame. +#' @param variable The name of the variable from which to calculate +#' the proportions. +#' +#' @return A data.frame which includes 3 columns. +#' 1) `variable`: a column containing the variable's categories; +#' 2) `n`: a column containing each category's frequency; +#' 3) `prop`: a column containing each category's proportion. +#' @export +#' @importFrom magrittr `%>%` +#' @importFrom rlang abort +#' @author CLESSN +#' @examples +#' +#' \dontrun{ +#' +#' # Calculate the proportions of each cylinder configuration +#' # from mtcars. +#' +#' calculate_proportions(mtcars,cyl) +#' } +calculate_proportions <- function(data, variable) { + if (!is.data.frame(data)) { + rlang::abort("Argument `data` must be a data frame.") + } + else { + D <- data %>% + dplyr::group_by({ + { + variable + } + }) %>% + dplyr::summarize(n = dplyr::n()) %>% #category frequencies + stats::na.omit() %>% + dplyr::mutate(prop = n / sum(n)) + } + if (length(table(D[, 1])) == 1) { + warning(paste0("`", names(D[, 1]), "`", " only has one category.")) + } + return(D) +} diff --git a/README.Rmd b/README.Rmd index 12a4b03..c129cf6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -41,6 +41,8 @@ remotes::install_github("clessn/clessnverse") ## Examples +### Wrangle data + Normalize a continuous variable between 0 and 1 ```{r example} @@ -58,6 +60,41 @@ data %>% mutate(across(c(a, b), normalize_min_max)) ``` +### Analyse data + +```{r} +run_dictionary( + data.frame(colnames(attitude)), + text = colnames(attitude), + dictionary = quanteda::data_dictionary_LSD2015 +) %>% head() +``` + +### Visualise data + +```{r} + +p <- ggplot2::ggplot(data = ggplot2::mpg) + + ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) + + ggplot2::labs(title = "Look at this graph!", + subtitle = "What a great theme, eh?", + caption = "Data: API Twitter \nCLESSN") + + ggplot2::xlab("x axis label") + + ggplot2::ylab("y axis label") + +p + theme_clean_light() +p + theme_clean_dark() + +p <- ggplot2::ggplot(data = ggplot2::mpg) + + ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) + + ggplot2::labs(title = "Look at this graph!", + subtitle = "What a great look, eh?", + caption = "Data: Twitter API \nCLESSN") + +p + scale_discrete_quorum(aesthetics = "colour") +``` + + ## Issues and suggestions You can submit bugs or suggestions in the Issues tab of this repo. To facilitate problem solving, please include a [minimal reproducible example](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) of the issue. diff --git a/README.md b/README.md index 1e51cfb..152b23b 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ remotes::install_github("clessn/clessnverse") ## Examples +### Wrangle data + Normalize a continuous variable between 0 and 1 ``` r @@ -61,6 +63,60 @@ data %>% #> 4 0 0 ``` +### Analyse data + +``` r +run_dictionary( + data.frame(colnames(attitude)), + text = colnames(attitude), + dictionary = quanteda::data_dictionary_LSD2015 +) %>% head() +#> 0.464 sec elapsed +#> doc_id negative positive neg_positive neg_negative +#> 1 text1 0 0 0 0 +#> 2 text2 1 0 0 0 +#> 3 text3 0 1 0 0 +#> 4 text4 0 1 0 0 +#> 5 text5 0 0 0 0 +#> 6 text6 1 0 0 0 +``` + +### Visualise data + +``` r + +p <- ggplot2::ggplot(data = ggplot2::mpg) + + ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) + + ggplot2::labs(title = "Look at this graph!", + subtitle = "What a great theme, eh?", + caption = "Data: API Twitter \nCLESSN") + + ggplot2::xlab("x axis label") + + ggplot2::ylab("y axis label") + +p + theme_clean_light() +``` + + + +``` r +p + theme_clean_dark() +``` + + + +``` r + +p <- ggplot2::ggplot(data = ggplot2::mpg) + + ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) + + ggplot2::labs(title = "Look at this graph!", + subtitle = "What a great look, eh?", + caption = "Data: Twitter API \nCLESSN") + +p + scale_discrete_quorum(aesthetics = "colour") +``` + + + ## Issues and suggestions You can submit bugs or suggestions in the Issues tab of this repo. To diff --git a/man/calculate_proportions.Rd b/man/calculate_proportions.Rd index 197434c..a0659d3 100644 --- a/man/calculate_proportions.Rd +++ b/man/calculate_proportions.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analysis.R +% Please edit documentation in R/wrangle.R \name{calculate_proportions} \alias{calculate_proportions} \title{Calculate the proportion of each category from one variable.} diff --git a/man/count_na.Rd b/man/count_na.Rd index 30dc851..2df18d9 100644 --- a/man/count_na.Rd +++ b/man/count_na.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/transform.R +% Please edit documentation in R/wrangle.R \name{count_na} \alias{count_na} \title{Count NA in a vector} diff --git a/man/normalize_min_max.Rd b/man/normalize_min_max.Rd index 990f7d4..d44b888 100644 --- a/man/normalize_min_max.Rd +++ b/man/normalize_min_max.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/domestication.R +% Please edit documentation in R/wrangle.R \name{normalize_min_max} \alias{normalize_min_max} \title{Normalize a continuous variable between 0 and 1} diff --git a/man/reduce_outliers.Rd b/man/reduce_outliers.Rd index 08d30ee..cc8a605 100644 --- a/man/reduce_outliers.Rd +++ b/man/reduce_outliers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/domestication.R +% Please edit documentation in R/wrangle.R \name{reduce_outliers} \alias{reduce_outliers} \title{Reduce outliers with the interquartile range method} diff --git a/man/run_dictionary.Rd b/man/run_dictionary.Rd index dbd407f..9e033c4 100644 --- a/man/run_dictionary.Rd +++ b/man/run_dictionary.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analysis.R +% Please edit documentation in R/analyse.R \name{run_dictionary} \alias{run_dictionary} \title{Calculate dictionary expression mentions in a text.} diff --git a/man/sample_biased.Rd b/man/sample_biased.Rd index fcbb0ac..86437d1 100644 --- a/man/sample_biased.Rd +++ b/man/sample_biased.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/analysis.R +% Please edit documentation in R/analyse.R \name{sample_biased} \alias{sample_biased} \title{Create samples biased on the categories of one variable} diff --git a/man/scale.Rd b/man/scale.Rd index 2523c8b..50adad7 100644 --- a/man/scale.Rd +++ b/man/scale.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualization.R +% Please edit documentation in R/visualise.R \name{scale} \alias{scale} \alias{scale_discrete_quorum} diff --git a/man/visualization.Rd b/man/visualization.Rd index 2126aad..5b28306 100644 --- a/man/visualization.Rd +++ b/man/visualization.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualization.R +% Please edit documentation in R/visualise.R \name{visualization} \alias{theme_clean_light} \alias{theme_clean_dark}