Reorganise R files, add examples to README (#118)

* Rename visualise.R
* Rename analyse.R
* Create wrangle.R
* Move count_na() to wrangle.R
* Move domestication.R to wrangle.R
* Move calculate_proportions() to wrangle.R
* Add ggplot theme example
* Add scale quorum example
* Add run_dictionary() example
* Increment version number to 0.5.3.9000
clessn · Apr 9, 2023 · c7022ac · c7022ac
1 parent 5d1b606
commit c7022ac
Show file tree

Hide file tree

Showing 15 changed files with 171 additions and 81 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: clessnverse
 Title: Package for Data Domestication, Analysis, and Visualization
-Version: 0.5.3
+Version: 0.5.3.9000
 Authors@R: c(
     person("William", "Poirier", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-3274-1351")),

diff --git a/R/analysis.R → R/analyse.R b/R/analysis.R → R/analyse.R
@@ -147,54 +147,6 @@ sample_biased <-
     }
   }
 
-#' Calculate the proportion of each category from one variable.
-#'
-#' This function creates a data.frame which includes 3 columns.
-#' 1) a column containing the variable's categories;
-#' 2) a column containing each category's frequency;
-#' 3) a column containing each category's proportion.
-#'
-#' @param data An object of type data.frame.
-#' @param variable The name of the variable from which to calculate
-#' the proportions.
-#' @return A data.frame which includes 3 columns.
-#' 1) `variable`: a column containing the variable's categories;
-#' 2) `n`: a column containing each category's frequency;
-#' 3) `prop`: a column containing each category's proportion.
-#' @export
-#' @importFrom magrittr `%>%`
-#' @importFrom rlang abort
-#' @author CLESSN
-#' @examples
-#'
-#' \dontrun{
-#'
-#' # Calculate the proportions of each cylinder configuration
-#' # from mtcars.
-#'
-#' calculate_proportions(mtcars,cyl)
-#' }
-calculate_proportions <- function(data, variable) {
-  if (!is.data.frame(data)) {
-    rlang::abort("Argument `data` must be a data frame.")
-  }
-  else {
-    D <- data %>%
-      dplyr::group_by({
-        {
-          variable
-        }
-      }) %>%
-      dplyr::summarize(n = dplyr::n()) %>% #category frequencies
-      stats::na.omit() %>%
-      dplyr::mutate(prop = n / sum(n))
-  }
-  if (length(table(D[, 1])) == 1) {
-    warning(paste0("`", names(D[, 1]), "`", " only has one category."))
-  }
-  return(D)
-}
-
 #' Calculate dictionary expression mentions in a text.
 #'
 #' This function creates a data.frame which includes one column

diff --git a/R/transform.R b/R/transform.R
diff --git a/R/visualization.R → R/visualise.R b/R/visualization.R → R/visualise.R
diff --git a/R/domestication.R → R/wrangle.R b/R/domestication.R → R/wrangle.R
@@ -1,4 +1,23 @@
-# Domestication
+#' Count NA in a vector
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#'
+#' @param x a vector
+#'
+#' @return number of NA or NaN in `x` (integer)
+#' @export
+#'
+#' @examples
+#' x <- c(4, 6, NA, 3, NaN, 1)
+#' count_na(x)
+#'
+#' z <- c(NA, NaN, "w", "a", "b", NA)
+#' count_na(z)
+#'
+count_na <- function(x){
+  return(sum(is.na(x)))
+}
 
 #' Normalize a continuous variable between 0 and 1
 #'
@@ -22,7 +41,6 @@
 #'
 #' data_output <- data %>%
 #'   mutate(across(c(a, b), normalize_min_max))
-
 normalize_min_max <- function(x, remove_na = T) {
   min <- min(x, na.rm = remove_na)
   max <- max(x, na.rm = remove_na)
@@ -58,7 +76,6 @@ normalize_min_max <- function(x, remove_na = T) {
 #' new_vector <- reduce_outliers(vector)
 #' new_vector
 #' hist(new_vector)
-
 reduce_outliers <- function(vector) {
   q1 <- stats::quantile(vector, 0.25) # identify the first quartile
   q3 <- stats::quantile(vector, 0.75) # identify the third quartile
@@ -69,3 +86,52 @@ reduce_outliers <- function(vector) {
   vector[vector < lim_min] <- lim_min # same thing with the lower limit
   return(vector)
 }
+
+#' Calculate the proportion of each category from one variable.
+#'
+#' This function creates a data.frame which includes 3 columns.
+#' 1) a column containing the variable's categories;
+#' 2) a column containing each category's frequency;
+#' 3) a column containing each category's proportion.
+#'
+#' @param data An object of type data.frame.
+#' @param variable The name of the variable from which to calculate
+#' the proportions.
+#'
+#' @return A data.frame which includes 3 columns.
+#' 1) `variable`: a column containing the variable's categories;
+#' 2) `n`: a column containing each category's frequency;
+#' 3) `prop`: a column containing each category's proportion.
+#' @export
+#' @importFrom magrittr `%>%`
+#' @importFrom rlang abort
+#' @author CLESSN
+#' @examples
+#'
+#' \dontrun{
+#'
+#' # Calculate the proportions of each cylinder configuration
+#' # from mtcars.
+#'
+#' calculate_proportions(mtcars,cyl)
+#' }
+calculate_proportions <- function(data, variable) {
+  if (!is.data.frame(data)) {
+    rlang::abort("Argument `data` must be a data frame.")
+  }
+  else {
+    D <- data %>%
+      dplyr::group_by({
+        {
+          variable
+        }
+      }) %>%
+      dplyr::summarize(n = dplyr::n()) %>% #category frequencies
+      stats::na.omit() %>%
+      dplyr::mutate(prop = n / sum(n))
+  }
+  if (length(table(D[, 1])) == 1) {
+    warning(paste0("`", names(D[, 1]), "`", " only has one category."))
+  }
+  return(D)
+}
diff --git a/README.Rmd b/README.Rmd
@@ -41,6 +41,8 @@ remotes::install_github("clessn/clessnverse")
 
 ## Examples
 
+### Wrangle data
+
 Normalize a continuous variable between 0 and 1
 
 ```{r example}
@@ -58,6 +60,41 @@ data %>%
   mutate(across(c(a, b), normalize_min_max))
 ```
 
+### Analyse data
+
+```{r}
+run_dictionary(
+  data.frame(colnames(attitude)),
+  text = colnames(attitude),
+  dictionary = quanteda::data_dictionary_LSD2015
+) %>% head()
+```
+
+### Visualise data
+
+```{r}
+
+p  <- ggplot2::ggplot(data = ggplot2::mpg) +
+  ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+  ggplot2::labs(title = "Look at this graph!",
+                subtitle = "What a great theme, eh?",
+                caption = "Data: API Twitter \nCLESSN") +
+  ggplot2::xlab("x axis label") +
+  ggplot2::ylab("y axis label")
+
+p + theme_clean_light()
+p + theme_clean_dark()
+
+p  <- ggplot2::ggplot(data = ggplot2::mpg) +
+  ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+  ggplot2::labs(title = "Look at this graph!",
+                subtitle = "What a great look, eh?",
+                caption = "Data: Twitter API \nCLESSN")
+
+p + scale_discrete_quorum(aesthetics = "colour")
+```
+
+
 ## Issues and suggestions
 
 You can submit bugs or suggestions in the Issues tab of this repo. To facilitate problem solving, please include a [minimal reproducible example](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) of the issue.
diff --git a/README.md b/README.md
@@ -32,6 +32,8 @@ remotes::install_github("clessn/clessnverse")
 
 ## Examples
 
+### Wrangle data
+
 Normalize a continuous variable between 0 and 1
 
 ``` r
@@ -61,6 +63,60 @@ data %>%
 #> 4   0    0
 ```
 
+### Analyse data
+
+``` r
+run_dictionary(
+  data.frame(colnames(attitude)),
+  text = colnames(attitude),
+  dictionary = quanteda::data_dictionary_LSD2015
+) %>% head()
+#> 0.464 sec elapsed
+#>   doc_id negative positive neg_positive neg_negative
+#> 1  text1        0        0            0            0
+#> 2  text2        1        0            0            0
+#> 3  text3        0        1            0            0
+#> 4  text4        0        1            0            0
+#> 5  text5        0        0            0            0
+#> 6  text6        1        0            0            0
+```
+
+### Visualise data
+
+``` r
+
+p  <- ggplot2::ggplot(data = ggplot2::mpg) +
+  ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+  ggplot2::labs(title = "Look at this graph!",
+                subtitle = "What a great theme, eh?",
+                caption = "Data: API Twitter \nCLESSN") +
+  ggplot2::xlab("x axis label") +
+  ggplot2::ylab("y axis label")
+
+p + theme_clean_light()
+```
+
+<img src="man/figures/README-unnamed-chunk-3-1.png" width="100%" />
+
+``` r
+p + theme_clean_dark()
+```
+
+<img src="man/figures/README-unnamed-chunk-3-2.png" width="100%" />
+
+``` r
+
+p  <- ggplot2::ggplot(data = ggplot2::mpg) +
+  ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+  ggplot2::labs(title = "Look at this graph!",
+                subtitle = "What a great look, eh?",
+                caption = "Data: Twitter API \nCLESSN")
+
+p + scale_discrete_quorum(aesthetics = "colour")
+```
+
+<img src="man/figures/README-unnamed-chunk-3-3.png" width="100%" />
+
 ## Issues and suggestions
 
 You can submit bugs or suggestions in the Issues tab of this repo. To

diff --git a/man/calculate_proportions.Rd b/man/calculate_proportions.Rd
diff --git a/man/count_na.Rd b/man/count_na.Rd
diff --git a/man/normalize_min_max.Rd b/man/normalize_min_max.Rd
diff --git a/man/reduce_outliers.Rd b/man/reduce_outliers.Rd
diff --git a/man/run_dictionary.Rd b/man/run_dictionary.Rd
diff --git a/man/sample_biased.Rd b/man/sample_biased.Rd
diff --git a/man/scale.Rd b/man/scale.Rd
diff --git a/man/visualization.Rd b/man/visualization.Rd