diff --git a/DESCRIPTION b/DESCRIPTION
index 5cf0b12..1dca79c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: clessnverse
Title: Package for Data Domestication, Analysis, and Visualization
-Version: 0.5.3
+Version: 0.5.3.9000
Authors@R: c(
person("William", "Poirier", , "william.poirier.1@ulaval.ca", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-3274-1351")),
diff --git a/R/analysis.R b/R/analyse.R
similarity index 84%
rename from R/analysis.R
rename to R/analyse.R
index 6ef6740..90e5979 100644
--- a/R/analysis.R
+++ b/R/analyse.R
@@ -147,54 +147,6 @@ sample_biased <-
}
}
-#' Calculate the proportion of each category from one variable.
-#'
-#' This function creates a data.frame which includes 3 columns.
-#' 1) a column containing the variable's categories;
-#' 2) a column containing each category's frequency;
-#' 3) a column containing each category's proportion.
-#'
-#' @param data An object of type data.frame.
-#' @param variable The name of the variable from which to calculate
-#' the proportions.
-#' @return A data.frame which includes 3 columns.
-#' 1) `variable`: a column containing the variable's categories;
-#' 2) `n`: a column containing each category's frequency;
-#' 3) `prop`: a column containing each category's proportion.
-#' @export
-#' @importFrom magrittr `%>%`
-#' @importFrom rlang abort
-#' @author CLESSN
-#' @examples
-#'
-#' \dontrun{
-#'
-#' # Calculate the proportions of each cylinder configuration
-#' # from mtcars.
-#'
-#' calculate_proportions(mtcars,cyl)
-#' }
-calculate_proportions <- function(data, variable) {
- if (!is.data.frame(data)) {
- rlang::abort("Argument `data` must be a data frame.")
- }
- else {
- D <- data %>%
- dplyr::group_by({
- {
- variable
- }
- }) %>%
- dplyr::summarize(n = dplyr::n()) %>% #category frequencies
- stats::na.omit() %>%
- dplyr::mutate(prop = n / sum(n))
- }
- if (length(table(D[, 1])) == 1) {
- warning(paste0("`", names(D[, 1]), "`", " only has one category."))
- }
- return(D)
-}
-
#' Calculate dictionary expression mentions in a text.
#'
#' This function creates a data.frame which includes one column
diff --git a/R/transform.R b/R/transform.R
deleted file mode 100644
index 0cd9ab1..0000000
--- a/R/transform.R
+++ /dev/null
@@ -1,21 +0,0 @@
-#' Count NA in a vector
-#'
-#' @description
-#' `r lifecycle::badge("experimental")`
-#'
-#'
-#' @param x a vector
-#'
-#' @return number of NA or NaN in `x` (integer)
-#' @export
-#'
-#' @examples
-#' x <- c(4, 6, NA, 3, NaN, 1)
-#' count_na(x)
-#'
-#' z <- c(NA, NaN, "w", "a", "b", NA)
-#' count_na(z)
-#'
-count_na <- function(x){
- return(sum(is.na(x)))
-}
diff --git a/R/visualization.R b/R/visualise.R
similarity index 100%
rename from R/visualization.R
rename to R/visualise.R
diff --git a/R/domestication.R b/R/wrangle.R
similarity index 50%
rename from R/domestication.R
rename to R/wrangle.R
index 6f70061..a00a147 100644
--- a/R/domestication.R
+++ b/R/wrangle.R
@@ -1,4 +1,23 @@
-# Domestication
+#' Count NA in a vector
+#'
+#' @description
+#' `r lifecycle::badge("experimental")`
+#'
+#' @param x a vector
+#'
+#' @return number of NA or NaN in `x` (integer)
+#' @export
+#'
+#' @examples
+#' x <- c(4, 6, NA, 3, NaN, 1)
+#' count_na(x)
+#'
+#' z <- c(NA, NaN, "w", "a", "b", NA)
+#' count_na(z)
+#'
+count_na <- function(x) {
+  sum(is.na(x)) # is.na() flags NaN as well as NA, so both are counted
+}
#' Normalize a continuous variable between 0 and 1
#'
@@ -22,7 +41,6 @@
#'
#' data_output <- data %>%
#' mutate(across(c(a, b), normalize_min_max))
-
normalize_min_max <- function(x, remove_na = T) {
min <- min(x, na.rm = remove_na)
max <- max(x, na.rm = remove_na)
@@ -58,7 +76,6 @@ normalize_min_max <- function(x, remove_na = T) {
#' new_vector <- reduce_outliers(vector)
#' new_vector
#' hist(new_vector)
-
reduce_outliers <- function(vector) {
q1 <- stats::quantile(vector, 0.25) # identify the first quartile
q3 <- stats::quantile(vector, 0.75) # identify the first quartile
@@ -69,3 +86,52 @@ reduce_outliers <- function(vector) {
vector[vector < lim_min] <- lim_min # same thing with the lower limit
return(vector)
}
+
+#' Calculate the proportion of each category from one variable.
+#'
+#' This function creates a data.frame which includes 3 columns.
+#' 1) a column containing the variable's categories;
+#' 2) a column containing each category's frequency;
+#' 3) a column containing each category's proportion.
+#'
+#' @param data An object of type data.frame.
+#' @param variable The name of the variable from which to calculate
+#' the proportions.
+#'
+#' @return A data.frame which includes 3 columns.
+#' 1) `variable`: a column containing the variable's categories;
+#' 2) `n`: a column containing each category's frequency;
+#' 3) `prop`: a column containing each category's proportion.
+#' @export
+#' @importFrom magrittr `%>%`
+#' @importFrom rlang abort
+#' @author CLESSN
+#' @examples
+#'
+#' \dontrun{
+#'
+#' # Calculate the proportions of each cylinder configuration
+#' # from mtcars.
+#'
+#' calculate_proportions(mtcars,cyl)
+#' }
+calculate_proportions <- function(data, variable) {
+  # Fail fast on anything that is not a data frame.
+  if (!is.data.frame(data)) {
+    rlang::abort("Argument `data` must be a data frame.")
+  }
+  # `{{ variable }}` forwards the caller's bare column name to dplyr; it is
+  # kept on one line so it reads as the embrace operator, not nested blocks.
+  D <- data %>%
+    dplyr::group_by({{ variable }}) %>%
+    dplyr::summarize(n = dplyr::n()) %>% # category frequencies
+    stats::na.omit() %>% # drop the row that counts missing values
+    dplyr::mutate(prop = n / sum(n)) # share among the non-missing rows
+  # Count observed categories by rows: `table()` on a factor column also
+  # counts unused levels, which kept this warning from firing when only
+  # one level was actually present in the data.
+  if (nrow(D) == 1) {
+    warning(paste0("`", names(D)[1], "`", " only has one category."))
+  }
+  return(D)
+}
diff --git a/README.Rmd b/README.Rmd
index 12a4b03..c129cf6 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -41,6 +41,8 @@ remotes::install_github("clessn/clessnverse")
## Examples
+### Wrangle data
+
Normalize a continuous variable between 0 and 1
```{r example}
@@ -58,6 +60,41 @@ data %>%
mutate(across(c(a, b), normalize_min_max))
```
+### Analyse data
+
+```{r}
+run_dictionary(
+ data.frame(colnames(attitude)),
+ text = colnames(attitude),
+ dictionary = quanteda::data_dictionary_LSD2015
+) %>% head()
+```
+
+### Visualise data
+
+```{r}
+
+p <- ggplot2::ggplot(data = ggplot2::mpg) +
+ ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+ ggplot2::labs(title = "Look at this graph!",
+ subtitle = "What a great theme, eh?",
+ caption = "Data: API Twitter \nCLESSN") +
+ ggplot2::xlab("x axis label") +
+ ggplot2::ylab("y axis label")
+
+p + theme_clean_light()
+p + theme_clean_dark()
+
+p <- ggplot2::ggplot(data = ggplot2::mpg) +
+ ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+ ggplot2::labs(title = "Look at this graph!",
+ subtitle = "What a great look, eh?",
+ caption = "Data: Twitter API \nCLESSN")
+
+p + scale_discrete_quorum(aesthetics = "colour")
+```
+
+
## Issues and suggestions
You can submit bugs or suggestions in the Issues tab of this repo. To facilitate problem solving, please include a [minimal reproducible example](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html) of the issue.
diff --git a/README.md b/README.md
index 1e51cfb..152b23b 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,8 @@ remotes::install_github("clessn/clessnverse")
## Examples
+### Wrangle data
+
Normalize a continuous variable between 0 and 1
``` r
@@ -61,6 +63,60 @@ data %>%
#> 4 0 0
```
+### Analyse data
+
+``` r
+run_dictionary(
+ data.frame(colnames(attitude)),
+ text = colnames(attitude),
+ dictionary = quanteda::data_dictionary_LSD2015
+) %>% head()
+#> 0.464 sec elapsed
+#> doc_id negative positive neg_positive neg_negative
+#> 1 text1 0 0 0 0
+#> 2 text2 1 0 0 0
+#> 3 text3 0 1 0 0
+#> 4 text4 0 1 0 0
+#> 5 text5 0 0 0 0
+#> 6 text6 1 0 0 0
+```
+
+### Visualise data
+
+``` r
+
+p <- ggplot2::ggplot(data = ggplot2::mpg) +
+ ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+ ggplot2::labs(title = "Look at this graph!",
+ subtitle = "What a great theme, eh?",
+ caption = "Data: API Twitter \nCLESSN") +
+ ggplot2::xlab("x axis label") +
+ ggplot2::ylab("y axis label")
+
+p + theme_clean_light()
+```
+
+
+
+``` r
+p + theme_clean_dark()
+```
+
+
+
+``` r
+
+p <- ggplot2::ggplot(data = ggplot2::mpg) +
+ ggplot2::geom_point(mapping = ggplot2::aes(x = displ, y = cty, colour = class)) +
+ ggplot2::labs(title = "Look at this graph!",
+ subtitle = "What a great look, eh?",
+ caption = "Data: Twitter API \nCLESSN")
+
+p + scale_discrete_quorum(aesthetics = "colour")
+```
+
+
+
## Issues and suggestions
You can submit bugs or suggestions in the Issues tab of this repo. To
diff --git a/man/calculate_proportions.Rd b/man/calculate_proportions.Rd
index 197434c..a0659d3 100644
--- a/man/calculate_proportions.Rd
+++ b/man/calculate_proportions.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analysis.R
+% Please edit documentation in R/wrangle.R
\name{calculate_proportions}
\alias{calculate_proportions}
\title{Calculate the proportion of each category from one variable.}
diff --git a/man/count_na.Rd b/man/count_na.Rd
index 30dc851..2df18d9 100644
--- a/man/count_na.Rd
+++ b/man/count_na.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/transform.R
+% Please edit documentation in R/wrangle.R
\name{count_na}
\alias{count_na}
\title{Count NA in a vector}
diff --git a/man/normalize_min_max.Rd b/man/normalize_min_max.Rd
index 990f7d4..d44b888 100644
--- a/man/normalize_min_max.Rd
+++ b/man/normalize_min_max.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/domestication.R
+% Please edit documentation in R/wrangle.R
\name{normalize_min_max}
\alias{normalize_min_max}
\title{Normalize a continuous variable between 0 and 1}
diff --git a/man/reduce_outliers.Rd b/man/reduce_outliers.Rd
index 08d30ee..cc8a605 100644
--- a/man/reduce_outliers.Rd
+++ b/man/reduce_outliers.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/domestication.R
+% Please edit documentation in R/wrangle.R
\name{reduce_outliers}
\alias{reduce_outliers}
\title{Reduce outliers with the interquartile range method}
diff --git a/man/run_dictionary.Rd b/man/run_dictionary.Rd
index dbd407f..9e033c4 100644
--- a/man/run_dictionary.Rd
+++ b/man/run_dictionary.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analysis.R
+% Please edit documentation in R/analyse.R
\name{run_dictionary}
\alias{run_dictionary}
\title{Calculate dictionary expression mentions in a text.}
diff --git a/man/sample_biased.Rd b/man/sample_biased.Rd
index fcbb0ac..86437d1 100644
--- a/man/sample_biased.Rd
+++ b/man/sample_biased.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/analysis.R
+% Please edit documentation in R/analyse.R
\name{sample_biased}
\alias{sample_biased}
\title{Create samples biased on the categories of one variable}
diff --git a/man/scale.Rd b/man/scale.Rd
index 2523c8b..50adad7 100644
--- a/man/scale.Rd
+++ b/man/scale.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/visualization.R
+% Please edit documentation in R/visualise.R
\name{scale}
\alias{scale}
\alias{scale_discrete_quorum}
diff --git a/man/visualization.Rd b/man/visualization.Rd
index 2126aad..5b28306 100644
--- a/man/visualization.Rd
+++ b/man/visualization.Rd
@@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/visualization.R
+% Please edit documentation in R/visualise.R
\name{visualization}
\alias{theme_clean_light}
\alias{theme_clean_dark}