# Learning the R Language
# Load dplyr for the data-manipulation verbs used below (filter, mutate, %>%, ...).
library(dplyr)
# Load the `gapminder` dataset into the workspace.
# NOTE(review): `gapminder` is presumably provided by the gapminder package,
# which is not attached in the visible part of this file — confirm it is
# loaded elsewhere, otherwise data() only warns that the data set was not found.
data(gapminder)
# numeric: integers and floats (doubles)
# character: single characters or strings
# logical: booleans (TRUE / FALSE)
# Basic variables, one per atomic type ----
# Numeric values
apples <- 5
oranges <- 4
# Total fruit count (numeric addition)
fruits <- sum(apples, oranges)
# Character value
my_string <- "Teste R"
# Logical value
logica <- TRUE
# Ways to inspect an object. The names used here (`variable`, `dataset`,
# `my_numeric`, `cars`) are not defined in the visible part of this file;
# presumably placeholders to be replaced with real objects.
# str(): compactly display the internal structure of an object.
str(variable)
# glimpse(): dplyr's transposed overview of a data frame's columns.
glimpse(dataset)
# class(): report the class of an object.
class(my_numeric)
# unique(): distinct values found in a column.
unique(cars$ncyl)
# Create a numeric vector with c() and label its elements by weekday.
vetor <- c(1, 2, 0, -5, 100)

# Option 1: assign the names directly.
names(vetor) <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")

# Option 2: keep the labels in their own vector so they can be reused.
nomes <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
names(vetor) <- nomes
# Two numeric vectors of equal length used in the examples that follow.
vetor1 <- c(1, 2, 3, 4, 5)
vetor2 <- c(0, 1, 3, -1, 10)

# `+` adds element by element and returns a vector of the same length.
# Note: this value is replaced by the grand total a few lines further down.
vetor_soma <- vetor1 + vetor2

# sum() collapses everything it is given into a single total.
vetor2_soma <- sum(vetor2)
vetor_soma <- sum(c(vetor1, vetor2))

# Positional indexing (R is 1-based): a single element, then a range.
vetor3 <- vetor2[3]
vetor4 <- vetor1[1:3]
# Select elements 2 through 4.
# NOTE(review): `poker_vector` is not defined in the visible part of this file.
poker_midweek <- poker_vector[2:4]

# Select by element name instead of by position.
vetor3 <- vetor["Monday"]

# A comparison yields a logical vector, which can then be used as a mask
# to keep only the elements where the condition is TRUE.
vetor_comparison <- vetor > 2
vetor_comparado <- vetor[vetor_comparison]

# Arithmetic mean of all elements.
media <- mean(vetor)
# Build a 3-row matrix from 1:9; byrow = TRUE fills it row by row
# (first row is 1 2 3) instead of R's default column-by-column fill.
matriz <- matrix(1:9, nrow = 3, byrow = TRUE)
# Same construction from an existing vector, filled row by row into 3 rows.
# NOTE(review): `box_office` is not defined in the visible part of this file.
star_wars_matrix <- matrix(box_office, nrow = 3, byrow = TRUE)
# Raw categorical data stored as plain strings.
survey_vector <- c("M", "F", "F", "M", "M")
# Convert the strings to a factor; the levels are the sorted unique values.
factor_survey_vector <- as.factor(survey_vector)
# Rebuild a factor with an explicitly chosen level order.
# NOTE(review): `factor_data` is not defined in the visible part of this file.
new_order_data <- factor(
  factor_data,
  levels = c("East", "West", "North")
)
# levels() can also be used to show the unique categories of a variable (e.g. levels(sex) might return "female" "male").
# Rename the levels. The new labels are matched by position to the existing
# levels, so their order must follow levels(factor_survey_vector)
# (presumably "F", "M" from the default sorted order — confirm before renaming).
levels(factor_survey_vector) <- c("Female", "Male")
# Ordinal data: the categories have a natural order (Low < Medium < High).
temperature_vector <- c("High", "Low", "High", "Low", "Medium")
# `ordered = TRUE` is spelled out in full rather than relying on partial
# argument matching; `levels` lists the categories from lowest to highest.
factor_temperature_vector <- factor(
  temperature_vector,
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
# Keep only the rows of a data frame for which the condition holds.
# NOTE(review): `df` and `column1` are placeholders not defined in the visible
# part of this file.
subset(df, subset = column1 > 1)
# Lists can be constructed with the list() function and can hold elements of any type: strings, data frames, vectors, matrices, numbers, etc.
# A list may mix element types freely.
lista <- list("a", "b", 1, 7, "dsfdsdf")
# NOTE(review): `df`, `matriz1` and `my_df` are not defined in the visible
# part of this file.
lista2 <- list(1, "a", vetor1, df, matriz1)
# Naming the elements allows access by name (this replaces the earlier `lista`).
lista <- list(vec = vetor1, mat = matriz1, df = my_df)

# Accessing the second value of the vector stored in a list:
# by position — [[3]] extracts the third element, [2] then indexes into it ...
lista2[[3]][2]
# ... or by name — only `lista` has named elements, so `$` is used on it.
lista$vec[2]

# Append a new named element with c().
lista1 <- c(lista2, ano = 1980)
# filter() keeps the rows that satisfy every condition.
# Two conditions combined with a logical AND.
people %>%
  filter(hair_color == "none" & eye_color == "black")

# Membership test against a set of allowed values.
cars %>%
  filter(ncyl %in% c(4, 6, 8))

# Range check: separate arguments to filter() are ANDed together.
MedGPA %>%
  filter(GPA >= 3.375, GPA <= 3.77)
# Use filter() to keep only the rows where a column matches a value: pipe the dataset into filter() and assign the result to a new dataset.
# Keep the rows whose `coluna` equals "blue" and store them under a new name.
# NOTE(review): `dataset` is not defined in the visible part of this file.
filtered_dataset <-
  dataset %>%
  filter(coluna == "blue")
# Create a categorical column from a numeric one with a vectorised ifelse().
# NOTE(review): no data frame is passed or piped into this mutate() call, and
# `num_char` / `med_num_char` are not defined in the visible part of this file —
# presumably a fragment of a longer pipeline; confirm the intended dataset.
mutate(num_char_cat = ifelse(num_char < med_num_char, "below median", "at or above median"))
# Restrict to Asian countries and flag those with life expectancy below 50.
# NOTE(review): `gap2007` is not defined in the visible part of this file.
gap_asia <- gap2007 %>%
  filter(continent == "Asia") %>%
  mutate(is_outlier = lifeExp < 50)
# Create a new variable 'noise' filled with values from the rnorm() function and add it to the 'mario_kart' dataset.
# rnorm(N) generates a vector of N pseudo-random normal values with mean 0 and variance 1. N must equal the number of observations (rows) in the dataset.
# Add a `noise` column of standard-normal draws. n() returns the number of
# rows in the current data, so the vector always has the right length instead
# of depending on a hard-coded row count.
# NOTE(review): `mario_kart` is not defined in the visible part of this file.
mario_kart_noisy <- mario_kart %>%
  mutate(noise = rnorm(n()))
# Contingency table: counts of each combination of values of the two columns.
# NOTE(review): `df`, `a` and `b` are placeholders not defined in the visible
# part of this file.
table(df$a, df$b)
# Remove the rows of one `align` category, then drop the factor levels that no
# longer occur in the remaining data.
# NOTE(review): `comics` is not defined in the visible part of this file.
comics <- droplevels(filter(comics, align != "Reformed Criminals"))
# Mean and median life expectancy for each continent.
# NOTE(review): `gap2007` is not defined in the visible part of this file.
gap2007 %>%
  group_by(continent) %>%
  summarise(mean(lifeExp), median(lifeExp))