forked from tidyverse/dplyr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmutate.Rd
199 lines (175 loc) · 7.06 KB
/
mutate.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mutate.R
\name{mutate}
\alias{mutate}
\alias{mutate.data.frame}
\alias{transmute}
\title{Create, modify, and delete columns}
\usage{
mutate(.data, ...)
\method{mutate}{data.frame}(
.data,
...,
.keep = c("all", "used", "unused", "none"),
.before = NULL,
.after = NULL
)
transmute(.data, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}
\item{...}{<\code{\link[=dplyr_data_masking]{data-masking}}> Name-value pairs.
The name gives the name of the column in the output.
The value can be:
\itemize{
\item A vector of length 1, which will be recycled to the correct length.
\item A vector the same length as the current group (or the whole data frame
if ungrouped).
\item \code{NULL}, to remove the column.
\item A data frame or tibble, to create multiple columns in the output.
}}
\item{.keep}{\Sexpr[results=rd]{lifecycle::badge("experimental")}
This is an experimental argument that allows you to control which columns
from \code{.data} are retained in the output:
\itemize{
\item \code{"all"}, the default, retains all variables.
\item \code{"used"} keeps any variables used to make new variables; it's useful
for checking your work as it displays inputs and outputs side-by-side.
\item \code{"unused"} keeps only existing variables \strong{not} used to make new
variables.
\item \code{"none"} doesn't retain any extra columns from \code{.data}; only the grouping keys and the columns created by \code{...} are kept (like \code{\link[=transmute]{transmute()}}).
}}
\item{.before, .after}{\Sexpr[results=rd]{lifecycle::badge("experimental")}
<\code{\link[=dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns
should appear (the default is to add to the right hand side). See
\code{\link[=relocate]{relocate()}} for more details.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Rows are not affected.
\item Existing columns will be preserved according to the \code{.keep} argument.
New columns will be placed according to the \code{.before} and \code{.after}
arguments. If \code{.keep = "none"} (as in \code{transmute()}), the output order
is determined only by \code{...}, not the order of existing columns.
\item Columns given value \code{NULL} will be removed.
\item Groups will be recomputed if a grouping variable is mutated.
\item Data frame attributes are preserved.
}
}
\description{
\code{mutate()} adds new variables and preserves existing ones;
\code{transmute()} adds new variables and drops existing ones.
New variables overwrite existing variables of the same name.
Variables can be removed by setting their value to \code{NULL}.
}
\section{Useful mutate functions}{
\itemize{
\item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings
\item \code{\link[=lead]{lead()}}, \code{\link[=lag]{lag()}}
\item \code{\link[=dense_rank]{dense_rank()}}, \code{\link[=min_rank]{min_rank()}}, \code{\link[=percent_rank]{percent_rank()}}, \code{\link[=row_number]{row_number()}},
\code{\link[=cume_dist]{cume_dist()}}, \code{\link[=ntile]{ntile()}}
\item \code{\link[=cumsum]{cumsum()}}, \code{\link[=cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[=cumany]{cumany()}}, \code{\link[=cumall]{cumall()}}
\item \code{\link[=na_if]{na_if()}}, \code{\link[=coalesce]{coalesce()}}
\item \code{\link[=if_else]{if_else()}}, \code{\link[=recode]{recode()}}, \code{\link[=case_when]{case_when()}}
}
}
\section{Grouped tibbles}{
Because mutating expressions are computed within groups, they may
yield different results on grouped tibbles. This will be the case
as soon as an aggregating, lagging, or ranking function is
involved. Compare this ungrouped mutate:\preformatted{starwars \%>\%
select(name, mass, species) \%>\%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
}
With the grouped equivalent:\preformatted{starwars \%>\%
select(name, mass, species) \%>\%
group_by(species) \%>\%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
}
The former normalises \code{mass} by the global average whereas the
latter normalises by the averages within species levels.
}
\section{Methods}{
These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.
Methods available in currently loaded packages:
\itemize{
\item \code{mutate()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}.
\item \code{transmute()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("transmute")}.
}
}
\examples{
# Newly created variables are available immediately
starwars \%>\%
select(name, mass) \%>\%
mutate(
mass2 = mass * 2,
mass2_squared = mass2 * mass2
)
# As well as adding new variables, you can use mutate() to
# remove variables and modify existing variables.
starwars \%>\%
select(name, height, mass, homeworld) \%>\%
mutate(
mass = NULL,
height = height * 0.0328084 # convert to feet
)
# Use across() with mutate() to apply a transformation
# to multiple columns in a tibble.
starwars \%>\%
select(name, homeworld, species) \%>\%
mutate(across(!name, as.factor))
# see more in ?across
# Window functions are useful for grouped mutates:
starwars \%>\%
select(name, mass, homeworld) \%>\%
group_by(homeworld) \%>\%
mutate(rank = min_rank(desc(mass)))
# see `vignette("window-functions")` for more details
# By default, new columns are placed on the far right.
# Experimental: you can override with `.before` or `.after`
df <- tibble(x = 1, y = 2)
df \%>\% mutate(z = x + y)
df \%>\% mutate(z = x + y, .before = 1)
df \%>\% mutate(z = x + y, .after = x)
# By default, mutate() keeps all columns from the input data.
# Experimental: You can override with `.keep`
df <- tibble(x = 1, y = 2, a = "a", b = "b")
df \%>\% mutate(z = x + y, .keep = "all") # the default
df \%>\% mutate(z = x + y, .keep = "used")
df \%>\% mutate(z = x + y, .keep = "unused")
df \%>\% mutate(z = x + y, .keep = "none") # same as transmute()
# Grouping ----------------------------------------
# The mutate operation may yield different results on grouped
# tibbles because the expressions are computed within groups.
# The following normalises `mass` by the global average:
starwars \%>\%
select(name, mass, species) \%>\%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
# Whereas this normalises `mass` by the averages within species
# levels:
starwars \%>\%
select(name, mass, species) \%>\%
group_by(species) \%>\%
mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
# Indirection ----------------------------------------
# Refer to column names stored as strings with the `.data` pronoun:
vars <- c("mass", "height")
mutate(starwars, prod = .data[[vars[[1]]]] * .data[[vars[[2]]]])
# Learn more in ?dplyr_data_masking
}
\seealso{
Other single table verbs:
\code{\link{arrange}()},
\code{\link{filter}()},
\code{\link{rename}()},
\code{\link{select}()},
\code{\link{slice}()},
\code{\link{summarise}()}
}
\concept{single table verbs}