forked from IndrajeetPatil/ggstatsplot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrouped_ggscatterstats.Rd
339 lines (282 loc) · 14.8 KB
/
grouped_ggscatterstats.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/grouped_ggscatterstats.R
\name{grouped_ggscatterstats}
\alias{grouped_ggscatterstats}
\title{Scatterplot with marginal distributions for all levels of a grouping
variable}
\usage{
grouped_ggscatterstats(data, x, y, type = "pearson", conf.level = 0.95,
bf.prior = 0.707, bf.message = TRUE, label.var = NULL,
label.expression = NULL, grouping.var, title.prefix = NULL,
xlab = NULL, ylab = NULL, method = "lm", method.args = list(),
formula = y ~ x, point.color = "black", point.size = 3,
point.alpha = 0.4, line.size = 1.5, point.width.jitter = 0,
point.height.jitter = 0, line.color = "blue", marginal = TRUE,
marginal.type = "histogram", marginal.size = 5, margins = c("both",
"x", "y"), package = "wesanderson", palette = "Royal1",
direction = 1, xfill = "#009E73", yfill = "#D55E00", xalpha = 1,
yalpha = 1, xsize = 0.7, ysize = 0.7, centrality.para = NULL,
results.subtitle = TRUE, stat.title = NULL, caption = NULL,
subtitle = NULL, nboot = 100, beta = 0.1, k = 2,
axes.range.restrict = FALSE, ggtheme = ggplot2::theme_bw(),
ggstatsplot.layer = TRUE, ggplot.component = NULL, return = "plot",
messages = TRUE, ...)
}
\arguments{
\item{data}{A dataframe (or a tibble) from which variables specified are to
be taken. A matrix or a table will \strong{not} be accepted.}
\item{x}{The column in \code{data} containing the explanatory variable to be
plotted on the x axis. Can be entered either as
a character string (e.g., \code{"x"}) or as a bare expression (e.g., \code{x}).}
\item{y}{The column in \code{data} containing the response (outcome) variable to
be plotted on the y axis. Can be entered either as
a character string (e.g., \code{"y"}) or as a bare expression (e.g., \code{y}).}
\item{type}{Type of association between paired samples required
("\code{"parametric"}: Pearson's product moment correlation coefficient" or
"\code{"nonparametric"}: Spearman's rho" or "\code{"robust"}: percentage bend
correlation coefficient" or "\code{"bayes"}: Bayes Factor for Pearson's \emph{r}").
Corresponding abbreviations are also accepted: \code{"p"} (for
parametric/Pearson's), \code{"np"} (nonparametric/Spearman), \code{"r"} (robust),
\code{"bf"} (for Bayes factor), resp.}
\item{conf.level}{Scalar between 0 and 1. If unspecified, the defaults return
\code{95\%} lower and upper confidence intervals (\code{0.95}).}
\item{bf.prior}{A number between 0.5 and 2 (default \code{0.707}), the prior width
to use in calculating Bayes factors.}
\item{bf.message}{Logical that decides whether to display Bayes Factor in
favor of the \emph{null} hypothesis. This argument is relevant only \strong{for
parametric test} (Default: \code{TRUE}).}
\item{label.var}{Variable to use for points labels. Can be entered either as
a character string (e.g., \code{"var1"}) or as a bare expression (e.g., \code{var1}).}
\item{label.expression}{An expression evaluating to a logical vector that
determines the subset of data points to label. This argument can be entered
either as a character string (e.g., \code{"y < 4 & z < 20"}) or as a bare
expression (e.g., \code{y < 4 & z < 20}).}
\item{grouping.var}{A single grouping variable (can be entered either as a
bare name \code{x} or as a string \code{"x"}).}
\item{title.prefix}{Character string specifying the prefix text for the fixed
plot title (name of each factor level) (Default: \code{NULL}). If \code{NULL}, the
variable name entered for \code{grouping.var} will be used.}
\item{xlab}{Labels for \code{x} and \code{y} axis variables. If \code{NULL} (default),
variable names for \code{x} and \code{y} will be used.}
\item{ylab}{Labels for \code{x} and \code{y} axis variables. If \code{NULL} (default),
variable names for \code{x} and \code{y} will be used.}
\item{method}{Smoothing method (function) to use, accepts either a character vector,
e.g. \code{"auto"}, \code{"lm"}, \code{"glm"}, \code{"gam"}, \code{"loess"} or a function, e.g.
\code{MASS::rlm} or \code{mgcv::gam}, \code{stats::lm}, or \code{stats::loess}.
For \code{method = "auto"} the smoothing method is chosen based on the
size of the largest group (across all panels). \code{\link[=loess]{loess()}} is
used for less than 1,000 observations; otherwise \code{\link[mgcv:gam]{mgcv::gam()}} is
used with \code{formula = y ~ s(x, bs = "cs")}. Somewhat anecdotally,
\code{loess} gives a better appearance, but is \eqn{O(N^{2})}{O(N^2)} in memory,
so does not work for larger datasets.
If you have fewer than 1,000 observations but want to use the same \code{gam()}
model that \code{method = "auto"} would use, then set
\code{method = "gam", formula = y ~ s(x, bs = "cs")}.}
\item{method.args}{List of additional arguments passed on to the modelling
function defined by \code{method}.}
\item{formula}{Formula to use in smoothing function, e.g., \code{y ~ x},
\code{y ~ poly(x, 2)}, \code{y ~ log(x)}.}
\item{point.color}{Aesthetics specifying geom point
(defaults: \code{point.color = "black"}, \code{point.size = 3}, \code{point.alpha = 0.4}).}
\item{point.size}{Aesthetics specifying geom point
(defaults: \code{point.color = "black"}, \code{point.size = 3}, \code{point.alpha = 0.4}).}
\item{point.alpha}{Aesthetics specifying geom point
(defaults: \code{point.color = "black"}, \code{point.size = 3}, \code{point.alpha = 0.4}).}
\item{line.size}{Size for the regression line.}
\item{point.width.jitter}{Degree of jitter in \code{x} and \code{y}
direction, respectively. Defaults to \code{0} (0\%) of the resolution of the
data.}
\item{point.height.jitter}{Degree of jitter in \code{x} and \code{y}
direction, respectively. Defaults to \code{0} (0\%) of the resolution of the
data.}
\item{line.color}{Color for the regression line.}
\item{marginal}{Decides whether \code{ggExtra::ggMarginal()} plots will be
displayed; the default is \code{TRUE}.}
\item{marginal.type}{Type of marginal distribution to be plotted on the axes
(\code{"histogram"}, \code{"boxplot"}, \code{"density"}, \code{"violin"}, \code{"densigram"}).}
\item{marginal.size}{Integer describing the relative size of the marginal
plots compared to the main plot. A size of \code{5} means that the main plot is
5x wider and 5x taller than the marginal plots.}
\item{margins}{Character describing along which margins to show the plots.
Any of the following arguments are accepted: \code{"both"}, \code{"x"}, \code{"y"}.}
\item{package}{Name of package from which the palette is desired as string
or symbol.}
\item{palette}{Name of palette as string or symbol.}
\item{direction}{Either \code{1} or \code{-1}. If \code{-1} the palette will be reversed.}
\item{xfill}{Character describing color fill for \code{x} and \code{y} axes
marginal distributions (default: \code{"#009E73"} (for \code{x}) and \code{"#D55E00"} (for
\code{y})). If set to \code{NULL}, manual specification of colors will be turned off
and 2 colors from the specified \code{palette} from \code{package} will be selected.}
\item{yfill}{Character describing color fill for \code{x} and \code{y} axes
marginal distributions (default: \code{"#009E73"} (for \code{x}) and \code{"#D55E00"} (for
\code{y})). If set to \code{NULL}, manual specification of colors will be turned off
and 2 colors from the specified \code{palette} from \code{package} will be selected.}
\item{xalpha}{Numeric deciding transparency levels for the marginal
distributions. Any numbers from \code{0} (transparent) to \code{1} (opaque). The
default is \code{1} for both axes.}
\item{yalpha}{Numeric deciding transparency levels for the marginal
distributions. Any numbers from \code{0} (transparent) to \code{1} (opaque). The
default is \code{1} for both axes.}
\item{xsize}{Size for the marginal distribution boundaries (Default:
\code{0.7}).}
\item{ysize}{Size for the marginal distribution boundaries (Default:
\code{0.7}).}
\item{centrality.para}{Decides \emph{which} measure of central tendency (\code{"mean"}
or \code{"median"}) is to be displayed as vertical (for \code{x}) and horizontal (for
\code{y}) lines.}
\item{results.subtitle}{Decides whether the results of statistical tests are
to be displayed as a subtitle (Default: \code{TRUE}). If set to \code{FALSE}, only
the plot will be returned.}
\item{stat.title}{A character describing the test being run, which will be
added as a prefix in the subtitle. The default is \code{NULL}. An example of a
\code{stat.title} argument will be something like \code{"Student's t-test: "}.}
\item{caption}{The text for the plot caption.}
\item{subtitle}{The text for the plot subtitle. Will work only if
\code{results.subtitle = FALSE}.}
\item{nboot}{Number of bootstrap samples for computing confidence interval
for the effect size (Default: \code{100}).}
\item{beta}{Bending constant (Default: \code{0.1}). For more, see \code{?WRS2::pbcor}.}
\item{k}{Number of digits after decimal point (should be an integer)
(Default: \code{k = 2}).}
\item{axes.range.restrict}{Logical that decides whether to restrict the axes
values ranges to \code{min} and \code{max} values of the axes variables (Default:
\code{FALSE}), only relevant for functions where axes variables are of numeric
type.}
\item{ggtheme}{A function, \code{ggplot2} theme name. Default value is
\code{ggplot2::theme_bw()}. Any of the \code{ggplot2} themes, or themes from
extension packages are allowed (e.g., \code{ggthemes::theme_fivethirtyeight()},
\code{hrbrthemes::theme_ipsum_ps()}, etc.).}
\item{ggstatsplot.layer}{Logical that decides whether \code{theme_ggstatsplot}
theme elements are to be displayed along with the selected \code{ggtheme}
(Default: \code{TRUE}).}
\item{ggplot.component}{A \code{ggplot} component to be added to the plot prepared
by \code{ggstatsplot}. This argument is primarily helpful for \code{grouped_} variant
of the current function. Default is \code{NULL}. The argument should be entered
as a function. If the given function has an argument \code{axes.range.restrict}
and if it has been set to \code{TRUE}, the added ggplot component \emph{might} not
work as expected.}
\item{return}{Character that describes what is to be returned: can be
\code{"plot"} (default) or \code{"subtitle"} or \code{"caption"}. Setting this to
\code{"subtitle"} will return the expression containing statistical results,
which will be a \code{NULL} if you set \code{results.subtitle = FALSE}. Setting this
to \code{"caption"} will return the expression containing details about Bayes
Factor analysis, but valid only when \code{type = "p"} and \code{bf.message = TRUE},
otherwise this will return a \code{NULL}.}
\item{messages}{Decides whether messages, references, notes, and warnings are
to be displayed (Default: \code{TRUE}).}
\item{...}{Arguments passed on to \code{combine_plots}
\describe{
\item{title.text}{String or plotmath expression to be drawn as title for the
\emph{combined plot}.}
\item{title.color}{Text color for title.}
\item{title.size}{Point size of title text.}
\item{title.vjust}{Vertical justification for title. Default = \code{0.5}
(centered on \code{y}). \code{0} = baseline at \code{y}, \code{1} = ascender at \code{y}.}
\item{title.hjust}{Horizontal justification for title. Default = \code{0.5}
(centered on \code{x}). \code{0} = flush-left at x, \code{1} = flush-right.}
\item{title.fontface}{The font face (\code{"plain"}, \code{"bold"} (default),
\code{"italic"}, \code{"bold.italic"}) for title.}
\item{caption.text}{String or plotmath expression to be drawn as the caption
for the \emph{combined plot}.}
\item{caption.color}{Text color for caption.}
\item{caption.size}{Point size of caption text.}
\item{caption.vjust}{Vertical justification for caption. Default = \code{0.5}
(centered on y). \code{0} = baseline at y, \code{1} = ascender at y.}
\item{caption.hjust}{Horizontal justification for caption. Default = \code{0.5}
(centered on x). \code{0} = flush-left at x, \code{1} = flush-right.}
\item{caption.fontface}{The font face (\code{"plain"} (default), \code{"bold"},
\code{"italic"}, \code{"bold.italic"}) for caption.}
\item{sub.text}{The label with which the \emph{combined plot} should be annotated.
Can be a plotmath expression.}
\item{sub.color}{Text color for annotation label (Default: \code{"black"}).}
\item{sub.size}{Point size of annotation text (Default: \code{12}).}
\item{sub.x}{The x position of annotation label (Default: \code{0.5}).}
\item{sub.y}{The y position of annotation label (Default: \code{0.5}).}
\item{sub.hjust}{Horizontal justification for annotation label (Default:
\code{0.5}).}
\item{sub.vjust}{Vertical justification for annotation label (Default:
\code{0.5}).}
\item{sub.vpadding}{Vertical padding. The total vertical space added to the
label, given in grid units. By default, this is added equally above and
below the label. However, by changing the y and vjust parameters, this can
be changed (Default: \code{grid::unit(1, "lines")}).}
\item{sub.fontface}{The font face (\code{"plain"} (default), \code{"bold"}, \code{"italic"},
\code{"bold.italic"}) for the annotation label.}
\item{sub.angle}{Angle at which annotation label is to be drawn (Default:
\code{0}).}
\item{sub.lineheight}{Line height of annotation label.}
\item{title.caption.rel.heights}{Numerical vector of relative row heights
while combining (title, plot, caption).}
\item{title.rel.heights}{Numerical vector of relative row heights while
combining (title, plot).}
\item{caption.rel.heights}{Numerical vector of relative row heights while
combining (plot, caption).}
}}
}
\description{
Grouped scatterplots from \code{ggplot2} combined with marginal
histograms/boxplots/density plots with statistical details added as a
subtitle.
}
\examples{
\donttest{
# to ensure reproducibility
set.seed(123)
# basic function call
ggstatsplot::grouped_ggscatterstats(
data = dplyr::filter(
ggstatsplot::movies_long,
genre == "Comedy" |
genre == "Drama"
),
x = length,
y = rating,
method = "lm",
formula = y ~ x + I(x^3),
grouping.var = genre
)
# using labeling
# (also show how to modify basic plot from within function call)
ggstatsplot::grouped_ggscatterstats(
data = dplyr::filter(ggplot2::mpg, cyl != 5),
x = displ,
y = hwy,
grouping.var = cyl,
title.prefix = "Cylinder count",
type = "robust",
label.var = manufacturer,
label.expression = hwy > 25 & displ > 2.5,
xfill = NULL,
ggplot.component = ggplot2::scale_y_continuous(sec.axis = ggplot2::dup_axis()),
package = "yarrr",
palette = "appletv",
messages = FALSE
)
# labeling without expression
ggstatsplot::grouped_ggscatterstats(
data = dplyr::filter(
.data = ggstatsplot::movies_long,
rating == 7,
genre \%in\% c("Drama", "Comedy")
),
x = budget,
y = length,
grouping.var = genre,
bf.message = FALSE,
label.var = "title",
marginal = FALSE,
title.prefix = "Genre",
caption.text = "All movies have IMDB rating equal to 7."
)
}
}
\references{
\url{https://indrajeetpatil.github.io/ggstatsplot/articles/web_only/ggscatterstats.html}
}
\seealso{
\code{\link{ggscatterstats}}, \code{\link{ggcorrmat}},
\code{\link{grouped_ggcorrmat}}
}
\author{
Indrajeet Patil, Chuck Powell
}