diff --git a/DESCRIPTION b/DESCRIPTION index 1fa7b46..7cf5478 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,14 @@ Package: growthcleanr Type: Package Title: Data Cleaner for Anthropometric Measurements -Version: 2.0.2 +Version: 2.0.3 Authors@R: c( - person("Daymont","Carrie", email = "cdaymont@pennstatehealth.psu.edu", role = "aut"), + person("Daymont","Carrie", email = "cdaymont@pennstatehealth.psu.edu", role = c("ctb","cre")), person("Grundmeier","Robert", role = "aut"), person("Miller","Jeffrey", role = "aut"), person("Campos","Diego", role = "aut"), person("Chudnov","Dan", role = "ctb"), - person("De los Santos","Hannah", email = "hdelossantos@mitre.org", role = c("ctb","cre")), + person("De los Santos","Hannah", email = "hdelossantos@mitre.org", role = c("ctb")), person("Cao","Lusha", role = "ctb"), person("Silva","Steffani", role = "ctb"), person("Zhang","Hanzhe", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index dd21b01..cfdecca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# growthcleanr 2.0.3 - 2022-11-01 + +## Added + +- CRAN release checklist now added under Developer Guidelines vignette (#99) + +## Changed + +- All possible levels for `cleangrowth()` output factor now enumerated +- Updated maintainer to Carrie Daymont + # growthcleanr 2.0.2 - 2022-09-13 ## Added diff --git a/R/growth.R b/R/growth.R index a04d53c..ab47195 100644 --- a/R/growth.R +++ b/R/growth.R @@ -204,7 +204,7 @@ cleangrowth <- function(subjid, # constants for pediatric # enumerate the different exclusion levels - exclude.levels <- c( + exclude.levels.peds <- c( 'Include', 'Unit-Error-High', 'Unit-Error-Low', @@ -235,6 +235,29 @@ cleangrowth <- function(subjid, 'Exclude-Too-Many-Errors-Other-Parameter' ) + exclude.levels.adult <- c( + "Include", + "Exclude-Adult-BIV", + "Exclude-Adult-Hundreds", + "Exclude-Adult-Unit-Errors", + "Exclude-Adult-Transpositions", + "Exclude-Adult-Weight-Cap-Identical", + "Exclude-Adult-Weight-Cap", + "Exclude-Adult-Swapped-Measurements", 
+ "Exclude-Adult-Identical-Same-Day", + "Exclude-Adult-Extraneous-Same-Day", + "Exclude-Adult-Distinct-Pairs", + "Exclude-Adult-Distinct-3-Or-More", + "Exclude-Adult-EWMA-Extreme", + "Exclude-Adult-Distinct-Ordered-Pairs", + "Exclude-Adult-EWMA-Moderate", + "Exclude-Adult-Possibly-Impacted-By-Weight-Cap", + "Exclude-Adult-Distinct-Single", + "Exclude-Adult-Too-Many-Errors" + ) + + exclude.levels <- base::union(exclude.levels.peds, exclude.levels.adult) + # if there's no pediatric data, no need to go through this rigamarole if (nrow(data.all) > 0){ @@ -665,8 +688,7 @@ cleangrowth <- function(subjid, exclude = c(as.character(ret.df$exclude), res$result), mean_sde = c(rep(NA, nrow(ret.df)), res$mean_sde) ) - full_out[, exclude := factor(exclude, levels = unique(c(exclude.levels, - unique(exclude))))] + full_out[, exclude := factor(exclude, levels = exclude.levels)] full_out <- full_out[order(line),] # remove column added for keeping track full_out[, line := NULL] diff --git a/README.Rmd b/README.Rmd index 0e3bed1..e611469 100644 --- a/README.Rmd +++ b/README.Rmd @@ -102,6 +102,7 @@ The rest of this documentation includes: notes and suggestions for running `growthcleanr` with large data sources - [Next steps](https://carriedaymont.github.io/growthcleanr/articles/next-steps.html), notes on potential enhancements to the pediatric and adult algorithms +- [Developer guidelines](https://carriedaymont.github.io/growthcleanr/articles/developer-guidelines.html), advice for contributors to this package, including a CRAN release checklist ## Changes diff --git a/README.md b/README.md index d4e7232..01d3a0c 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,10 @@ The rest of this documentation includes: - [Next steps](https://carriedaymont.github.io/growthcleanr/articles/next-steps.html), notes on potential enhancements to the pediatric and adult algorithms +- [Developer + guidelines](https://carriedaymont.github.io/growthcleanr/articles/developer-guidelines.html), + advice 
for contributors to this package, including a CRAN release + checklist ## Changes diff --git a/_pkgdown.yml b/_pkgdown.yml index 7934b0f..8a0c688 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,7 @@ articles: - utilities - large-data-sets - next-steps + - developer-guidelines reference: - title: "Cleaning height and weight observations" diff --git a/cran-comments.md b/cran-comments.md index f05f783..16d8dd9 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,4 @@ -# CRAN submission growthcleanr 2.0.2 (1) +# CRAN submission growthcleanr 2.0.3 ## R CMD check results There were no ERRORs, WARNINGs, or NOTEs. @@ -6,8 +6,18 @@ There were no ERRORs, WARNINGs, or NOTEs. ## Downstream dependencies There are currently no downstream dependencies for this package. + + # Previous Submissions +# CRAN submission growthcleanr 2.0.2 (1) + +## R CMD check results +There were no ERRORs, WARNINGs, or NOTEs. + +## Downstream dependencies +There are currently no downstream dependencies for this package. + # CRAN submission growthcleanr 2.0.1 (6) ## R CMD check results diff --git a/docs/404.html b/docs/404.html index 137d06c..29e132b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@
@@ -78,6 +78,9 @@MIT License - -Copyright (c) 2018-2022 Carrie Daymont - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +YEAR: 2022 +COPYRIGHT HOLDER: Carrie Daymont@@ -118,7 +102,7 @@License
vignettes/adult-algorithm.Rmd
adult-algorithm.Rmd
vignettes/configuration.Rmd
configuration.Rmd
ref.data.path
- defaults to using CDC reference data
from year 2000; supply a file path to use alternate reference data. Note
that when running from an installed growthcleanr
package
-(e.g. having called library(growthcleanr)
), this path does
+(e.g. having called library(growthcleanr)
), this path does
not need to be specified. Developers testing the source code directly
from the source directory will need to specify this as well.
error.load.mincount
- default 2
;
@@ -335,7 +338,7 @@
survey
+smoothed using the svysmooth()
function in the R survey
package to estimate the weight and height SD scores for each day up to
7,305 days, with a bandwidth chosen to balance between over- and
under-fitting, and interpolation between the estimates from this
diff --git a/docs/articles/developer-guidelines.html b/docs/articles/developer-guidelines.html
new file mode 100644
index 0000000..c6854bb
--- /dev/null
+++ b/docs/articles/developer-guidelines.html
@@ -0,0 +1,189 @@
+
+
+
+
+
+
+
+vignettes/developer-guidelines.Rmd
+ developer-guidelines.Rmd
When deploying updates to CRAN, here are a few items that you need to +check off before doing so:
+devtools::document()
+devtools::check()
:
+cran-comments.md
and can be fixed, fix them. There
+should be no ERRORs or WARNINGs before submitting to CRAN.README.Rmd
, re-knit to
+README.md
+pkgdown::build_site()
+Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego.
+Site built with pkgdown 2.0.6.
+Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego, De los Santos Hannah.
+Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego.
vignettes/installation.Rmd
installation.Rmd
growthcleanr
has been developed and tested using R
-version 3.6 and more recently version 4+. It should work using R on
-Windows, macOS, or Unix/Linux, although there are some additional platform-specific notes you may wish to review.
You can install the growthcleanr
package directly from
-GitHub using devtools
in the R console with:
To get started with growthcleanr
, install it from
+CRAN:
-install.packages("devtools")
-devtools::install_github("carriedaymont/growthcleanr", ref="main")
Note that ref="main"
is required; the default branch is
-“main”, and must be referred to explicitly.
If you are unable to install devtools
, a similar
-function is available in the remotes
package:
install.packages("growthcleanr")
To install the latest development version from GitHub using
+devtools
:
-install.packages("remotes")
-remotes::install_github("carriedaymont/growthcleanr", ref="main")
Note that ref="main"
is required; the default branch is
-“main”, and must be referred to explicitly.
growthcleanr
itself has several dependencies, so it may
-take a little while to download and install everything on your
-machine.
devtools::install_github("carriedaymont/growthcleanr", ref="main")
+Installing growthcleanr
will install several additional
+packages in turn.
See GitHub and source-level install for +developers for additional details.
If you want to work with and potentially change the
-growthcleanr
code itself, you can download or clone the
-growthcleanr
source code and then install it from source.
-To clone the source using git
:
% git clone https://github.com/carriedaymont/growthcleanr.git
Either way, once you have the growthcleanr
package
-source, open an R session from the growthcleanr
base
-directory. Then install growthcleanr using the R devtools
-package:
You can install the growthcleanr
package directly from
+GitHub using devtools
in the R console with:
+install.packages("devtools")
+devtools::install_github("carriedaymont/growthcleanr", ref="main")
growthcleanr
itself has several dependencies, so it may
+take a little while to download and install everything on your
+machine.
Note that the ref="main"
part is required; the default
+value of ref
refers to a branch name that is not used in
+the growthcleanr
repository, which instead uses a default
+branch called “main
”.
To install a different branch, for example if you want to test a
+branch associated with a merge request, specify the branch name as the
+value of ref
.
If you are unable to install devtools
, a similar
+function is available in the remotes
package:
+install.packages("remotes")
+remotes::install_github("carriedaymont/growthcleanr", ref="main")
If you are developing the growthcleanr
code itself, you
+can download or clone the growthcleanr
source code and then
+install it from source. To clone the source using git
:
% git clone https://github.com/carriedaymont/growthcleanr.git
Once you have the growthcleanr
package source, open an R
+session from the growthcleanr
base directory. Then install
+growthcleanr using the R devtools
package:
devtools::install(".")
You can also install the package from an installation file if one is -obtained.
vignettes/large-data-sets.Rmd
large-data-sets.Rmd
-library(growthcleanr)
+library(growthcleanr)
count <- splitinput(syngrowth, fname = "mydata", fdir = tempdir())
count
## [1] 7
diff --git a/docs/articles/next-steps.html b/docs/articles/next-steps.html
index 3260284..50011d9 100644
--- a/docs/articles/next-steps.html
+++ b/docs/articles/next-steps.html
@@ -33,7 +33,7 @@
vignettes/next-steps.Rmd
next-steps.Rmd
vignettes/output.Rmd
output.Rmd
vignettes/quickstart.Rmd
quickstart.Rmd
To get started with growthcleanr
, the
-growthcleanr
package must be installed. Installing
-growthcleanr
will in turn install several additional
-packages. growthcleanr
is not currently on CRAN.
One option for installation is using the devtools
-package. Install devtools
and growthcleanr
-with the following commands:
growthcleanr
package must be installed. To install the
+latest growthcleanr
release from CRAN:
-install.packages("devtools")
-devtools::install_github("carriedaymont/growthcleanr", ref="main")
If you are unable to install devtools
, a similar
-function is available in the remotes
package:
install.packages("growthcleanr")
To install the latest development version from GitHub using
+devtools
:
-install.packages("remotes")
-remotes::install_github("carriedaymont/growthcleanr", ref="main")
Note that ref="main"
is required whether you use
-devtools
or remotes
. The default
-growthcleanr
code branch is “main”, and this must be
-referred to explicitly.
devtools::install_github("carriedaymont/growthcleanr", ref="main")
+Installing growthcleanr
will install several additional
+packages in turn.
Further installation details and notes can be found under Installation.
For a data.frame object source_data
containing growth
data:
-library(growthcleanr)
+library(growthcleanr)
# prepare data as a data.table
data <- as.data.table(source_data)
diff --git a/docs/articles/usage.html b/docs/articles/usage.html
index db7600a..0876c26 100644
--- a/docs/articles/usage.html
+++ b/docs/articles/usage.html
@@ -33,7 +33,7 @@
vignettes/usage.Rmd
usage.Rmd
library(growthcleanr)
vignettes/utilities.Rmd
utilities.Rmd
Daymont Carrie. Author. +
Daymont Carrie. Contributor, maintainer.
De los Santos Hannah. Contributor, maintainer. +
De los Santos Hannah. Contributor.
Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego, De los Santos Hannah.
+Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego.
R/adjustcarryforward.R
+ Source: R/adjustcarryforward.R
acf_answers.Rd
cleangrowth
.
acf_answers( - subjid, - param, - agedays, - sex, - measurement, - orig.exclude, - sd.recenter = NA, - ewma.exp = -1.5, - ref.data.path = "", - quietly = T -)- -
subjid | -Vector of unique identifiers for each subject in the database. |
-
---|---|
param | -Vector identifying each measurement, may be 'WEIGHTKG', 'HEIGHTCM', or 'LENGTHCM' +
+
+
+
+ Arguments+
|
-
agedays | -Numeric vector containing the age in days at each measurement. |
-
sex | -Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', -'f' or 1 for females. |
-
measurement | -Numeric vector containing the actual measurement data. Weight must be in -kilograms (kg), and linear measurements (height vs. length) in centimeters (cm). |
-
orig.exclude | -Vector of exclusion assessment results from cleangrowth() |
-
sd.recenter | -Data frame or table with median SD-scores per day of life |
-
ewma.exp | -Exponent to use for weighting measurements in the exponentially weighted moving +Note: at the moment, all LENGTHCM will be converted to HEIGHTCM. In the future, the algorithm will be updated to consider this difference. + + +Numeric vector containing the age in days at each measurement. Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', +'f' or 1 for females. Numeric vector containing the actual measurement data. Weight must be in +kilograms (kg), and linear measurements (height vs. length) in centimeters (cm). Vector of exclusion assessment results from cleangrowth() Data frame or table with median SD-scores per day of life Exponent to use for weighting measurements in the exponentially weighted moving average calculations. Defaults to -1.5. This exponent should be negative in order to weight growth measurements closer to the measurement being evaluated more strongly. Exponents that are further from zero (e.g. -3) will increase the relative influence of measurements close in time to the measurement -being evaluated compared to using the default exponent. |
-
ref.data.path | -Path to reference data. If not supplied, the year 2000 -Centers for Disease Control (CDC) reference data will be used. |
-
quietly | -Determines if function messages are to be displayed and if log files (parallel only) -are to be generated. Defaults to TRUE. |
-
A data frame, containing an index "n" of rows, corresponding to the +being evaluated compared to using the default exponent.
+ + +Path to reference data. If not supplied, the year 2000 +Centers for Disease Control (CDC) reference data will be used.
Determines if function messages are to be displayed and if log files (parallel only) +are to be generated. Defaults to TRUE.
A data frame, containing an index "n" of rows, corresponding to the original order of the input vectors, and "acf_answers", containing the answers on whether a height value should be kept or excluded (returns "Definitely Exclude", "Definitely Include", or "Unknown" for height values, NA for weight values).
+adjustcarryforward
Uses absolute height velocity to identify values
excluded as carried forward values for reinclusion.R/adjustcarryforward.R
+ Source: R/adjustcarryforward.R
adjustcarryforward.Rd
adjustcarryforward( - subjid, - param, - agedays, - sex, - measurement, - orig.exclude, - exclude_opt = 0, - sd.recenter = NA, - ewma.exp = -1.5, - ref.data.path = "", - quietly = T, - minfactor = 0.5, - maxfactor = 2, - banddiff = 3, - banddiff_plus = 5.5, - min_ht.exp_under = 2, - min_ht.exp_over = 0, - max_ht.exp_under = 0.33, - max_ht.exp_over = 1.5 -)- -
Re-evaluated exclusion assessments based on height velocity.
- -+being evaluated compared to using the default exponent. + + +# Run on a small subset of given data -df <- as.data.frame(syngrowth) -df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ] -clean_df <- cbind(df, - "gcr_result" = cleangrowth(df$subjid, - df$param, - df$agedays, - df$sex, - df$measurement)) - -# Adjust carry forward values in cleaned data -adj_clean <- adjustcarryforward(subjid = clean_df$subjid, - param = clean_df$param, - agedays = clean_df$agedays, - sex = clean_df$sex, - measurement = clean_df$measurement, - orig.exclude = clean_df$gcr_result) -
Path to reference data. If not supplied, the year 2000 +Centers for Disease Control (CDC) reference data will be used.
Determines if function messages are to be displayed and if log files (parallel only) +are to be generated. Defaults to TRUE.
Sweep variable for computing mindiff.next.ht in 15f, default 0.5
Sweep variable for computing maxdiff.next.ht in 15f, default 2
Sweep variable for computing mindiff.next.ht in 15f, default 3
Sweep variable for computing maxdiff.next.ht in 15, default 5.5
Sweep variable for computing ht.exp in 15f, default 2
Sweep variable for computing ht.exp in 15f, default 0
Sweep variable for computing ht.exp in 15f, default 0.33
Sweep variable for computing ht.exp in 15f, default 1.5
Re-evaluated exclusion assessments based on height velocity.
+# Run on a small subset of given data
+df <- as.data.frame(syngrowth)
+df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ]
+clean_df <- cbind(df,
+ "gcr_result" = cleangrowth(df$subjid,
+ df$param,
+ df$agedays,
+ df$sex,
+ df$measurement))
+
+# Adjust carry forward values in cleaned data
+adj_clean <- adjustcarryforward(subjid = clean_df$subjid,
+ param = clean_df$param,
+ agedays = clean_df$agedays,
+ sex = clean_df$sex,
+ measurement = clean_df$measurement,
+ orig.exclude = clean_df$gcr_result)
+
Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego, De los Santos Hannah.
+Developed by Daymont Carrie, Grundmeier Robert, Miller Jeffrey, Campos Diego.
Clean growth measurements
cleangrowth( - subjid, - param, - agedays, - sex, - measurement, - recover.unit.error = F, - sd.extreme = 25, - z.extreme = 25, - lt3.exclude.mode = "default", - height.tolerance.cm = 2.5, - error.load.mincount = 2, - error.load.threshold = 0.5, - sd.recenter = NA, - sdmedian.filename = "", - sdrecentered.filename = "", - include.carryforward = F, - ewma.exp = -1.5, - ref.data.path = "", - log.path = ".", - parallel = F, - num.batches = NA, - quietly = T, - adult_cutpoint = 20, - weight_cap = Inf, - adult_columns_filename = "" -)- -
subjid | -Vector of unique identifiers for each subject in the database. |
-
---|---|
param | -Vector identifying each measurement, may be 'WEIGHTKG', 'WEIGHTLBS', 'HEIGHTCM', 'HEIGHTIN', or 'LENGTHCM' +
+
+
+
+ Arguments+
|
-
agedays | -Numeric vector containing the age in days at each measurement. |
-
sex | -Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', 'f' or 1 for females. |
-
measurement | -Numeric vector containing the actual measurement data. Weight must be in -kilograms (kg), and linear measurements (height vs. length) in centimeters (cm). |
-
recover.unit.error | -Indicates whether the cleaning algorithm should +metric during algorithm calculations. + + +Numeric vector containing the age in days at each measurement. Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', 'f' or 1 for females. Numeric vector containing the actual measurement data. Weight must be in +kilograms (kg), and linear measurements (height vs. length) in centimeters (cm). Indicates whether the cleaning algorithm should attempt to identify unit errors (I.e. inches vs. cm, lbs vs. kg). If unit errors are identified, the value will be corrected and retained within the -cleaning algorithm as a valid measurement. Defaults to FALSE. |
-
sd.extreme | -Measurements more than sd.extreme standard deviations from -the mean (either above or below) will be flagged as invalid. Defaults to 25. |
-
z.extreme | -Measurements with an absolute z-score greater than -z.extreme will be flagged as invalid. Defaults to 25. |
-
lt3.exclude.mode | -Determines type of exclusion procedure to use for 1 or 2 measurements of one type without +cleaning algorithm as a valid measurement. Defaults to FALSE. + + +Measurements more than sd.extreme standard deviations from +the mean (either above or below) will be flagged as invalid. Defaults to 25. Measurements with an absolute z-score greater than +z.extreme will be flagged as invalid. Defaults to 25. Determines type of exclusion procedure to use for 1 or 2 measurements of one type without matching same ageday measurements for the other parameter. Options include "default" (standard growthcleanr approach), and "flag.both" (in case of two measurements of one type without matching values for the other parameter, flag both -for exclusion if beyond threshold) |
-
height.tolerance.cm | -maximum decrease in height tolerated for sequential measurements |
-
error.load.mincount | -minimum count of exclusions on parameter before -considering excluding all measurements. Defaults to 2. |
-
error.load.threshold | -threshold of percentage of excluded measurement count to included measurement -count that must be exceeded before excluding all measurements of either parameter. Defaults to 0.5. |
-
sd.recenter | -specifies how to recenter medians. May be a data frame or +for exclusion if beyond threshold) + + +maximum decrease in height tolerated for sequential measurements minimum count of exclusions on parameter before +considering excluding all measurements. Defaults to 2. threshold of percentage of excluded measurement count to included measurement +count that must be exceeded before excluding all measurements of either parameter. Defaults to 0.5. specifies how to recenter medians. May be a data frame or table w/median SD-scores per day of life by gender and parameter, or "NHANES" -or "derive" as a character vector.
If specifying a data set, columns must include param, sex, agedays, and sd.median (referred to elsewhere as "modified Z-score"), and those medians will be used for recentering. A summary of how the NHANES reference medians were derived is -available in README.md. Defaults to NA. |
-
sdmedian.filename | -Name of file to save sd.median data calculated on the input dataset to as CSV. +available in README.md. Defaults to NA. + + +Name of file to save sd.median data calculated on the input dataset to as CSV. Defaults to "", for which this data will not be saved. Use for extracting medians for parallel processing -scenarios other than the built-in parallel option. |
-
sdrecentered.filename | -Name of file to save re-centered data to as CSV. Defaults to "", for which this -data will not be saved. Useful for post-processing and debugging. |
-
include.carryforward | -Determines whether Carry-Forward values are kept in the output. Defaults to False. |
-
ewma.exp | -Exponent to use for weighting measurements in the +scenarios other than the built-in parallel option. + + +Name of file to save re-centered data to as CSV. Defaults to "", for which this +data will not be saved. Useful for post-processing and debugging. Determines whether Carry-Forward values are kept in the output. Defaults to False. Exponent to use for weighting measurements in the exponentially weighted moving average calculations. Defaults to -1.5. This exponent should be negative in order to weight growth measurements closer to the measurement being evaluated more strongly. Exponents that are further from zero (e.g. -3) will increase the relative influence of measurements close in time to the measurement being evaluated compared to -using the default exponent. |
-
ref.data.path | -Path to reference data. If not supplied, the year 2000 -Centers for Disease Control (CDC) reference data will be used. |
-
log.path | -Path to log file output when running in parallel (non-quiet mode). Default is ".". A new -directory will be created if necessary. Set to NA to disable log files. |
-
parallel | -Determines if function runs in parallel. Defaults to FALSE. |
-
num.batches | -Specify the number of batches to run in parallel. Only +using the default exponent. + + +Path to reference data. If not supplied, the year 2000 +Centers for Disease Control (CDC) reference data will be used. Path to log file output when running in parallel (non-quiet mode). Default is NA. A new +directory will be created if necessary. Set to NA to disable log files. Determines if function runs in parallel. Defaults to FALSE. Specify the number of batches to run in parallel. Only applies if parallel is set to TRUE. Defaults to the number of workers -returned by the getDoParWorkers function in the foreach package. |
-
quietly | -Determines if function messages are to be displayed and if log files (parallel only) are to be generated. -Defaults to TRUE |
-
adult_cutpoint | -Number between 18 and 20, describing ages when the +returned by the getDoParWorkers function in the foreach package. + + +Determines if function messages are to be displayed and if log files (parallel only) are to be generated. +Defaults to TRUE Number between 18 and 20, describing ages when the pediatric algorithm should not be applied (< adult_cutpoint), and the adult algorithm should apply (>= adult_cutpoint). Numbers outside this range will be -changed to the closest number within the range. Defaults to 20. |
-
weight_cap | -Positive number, describing a weight cap in kg (rounded to the +changed to the closest number within the range. Defaults to 20. + + +Positive number, describing a weight cap in kg (rounded to the nearest .1, +/- .1) within the adult dataset. If there is no weight cap, set -to Inf. Defaults to Inf. |
-
adult_columns_filename | -Name of file to save original adult data, with additional output columns to +to Inf. Defaults to Inf. + + +Name of file to save original adult data, with additional output columns to as CSV. Defaults to "", for which this data will not be saved. Useful -for post-analysis. For more information on this output, please see README. |
-
Vector of exclusion codes for each of the input measurements.
-Possible values for each code are:
'Include', 'Unit-Error-High', 'Unit-Error-Low', 'Swapped-Measurements', 'Missing',
Vector of exclusion codes for each of the input measurements.
+ + +Possible values for each code are:
'Include', 'Unit-Error-High', 'Unit-Error-Low', 'Swapped-Measurements', 'Missing',
'Exclude-Carried-Forward', 'Exclude-SD-Cutoff', 'Exclude-EWMA-Extreme', 'Exclude-EWMA-Extreme-Pair',
'Exclude-Extraneous-Same-Day',
'Exclude-EWMA-8', 'Exclude-EWMA-9', 'Exclude-EWMA-10', 'Exclude-EWMA-11', 'Exclude-EWMA-12', 'Exclude-EWMA-13', 'Exclude-EWMA-14',
'Exclude-Min-Height-Change', 'Exclude-Max-Height-Change',
'Exclude-Pair-Delta-17', 'Exclude-Pair-Delta-18', 'Exclude-Pair-Delta-19',
'Exclude-Single-Outlier', 'Exclude-Too-Many-Errors', 'Exclude-Too-Many-Errors-Other-Parameter'
+# Run calculation using a small subset of given data -df_stats <- as.data.frame(syngrowth) -df_stats <- df_stats[df_stats$subjid %in% unique(df_stats[, "subjid"])[1:5], ] - -clean_stats <-cleangrowth(subjid = df_stats$subjid, - param = df_stats$param, - agedays = df_stats$agedays, - sex = df_stats$sex, - measurement = df_stats$measurement) - -# Once processed you can filter data based on result value -df_stats <- cbind(df_stats, "clean_result" = clean_stats) -clean_df_stats <- df_stats[df_stats$clean_result == "Include",] - -# Parallel processing: run using 2 cores and batches -clean_stats <- cleangrowth(subjid = df_stats$subjid, - param = df_stats$param, - agedays = df_stats$agedays, - sex = df_stats$sex, - measurement = df_stats$measurement, - parallel = TRUE, - num.batches = 2) -#> [2021-07-06 10:53:04] Writing batch logs to '.'...#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’#> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
# \donttest{
+# Run calculation using a small subset of given data
+df_stats <- as.data.frame(syngrowth)
+df_stats <- df_stats[df_stats$subjid %in% unique(df_stats[, "subjid"])[1:5], ]
+
+clean_stats <-cleangrowth(subjid = df_stats$subjid,
+ param = df_stats$param,
+ agedays = df_stats$agedays,
+ sex = df_stats$sex,
+ measurement = df_stats$measurement)
+
+# Once processed you can filter data based on result value
+df_stats <- cbind(df_stats, "clean_result" = clean_stats)
+clean_df_stats <- df_stats[df_stats$clean_result == "Include",]
+
+# Parallel processing: run using 2 cores and batches
+clean_stats <- cleangrowth(subjid = df_stats$subjid,
+ param = df_stats$param,
+ agedays = df_stats$agedays,
+ sex = df_stats$sex,
+ measurement = df_stats$measurement,
+ parallel = TRUE,
+ num.batches = 2)
+#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
+#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
+#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
+#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
+# }
+
ewma
calculates the exponentially weighted moving average (EWMA) for a set of numeric observations over time.
ewma(agedays, z, ewma.exp, ewma.adjacent = T)- -
agedays | -Vector of age in days for each z score (potentially transformed to adjust weighting). |
-
---|---|
z | -Input vector of numeric z-score data. |
-
ewma.exp | -Exponent to use for weighting. |
-
ewma.adjacent | -Specify whether EWMA values excluding adjacent measurements should be calculated. Defaults to TRUE. |
-
Data frame with 3 variables:
The first variable (ewma.all) contains the EWMA at observation time +
ewma(agedays, z, ewma.exp, ewma.adjacent = TRUE)
Vector of age in days for each z score (potentially transformed to adjust weighting).
Input vector of numeric z-score data.
Exponent to use for weighting.
Specify whether EWMA values excluding adjacent measurements should be calculated. Defaults to TRUE.
Data frame with 3 variables:
The first variable (ewma.all) contains the EWMA at observation time excluding only the actual observation for that time point.
The second variable (ewma.before) contains the EWMA for each observation excluding both the actual observation and the immediate prior observation.
The third variable (ewma.after) contains the EWMA for each observation excluding both the actual observation and the subsequent observation.
+# Run on 1 subject, 1 type of parameter -df_stats <- as.data.frame(syngrowth) -df_stats <- df_stats[df_stats$subjid == df_stats$subjid[1] & - df_stats$param == "HEIGHTCM", ] - -# Get the uncentered z-scores -measurement_to_z <- read_anthro(cdc.only = TRUE) -sd <- measurement_to_z(df_stats$param, - df_stats$agedays, - df_stats$sex, - df_stats$measurement, - TRUE) - -# Calculate exponentially weighted moving average -e_df <- ewma(df_stats$agedays, sd, ewma.exp = -1.5) -
# Run on 1 subject, 1 type of parameter
+df_stats <- as.data.frame(syngrowth)
+df_stats <- df_stats[df_stats$subjid == df_stats$subjid[1] &
+ df_stats$param == "HEIGHTCM", ]
+
+# Get the uncentered z-scores
+measurement_to_z <- read_anthro(cdc.only = TRUE)
+sd <- measurement_to_z(df_stats$param,
+ df_stats$agedays,
+ df_stats$sex,
+ df_stats$measurement,
+ TRUE)
+
+# Calculate exponentially weighted moving average
+e_df <- ewma(df_stats$agedays, sd, ewma.exp = -1.5)
+
ext_bmiz( - data, - age = "agem", - wt = "wt", - ht = "ht", - bmi = "bmi", - adjust.integer.age = T, - ref.data.path = "" -)+
ext_bmiz(
+ data,
+ age = "agem",
+ wt = "wt",
+ ht = "ht",
+ bmi = "bmi",
+ adjust.integer.age = TRUE,
+ ref.data.path = ""
+)
Input data frame or data table
Name of input column containing subject age in months in quotes, default "agem"
Name of input column containing weight (kg) value in quotes, default "wt"
Name of input column containing height (cm) value in quotes, default "ht"
data | -Input data frame or data table |
-
---|---|
age | -Name of input column containing subject age in months in quotes, default "agem" |
-
wt | -Name of input column containing weight (kg) value in quotes, default "wt" |
-
ht | -Name of input column containing height (cm) value in quotes, default "ht" |
-
bmi | -Name of input column containing calculated BMI in quotes, default "bmi" |
-
adjust.integer.age | -If age inputs are all integer, add 0.5 if TRUE; -default TRUE |
-
ref.data.path | -Path to directory containing reference data |
-
Name of input column containing calculated BMI in quotes, default "bmi"
Expanded data frame containing computed BMI values
-If age inputs are all integer, add 0.5 if TRUE; +default TRUE
Path to directory containing reference data
Expanded data frame containing computed BMI values
+This function should produce output equivalent to the SAS macro provided at https://www.cdc.gov/nccdphp/dnpao/growthcharts/resources/sas.htm
The extended BMIz is the inverse cumulative distribution function (CDF) of @@ -249,58 +184,57 @@
ref.data.path
to adjust the path to this file from your working
directory if necessary.
+ +# Run on a small subset of given data -df <- as.data.frame(syngrowth) -df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ] -df <- cbind(df, - "gcr_result" = cleangrowth(df$subjid, - df$param, - df$agedays, - df$sex, - df$measurement)) -df_wide <- longwide(df) # convert to wide format for ext_bmiz -df_wide_bmi <- simple_bmi(df_wide) # compute simple BMI - -# Calling the function with default column names -df_bmiz <- ext_bmiz(df_wide_bmi) - -# Specifying different column names; note that quotes are used -dfc <- simple_bmi(df_wide) -colnames(dfc)[colnames(dfc) %in% c("agem", "wt", "ht")] <- - c("agemos", "weightkg", "heightcm") -df_bmiz <- ext_bmiz(dfc, age="agemos", wt="weightkg", ht="heightcm") - -# Disabling conversion of all-integer age in months to (age + 0.5) -dfc <- simple_bmi(df_wide) -df_bmiz <- ext_bmiz(dfc, adjust.integer.age=FALSE) -
# Run on a small subset of given data
+df <- as.data.frame(syngrowth)
+df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ]
+df <- cbind(df,
+ "gcr_result" = cleangrowth(df$subjid,
+ df$param,
+ df$agedays,
+ df$sex,
+ df$measurement))
+df_wide <- longwide(df) # convert to wide format for ext_bmiz
+df_wide_bmi <- simple_bmi(df_wide) # compute simple BMI
+
+# Calling the function with default column names
+df_bmiz <- ext_bmiz(df_wide_bmi)
+
+# Specifying different column names; note that quotes are used
+dfc <- simple_bmi(df_wide)
+colnames(dfc)[colnames(dfc) %in% c("agem", "wt", "ht")] <-
+ c("agemos", "weightkg", "heightcm")
+df_bmiz <- ext_bmiz(dfc, age="agemos", wt="weightkg", ht="heightcm")
+
+# Disabling conversion of all-integer age in months to (age + 0.5)
+dfc <- simple_bmi(df_wide)
+df_bmiz <- ext_bmiz(dfc, adjust.integer.age=FALSE)
+