diff --git a/DESCRIPTION b/DESCRIPTION index 1fa7b46..7cf5478 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,14 @@ Package: growthcleanr Type: Package Title: Data Cleaner for Anthropometric Measurements -Version: 2.0.2 +Version: 2.0.3 Authors@R: c( - person("Daymont","Carrie", email = "cdaymont@pennstatehealth.psu.edu", role = "aut"), + person("Daymont","Carrie", email = "cdaymont@pennstatehealth.psu.edu", role = c("ctb","cre")), person("Grundmeier","Robert", role = "aut"), person("Miller","Jeffrey", role = "aut"), person("Campos","Diego", role = "aut"), person("Chudnov","Dan", role = "ctb"), - person("De los Santos","Hannah", email = "hdelossantos@mitre.org", role = c("ctb","cre")), + person("De los Santos","Hannah", email = "hdelossantos@mitre.org", role = c("ctb")), person("Cao","Lusha", role = "ctb"), person("Silva","Steffani", role = "ctb"), person("Zhang","Hanzhe", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index dd21b01..cfdecca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# growthcleanr 2.0.3 - 2022-11-01 + +## Added + +- CRAN release checklist now added under Developer Guidelines vignette (#99) + +## Changed + +- All possible levels for `cleangrowth()` output factor now enumerated +- Updated maintainer to Carrie Daymont + # growthcleanr 2.0.2 - 2022-09-13 ## Added diff --git a/R/growth.R b/R/growth.R index a04d53c..ab47195 100644 --- a/R/growth.R +++ b/R/growth.R @@ -204,7 +204,7 @@ cleangrowth <- function(subjid, # constants for pediatric # enumerate the different exclusion levels - exclude.levels <- c( + exclude.levels.peds <- c( 'Include', 'Unit-Error-High', 'Unit-Error-Low', @@ -235,6 +235,29 @@ cleangrowth <- function(subjid, 'Exclude-Too-Many-Errors-Other-Parameter' ) + exclude.levels.adult <- c( + "Include", + "Exclude-Adult-BIV", + "Exclude-Adult-Hundreds", + "Exclude-Adult-Unit-Errors", + "Exclude-Adult-Transpositions", + "Exclude-Adult-Weight-Cap-Identical", + "Exclude-Adult-Weight-Cap", + "Exclude-Adult-Swapped-Measurements", 
+ "Exclude-Adult-Identical-Same-Day", + "Exclude-Adult-Extraneous-Same-Day", + "Exclude-Adult-Distinct-Pairs", + "Exclude-Adult-Distinct-3-Or-More", + "Exclude-Adult-EWMA-Extreme", + "Exclude-Adult-Distinct-Ordered-Pairs", + "Exclude-Adult-EWMA-Moderate", + "Exclude-Adult-Possibly-Impacted-By-Weight-Cap", + "Exclude-Adult-Distinct-Single", + "Exclude-Adult-Too-Many-Errors" + ) + + exclude.levels <- base::union(exclude.levels.peds, exclude.levels.adult) + # if there's no pediatric data, no need to go through this rigamarole if (nrow(data.all) > 0){ @@ -665,8 +688,7 @@ cleangrowth <- function(subjid, exclude = c(as.character(ret.df$exclude), res$result), mean_sde = c(rep(NA, nrow(ret.df)), res$mean_sde) ) - full_out[, exclude := factor(exclude, levels = unique(c(exclude.levels, - unique(exclude))))] + full_out[, exclude := factor(exclude, levels = exclude.levels)] full_out <- full_out[order(line),] # remove column added for keeping track full_out[, line := NULL] diff --git a/README.Rmd b/README.Rmd index 0e3bed1..e611469 100644 --- a/README.Rmd +++ b/README.Rmd @@ -102,6 +102,7 @@ The rest of this documentation includes: notes and suggestions for running `growthcleanr` with large data sources - [Next steps](https://carriedaymont.github.io/growthcleanr/articles/next-steps.html), notes on potential enhancements to the pediatric and adult algorithms +- [Developer guidelines](https://carriedaymont.github.io/growthcleanr/articles/developer-guidelines.html), advice for contributors to this package, including a CRAN release checklist ## Changes diff --git a/README.md b/README.md index d4e7232..01d3a0c 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,10 @@ The rest of this documentation includes: - [Next steps](https://carriedaymont.github.io/growthcleanr/articles/next-steps.html), notes on potential enhancements to the pediatric and adult algorithms +- [Developer + guidelines](https://carriedaymont.github.io/growthcleanr/articles/developer-guidelines.html), + advice 
for contributors to this package, including a CRAN release + checklist ## Changes diff --git a/_pkgdown.yml b/_pkgdown.yml index 7934b0f..8a0c688 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,6 +19,7 @@ articles: - utilities - large-data-sets - next-steps + - developer-guidelines reference: - title: "Cleaning height and weight observations" diff --git a/cran-comments.md b/cran-comments.md index f05f783..16d8dd9 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,4 +1,4 @@ -# CRAN submission growthcleanr 2.0.2 (1) +# CRAN submission growthcleanr 2.0.3 ## R CMD check results There were no ERRORs, WARNINGs, or NOTEs. @@ -6,8 +6,18 @@ There were no ERRORs, WARNINGs, or NOTEs. ## Downstream dependencies There are currently no downstream dependencies for this package. + + # Previous Submissions +# CRAN submission growthcleanr 2.0.2 (1) + +## R CMD check results +There were no ERRORs, WARNINGs, or NOTEs. + +## Downstream dependencies +There are currently no downstream dependencies for this package. + # CRAN submission growthcleanr 2.0.1 (6) ## R CMD check results diff --git a/docs/404.html b/docs/404.html index 137d06c..29e132b 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -78,6 +78,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -125,7 +128,7 @@

    Page not found (404)

  • Reference @@ -84,27 +87,8 @@

    License

    -
    MIT License
    -
    -Copyright (c) 2018-2022 Carrie Daymont
    -
    -Permission is hereby granted, free of charge, to any person obtaining a copy
    -of this software and associated documentation files (the "Software"), to deal
    -in the Software without restriction, including without limitation the rights
    -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    -copies of the Software, and to permit persons to whom the Software is
    -furnished to do so, subject to the following conditions:
    -
    -The above copyright notice and this permission notice shall be included in all
    -copies or substantial portions of the Software.
    -
    -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    -SOFTWARE.
    +
    YEAR: 2022
    +COPYRIGHT HOLDER: Carrie Daymont
     
    @@ -118,7 +102,7 @@

    License

  • @@ -110,7 +113,7 @@ @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -110,7 +113,7 @@
  • error.load.mincount - default 2; @@ -335,7 +338,7 @@

    Derivation processsurvey +smoothed using the svysmooth() function in the R survey package to estimate the weight and height SD scores for each day up to 7,305 days, with a bandwidth chosen to balance between over- and under-fitting, and interpolation between the estimates from this diff --git a/docs/articles/developer-guidelines.html b/docs/articles/developer-guidelines.html new file mode 100644 index 0000000..c6854bb --- /dev/null +++ b/docs/articles/developer-guidelines.html @@ -0,0 +1,189 @@ + + + + + + + +Developer guidelines • growthcleanr + + + + + + + + + + + + +
    +
    + + + + +
    +
    + + + + +
    +

    CRAN Release Checklist +

    +

    When deploying updates to CRAN, here are a few updates that you need to +check off before doing so:

    +
      +
    • Update version number in DESCRIPTION in format major.minor.patch +(e.g. 1.2.1)
    • +
    • Update package documentation with +devtools::document() +
    • +
    • Run devtools::check(): +
        +
      • If any problems come up that have not previously been documented in +cran-comments.md, fix them; if a problem cannot be fixed, document it in cran-comments.md. There +should be no ERRORs or WARNINGs before submitting to CRAN.
      • +
      • Look through past CRAN submissions to make sure you haven’t +reintroduced a bug/note that has been addressed in a previous +version
      • +
      +
    • +
    • If you have made any changes to README.Rmd, re-knit to +README.md +
    • +
    • Update pkgdown site with pkgdown::build_site() +
    • +
    • Update NEWS.md with version updates
    • +
    • Update cran-comments.md in format
    • +
    • After the branch is merged, create a GitHub release
    • +
    • After the branch is merged, submit to CRAN with previously written +comments!
    • +
    +
    +
    + + + +
    + + + +
    + +
    +

    +

    Site built with pkgdown 2.0.6.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/articles/index.html b/docs/articles/index.html index 927b54f..1ea1d77 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -111,13 +114,15 @@

    Advanced

    Next steps
    +
    Developer guidelines
    +
  • @@ -110,7 +113,7 @@ +

    To install the latest development version from GitHub using +devtools:

    -install.packages("remotes")
    -remotes::install_github("carriedaymont/growthcleanr", ref="main")
    -

    Note that ref="main" is required; the default branch is -“main”, and must be referred to explicitly.

    -

    growthcleanr itself has several dependencies, so it may -take a little while to download and install everything on your -machine.

    +devtools::install_github("carriedaymont/growthcleanr", ref="main") +

    Installing growthcleanr will install several additional +packages in turn.

    +

    See GitHub and source-level install for +developers for additional details.

    Optional packages

    @@ -247,21 +245,39 @@

    Docker

    -

    Source-level install for developers +

    +GitHub and source-level install for +developers

    -

    If you want to work with and potentially change the -growthcleanr code itself, you can download or clone the -growthcleanr source code and then install it from source. -To clone the source using git:

    -
    % git clone https://github.com/carriedaymont/growthcleanr.git
    -

    Either way, once you have the growthcleanr package -source, open an R session from the growthcleanr base -directory. Then install growthcleanr using the R devtools -package:

    +

    You can install the growthcleanr package directly from +GitHub using devtools in the R console with:

    +
    +install.packages("devtools")
    +devtools::install_github("carriedaymont/growthcleanr", ref="main")
    +

    growthcleanr itself has several dependencies, so it may +take a little while to download and install everything on your +machine.

    +

    Note that the ref="main" part is required; the default +value of ref refers to a branch name that is not used in +the growthcleanr repository, which instead uses a default +branch called “main”.

    +

    To install a different branch, for example if you want to test a +branch associated with a merge request, specify the branch name as the +value of ref.

    +

    If you are unable to install devtools, a similar +function is available in the remotes package:

    +install.packages("remotes")
    +remotes::install_github("carriedaymont/growthcleanr", ref="main")
    +

    If you are developing the growthcleanr code itself, you +can download or clone the growthcleanr source code and then +install it from source. To clone the source using git:

    +
    % git clone https://github.com/carriedaymont/growthcleanr.git
    +

    Once you have the growthcleanr package source, open an R +session from the growthcleanr base directory. Then install +growthcleanr using the R devtools package:

    +
     devtools::install(".")
    -

    You can also install the package from an installation file if one is -obtained.

    diff --git a/docs/articles/large-data-sets.html b/docs/articles/large-data-sets.html index af80168..19a5bd0 100644 --- a/docs/articles/large-data-sets.html +++ b/docs/articles/large-data-sets.html @@ -33,7 +33,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -110,7 +113,7 @@ @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -110,7 +113,7 @@ @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -110,7 +113,7 @@ @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -110,7 +113,7 @@ +

    To install the latest development version from GitHub using +devtools:

    -install.packages("remotes")
    -remotes::install_github("carriedaymont/growthcleanr", ref="main")
    -

    Note that ref="main" is required whether you use -devtools or remotes. The default -growthcleanr code branch is “main”, and this must be -referred to explicitly.

    +devtools::install_github("carriedaymont/growthcleanr", ref="main") +

    Installing growthcleanr will install several additional +packages in turn.

    Further installation details and notes can be found under Installation.

    @@ -304,7 +299,7 @@

    ExampleFor a data.frame object source_data containing growth data:

    -library(growthcleanr)
    +library(growthcleanr)
     
     # prepare data as a data.table
     data <- as.data.table(source_data)
    diff --git a/docs/articles/usage.html b/docs/articles/usage.html
    index db7600a..0876c26 100644
    --- a/docs/articles/usage.html
    +++ b/docs/articles/usage.html
    @@ -33,7 +33,7 @@
           
           
             growthcleanr
    -        2.0.1
    +        2.0.3
           
         
    @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -110,7 +113,7 @@

    Basic operations using example synthetic data

    diff --git a/docs/articles/utilities.html b/docs/articles/utilities.html index 929fb96..8dc24e4 100644 --- a/docs/articles/utilities.html +++ b/docs/articles/utilities.html @@ -33,7 +33,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -79,6 +79,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • @@ -111,7 +114,7 @@

    Utilities for computing pediatric BMI percentiles, Z-scores, and related tools

    -

    2022-07-17

    +

    2022-11-02

    Source: vignettes/utilities.Rmd diff --git a/docs/authors.html b/docs/authors.html index a8926fb..7c3e80f 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -87,7 +90,7 @@

    Authors

    • -

      Daymont Carrie. Author. +

      Daymont Carrie. Contributor, maintainer.

    • @@ -107,7 +110,7 @@

      Authors

    • -

      De los Santos Hannah. Contributor, maintainer. +

      De los Santos Hannah. Contributor.

    • @@ -178,7 +181,7 @@

      Citation

      diff --git a/docs/index.html b/docs/index.html index 4926827..d4afb83 100644 --- a/docs/index.html +++ b/docs/index.html @@ -12,7 +12,7 @@ - - - - - - - -Answers for adjustcarryforward — acf_answers • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Answers for adjustcarryforward — acf_answers • growthcleanr - - + + - - -
      -
      -
    - - -
    +
    @@ -162,109 +95,107 @@

    Answers for adjustcarryforward

    for a given dataset, already run through cleangrowth.

    -
    acf_answers(
    -  subjid,
    -  param,
    -  agedays,
    -  sex,
    -  measurement,
    -  orig.exclude,
    -  sd.recenter = NA,
    -  ewma.exp = -1.5,
    -  ref.data.path = "",
    -  quietly = T
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    subjid

    Vector of unique identifiers for each subject in the database.

    param

    Vector identifying each measurement, may be 'WEIGHTKG', 'HEIGHTCM', or 'LENGTHCM' +

    +
    acf_answers(
    +  subjid,
    +  param,
    +  agedays,
    +  sex,
    +  measurement,
    +  orig.exclude,
    +  sd.recenter = NA,
    +  ewma.exp = -1.5,
    +  ref.data.path = "",
    +  quietly = TRUE
    +)
    +
    + +
    +

    Arguments

    +
    subjid
    +

    Vector of unique identifiers for each subject in the database.

    + + +
    param
    +

    Vector identifying each measurement, may be 'WEIGHTKG', 'HEIGHTCM', or 'LENGTHCM' 'HEIGHTCM' vs. 'LENGTHCM' only affects z-score calculations between ages 24 to 35 months (730 to 1095 days). All linear measurements below 731 days of life (age 0-23 months) are interpreted as supine length, and all linear measurements above 1095 days of life (age 36+ months) are interpreted as standing height. -Note: at the moment, all LENGTHCM will be converted to HEIGHTCM. In the future, the algorithm will be updated to consider this difference.

    agedays

    Numeric vector containing the age in days at each measurement.

    sex

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', -'f' or 1 for females.

    measurement

    Numeric vector containing the actual measurement data. Weight must be in -kilograms (kg), and linear measurements (height vs. length) in centimeters (cm).

    orig.exclude

    Vector of exclusion assessment results from cleangrowth()

    sd.recenter

    Data frame or table with median SD-scores per day of life

    ewma.exp

    Exponent to use for weighting measurements in the exponentially weighted moving +Note: at the moment, all LENGTHCM will be converted to HEIGHTCM. In the future, the algorithm will be updated to consider this difference.

    + + +
    agedays
    +

    Numeric vector containing the age in days at each measurement.

    + + +
    sex
    +

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', +'f' or 1 for females.

    + + +
    measurement
    +

    Numeric vector containing the actual measurement data. Weight must be in +kilograms (kg), and linear measurements (height vs. length) in centimeters (cm).

    + + +
    orig.exclude
    +

    Vector of exclusion assessment results from cleangrowth()

    + + +
    sd.recenter
    +

    Data frame or table with median SD-scores per day of life

    + + +
    ewma.exp
    +

    Exponent to use for weighting measurements in the exponentially weighted moving average calculations. Defaults to -1.5. This exponent should be negative in order to weight growth measurements closer to the measurement being evaluated more strongly. Exponents that are further from zero (e.g. -3) will increase the relative influence of measurements close in time to the measurement -being evaluated compared to using the default exponent.

    ref.data.path

    Path to reference data. If not supplied, the year 2000 -Centers for Disease Control (CDC) reference data will be used.

    quietly

    Determines if function messages are to be displayed and if log files (parallel only) -are to be generated. Defaults to TRUE.

    - -

    Value

    - -

    A data frame, containing an index "n" of rows, corresponding to the +being evaluated compared to using the default exponent.

    + + +
    ref.data.path
    +

    Path to reference data. If not supplied, the year 2000 +Centers for Disease Control (CDC) reference data will be used.

    + + +
    quietly
    +

    Determines if function messages are to be displayed and if log files (parallel only) +are to be generated. Defaults to TRUE.

    + +
    +
    +

    Value

    + + +

    A data frame, containing an index "n" of rows, corresponding to the original order of the input vectors, and "acf_answers", containing the answers on whether a height value should be kept or excluded (returns "Definitely Exclude", "Definitely Include", or "Unknown" for height values, NA for weight values).

    +
    + -
    - +
    - - + + diff --git a/docs/reference/adjustcarryforward.html b/docs/reference/adjustcarryforward.html index 80f36ed..861d736 100644 --- a/docs/reference/adjustcarryforward.html +++ b/docs/reference/adjustcarryforward.html @@ -1,73 +1,18 @@ - - - - - - - -adjustcarryforward +<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>adjustcarryforward adjustcarryforward Uses absolute height velocity to identify values -excluded as carried forward values for reinclusion. — adjustcarryforward • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    @@ -170,66 +103,65 @@

    adjustcarryforward excluded as carried forward values for reinclusion.

    -
    adjustcarryforward(
    -  subjid,
    -  param,
    -  agedays,
    -  sex,
    -  measurement,
    -  orig.exclude,
    -  exclude_opt = 0,
    -  sd.recenter = NA,
    -  ewma.exp = -1.5,
    -  ref.data.path = "",
    -  quietly = T,
    -  minfactor = 0.5,
    -  maxfactor = 2,
    -  banddiff = 3,
    -  banddiff_plus = 5.5,
    -  min_ht.exp_under = 2,
    -  min_ht.exp_over = 0,
    -  max_ht.exp_under = 0.33,
    -  max_ht.exp_over = 1.5
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    subjid

    Vector of unique identifiers for each subject in the database.

    param

    Vector identifying each measurement, may be 'WEIGHTKG', 'HEIGHTCM', or 'LENGTHCM' +

    +
    adjustcarryforward(
    +  subjid,
    +  param,
    +  agedays,
    +  sex,
    +  measurement,
    +  orig.exclude,
    +  exclude_opt = 0,
    +  sd.recenter = NA,
    +  ewma.exp = -1.5,
    +  ref.data.path = "",
    +  quietly = TRUE,
    +  minfactor = 0.5,
    +  maxfactor = 2,
    +  banddiff = 3,
    +  banddiff_plus = 5.5,
    +  min_ht.exp_under = 2,
    +  min_ht.exp_over = 0,
    +  max_ht.exp_under = 0.33,
    +  max_ht.exp_over = 1.5
    +)
    +
    + +
    +

    Arguments

    +
    subjid
    +

    Vector of unique identifiers for each subject in the database.

    + + +
    param
    +

    Vector identifying each measurement, may be 'WEIGHTKG', 'HEIGHTCM', or 'LENGTHCM' 'HEIGHTCM' vs. 'LENGTHCM' only affects z-score calculations between ages 24 to 35 months (730 to 1095 days). All linear measurements below 731 days of life (age 0-23 months) are interpreted as supine length, and all linear measurements above 1095 days of life (age 36+ months) are interpreted as standing height. -Note: at the moment, all LENGTHCM will be converted to HEIGHTCM. In the future, the algorithm will be updated to consider this difference.

    agedays

    Numeric vector containing the age in days at each measurement.

    sex

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', -'f' or 1 for females.

    measurement

    Numeric vector containing the actual measurement data. Weight must be in -kilograms (kg), and linear measurements (height vs. length) in centimeters (cm).

    orig.exclude

    Vector of exclusion assessment results from cleangrowth()

    exclude_opt

    Number from 0 to 3 indicating which option to use to handle strings of carried-forwards: -0. no change.

      -
    1. when deciding to exclude values, if we have a string of carried forwards, +Note: at the moment, all LENGTHCM will be converted to HEIGHTCM. In the future, the algorithm will be updated to consider this difference.

      + + +
      agedays
      +

      Numeric vector containing the age in days at each measurement.

      + + +
      sex
      +

      Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', +'f' or 1 for females.

      + + +
      measurement
      +

      Numeric vector containing the actual measurement data. Weight must be in +kilograms (kg), and linear measurements (height vs. length) in centimeters (cm).

      + + +
      orig.exclude
      +

      Vector of exclusion assessment results from cleangrowth()

      + + +
      exclude_opt
      +

      Number from 0 to 3 indicating which option to use to handle strings of carried-forwards: +0. no change.

      1. when deciding to exclude values, if we have a string of carried forwards, drop the most deviant value, and all CFs in the same string, and move on as normal.

      2. when deciding to exclude values, if the most deviant in a @@ -242,112 +174,112 @@

        Arg string from 1:N. Exclude all after the first that is flagged for exclusion when comparing to the Include before and after. Make sure remove things designated as include.

      3. -
    sd.recenter

    Data frame or table with median SD-scores per day of life

    ewma.exp

    Exponent to use for weighting measurements in the exponentially weighted moving + + + +

    sd.recenter
    +

    Data frame or table with median SD-scores per day of life

    + + +
    ewma.exp
    +

    Exponent to use for weighting measurements in the exponentially weighted moving average calculations. Defaults to -1.5. This exponent should be negative in order to weight growth measurements closer to the measurement being evaluated more strongly. Exponents that are further from zero (e.g. -3) will increase the relative influence of measurements close in time to the measurement -being evaluated compared to using the default exponent.

    ref.data.path

    Path to reference data. If not supplied, the year 2000 -Centers for Disease Control (CDC) reference data will be used.

    quietly

    Determines if function messages are to be displayed and if log files (parallel only) -are to be generated. Defaults to TRUE.

    minfactor

    Sweep variable for computing mindiff.next.ht in 15f, default 0.5

    maxfactor

    Sweep variable for computing maxdiff.next.ht in 15f, default 2

    banddiff

    Sweep variable for computing mindiff.next.ht in 15f, default 3

    banddiff_plus

    Sweep variable for computing maxdiff.next.ht in 15, default 5.5

    min_ht.exp_under

    Sweep variable for computing ht.exp in 15f, default 2

    min_ht.exp_over

    Sweep variable for computing ht.exp in 15f, default 0

    max_ht.exp_under

    Sweep variable for computing ht.exp in 15f, default 0.33

    max_ht.exp_over

    Sweep variable for computing ht.exp in 15f, default 1.5

    - -

    Value

    - -

    Re-evaluated exclusion assessments based on height velocity.

    - -

    Examples

    -
    # Run on a small subset of given data -df <- as.data.frame(syngrowth) -df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ] -clean_df <- cbind(df, - "gcr_result" = cleangrowth(df$subjid, - df$param, - df$agedays, - df$sex, - df$measurement)) - -# Adjust carry forward values in cleaned data -adj_clean <- adjustcarryforward(subjid = clean_df$subjid, - param = clean_df$param, - agedays = clean_df$agedays, - sex = clean_df$sex, - measurement = clean_df$measurement, - orig.exclude = clean_df$gcr_result) -
    +being evaluated compared to using the default exponent.

    + + +
    ref.data.path
    +

    Path to reference data. If not supplied, the year 2000 +Centers for Disease Control (CDC) reference data will be used.

    + + +
    quietly
    +

    Determines if function messages are to be displayed and if log files (parallel only) +are to be generated. Defaults to TRUE.

    + + +
    minfactor
    +

    Sweep variable for computing mindiff.next.ht in 15f, default 0.5

    + + +
    maxfactor
    +

    Sweep variable for computing maxdiff.next.ht in 15f, default 2

    + + +
    banddiff
    +

    Sweep variable for computing mindiff.next.ht in 15f, default 3

    + + +
    banddiff_plus
    +

    Sweep variable for computing maxdiff.next.ht in 15, default 5.5

    + + +
    min_ht.exp_under
    +

    Sweep variable for computing ht.exp in 15f, default 2

    + + +
    min_ht.exp_over
    +

    Sweep variable for computing ht.exp in 15f, default 0

    + + +
    max_ht.exp_under
    +

    Sweep variable for computing ht.exp in 15f, default 0.33

    + + +
    max_ht.exp_over
    +

    Sweep variable for computing ht.exp in 15f, default 1.5

    + +
    +
    +

    Value

    + + +

    Re-evaluated exclusion assessments based on height velocity.

    +
    + +
    +

    Examples

    +
    # Run on a small subset of given data
    +df <- as.data.frame(syngrowth)
    +df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ]
    +clean_df <- cbind(df,
    +                  "gcr_result" = cleangrowth(df$subjid,
    +                                             df$param,
    +                                             df$agedays,
    +                                             df$sex,
    +                                             df$measurement))
    +
    +# Adjust carry forward values in cleaned data
    +adj_clean <- adjustcarryforward(subjid = clean_df$subjid,
    +                                param = clean_df$param,
    +                                agedays = clean_df$agedays,
    +                                sex = clean_df$sex,
    +                                measurement = clean_df$measurement,
    +                                orig.exclude = clean_df$gcr_result)
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/bmianthro.html b/docs/reference/bmianthro.html index 0f712c2..82896db 100644 --- a/docs/reference/bmianthro.html +++ b/docs/reference/bmianthro.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -111,7 +114,7 @@

    bmianthro.txt.gz

    diff --git a/docs/reference/cleangrowth.html b/docs/reference/cleangrowth.html index 4733083..27851c4 100644 --- a/docs/reference/cleangrowth.html +++ b/docs/reference/cleangrowth.html @@ -1,67 +1,12 @@ - - - - - - - -Clean growth measurements — cleangrowth • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Clean growth measurements — cleangrowth • growthcleanr - - - - + + -
    -
    - -
    - -
    +
    @@ -160,255 +93,256 @@

    Clean growth measurements

    Clean growth measurements

    -
    cleangrowth(
    -  subjid,
    -  param,
    -  agedays,
    -  sex,
    -  measurement,
    -  recover.unit.error = F,
    -  sd.extreme = 25,
    -  z.extreme = 25,
    -  lt3.exclude.mode = "default",
    -  height.tolerance.cm = 2.5,
    -  error.load.mincount = 2,
    -  error.load.threshold = 0.5,
    -  sd.recenter = NA,
    -  sdmedian.filename = "",
    -  sdrecentered.filename = "",
    -  include.carryforward = F,
    -  ewma.exp = -1.5,
    -  ref.data.path = "",
    -  log.path = ".",
    -  parallel = F,
    -  num.batches = NA,
    -  quietly = T,
    -  adult_cutpoint = 20,
    -  weight_cap = Inf,
    -  adult_columns_filename = ""
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    subjid

    Vector of unique identifiers for each subject in the database.

    param

    Vector identifying each measurement, may be 'WEIGHTKG', 'WEIGHTLBS', 'HEIGHTCM', 'HEIGHTIN', or 'LENGTHCM' +

    +
    cleangrowth(
    +  subjid,
    +  param,
    +  agedays,
    +  sex,
    +  measurement,
    +  recover.unit.error = FALSE,
    +  sd.extreme = 25,
    +  z.extreme = 25,
    +  lt3.exclude.mode = "default",
    +  height.tolerance.cm = 2.5,
    +  error.load.mincount = 2,
    +  error.load.threshold = 0.5,
    +  sd.recenter = NA,
    +  sdmedian.filename = "",
    +  sdrecentered.filename = "",
    +  include.carryforward = FALSE,
    +  ewma.exp = -1.5,
    +  ref.data.path = "",
    +  log.path = NA,
    +  parallel = FALSE,
    +  num.batches = NA,
    +  quietly = TRUE,
    +  adult_cutpoint = 20,
    +  weight_cap = Inf,
    +  adult_columns_filename = ""
    +)
    +
    + +
    +

    Arguments

    +
    subjid
    +

    Vector of unique identifiers for each subject in the database.

    + + +
    param
    +

    Vector identifying each measurement, may be 'WEIGHTKG', 'WEIGHTLBS', 'HEIGHTCM', 'HEIGHTIN', or 'LENGTHCM' 'HEIGHTCM'/'HEIGHTIN' vs. 'LENGTHCM' only affects z-score calculations between ages 24 to 35 months (730 to 1095 days). All linear measurements below 731 days of life (age 0-23 months) are interpreted as supine length, and all linear measurements above 1095 days of life (age 36+ months) are interpreted as standing height. Note: at the moment, all LENGTHCM will be converted to HEIGHTCM. In the future, the algorithm will be updated to consider this difference. Additionally, imperial 'HEIGHTIN' and 'WEIGHTLBS' measurements are converted to -metric during algorithm calculations.

    agedays

    Numeric vector containing the age in days at each measurement.

    sex

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', 'f' or 1 for females.

    measurement

    Numeric vector containing the actual measurement data. Weight must be in -kilograms (kg), and linear measurements (height vs. length) in centimeters (cm).

    recover.unit.error

    Indicates whether the cleaning algorithm should +metric during algorithm calculations.

    + + +
    agedays
    +

    Numeric vector containing the age in days at each measurement.

    + + +
    sex
    +

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', 'f' or 1 for females.

    + + +
    measurement
    +

    Numeric vector containing the actual measurement data. Weight must be in +kilograms (kg), and linear measurements (height vs. length) in centimeters (cm).

    + + +
    recover.unit.error
    +

    Indicates whether the cleaning algorithm should attempt to identify unit errors (i.e., inches vs. cm, lbs vs. kg). If unit errors are identified, the value will be corrected and retained within the -cleaning algorithm as a valid measurement. Defaults to FALSE.

    sd.extreme

    Measurements more than sd.extreme standard deviations from -the mean (either above or below) will be flagged as invalid. Defaults to 25.

    z.extreme

    Measurements with an absolute z-score greater than -z.extreme will be flagged as invalid. Defaults to 25.

    lt3.exclude.mode

    Determines type of exclusion procedure to use for 1 or 2 measurements of one type without +cleaning algorithm as a valid measurement. Defaults to FALSE.

    + + +
    sd.extreme
    +

    Measurements more than sd.extreme standard deviations from +the mean (either above or below) will be flagged as invalid. Defaults to 25.

    + + +
    z.extreme
    +

    Measurements with an absolute z-score greater than +z.extreme will be flagged as invalid. Defaults to 25.

    + + +
    lt3.exclude.mode
    +

    Determines type of exclusion procedure to use for 1 or 2 measurements of one type without matching same ageday measurements for the other parameter. Options include "default" (standard growthcleanr approach), and "flag.both" (in case of two measurements of one type without matching values for the other parameter, flag both -for exclusion if beyond threshold)

    height.tolerance.cm

    maximum decrease in height tolerated for sequential measurements

    error.load.mincount

    minimum count of exclusions on parameter before -considering excluding all measurements. Defaults to 2.

    error.load.threshold

    threshold of percentage of excluded measurement count to included measurement -count that must be exceeded before excluding all measurements of either parameter. Defaults to 0.5.

    sd.recenter

    specifies how to recenter medians. May be a data frame or +for exclusion if beyond threshold)

    + + +
    height.tolerance.cm
    +

    maximum decrease in height tolerated for sequential measurements

    + + +
    error.load.mincount
    +

    minimum count of exclusions on parameter before +considering excluding all measurements. Defaults to 2.

    + + +
    error.load.threshold
    +

    threshold of percentage of excluded measurement count to included measurement +count that must be exceeded before excluding all measurements of either parameter. Defaults to 0.5.

    + + +
    sd.recenter
    +

    specifies how to recenter medians. May be a data frame or table w/median SD-scores per day of life by gender and parameter, or "NHANES" -or "derive" as a character vector.

      -
    • If sd.recenter is specified as a data set, use the data set

    • +or "derive" as a character vector.

      • If sd.recenter is specified as a data set, use the data set

      • If sd.recenter is specified as "nhanes", use NHANES reference medians

      • If sd.recenter is specified as "derive", derive from input

      • -
      • If sd.recenter is not specified or NA:

          -
        • If the input set has at least 5,000 observations, derive medians from input

        • +
        • If sd.recenter is not specified or NA:

          • If the input set has at least 5,000 observations, derive medians from input

          • If the input set has fewer than 5,000 observations, use NHANES

        • -
        - -

        If specifying a data set, columns must include param, sex, agedays, and sd.median +

      If specifying a data set, columns must include param, sex, agedays, and sd.median (referred to elsewhere as "modified Z-score"), and those medians will be used for recentering. A summary of how the NHANES reference medians were derived is -available in README.md. Defaults to NA.

    sdmedian.filename

    Name of file to save sd.median data calculated on the input dataset to as CSV. +available in README.md. Defaults to NA.

    + + +
    sdmedian.filename
    +

    Name of file to save sd.median data calculated on the input dataset to as CSV. Defaults to "", for which this data will not be saved. Use for extracting medians for parallel processing -scenarios other than the built-in parallel option.

    sdrecentered.filename

    Name of file to save re-centered data to as CSV. Defaults to "", for which this -data will not be saved. Useful for post-processing and debugging.

    include.carryforward

    Determines whether Carry-Forward values are kept in the output. Defaults to False.

    ewma.exp

    Exponent to use for weighting measurements in the +scenarios other than the built-in parallel option.

    + + +
    sdrecentered.filename
    +

    Name of file to save re-centered data to as CSV. Defaults to "", for which this +data will not be saved. Useful for post-processing and debugging.

    + + +
    include.carryforward
    +

    Determines whether Carry-Forward values are kept in the output. Defaults to FALSE.

    + + +
    ewma.exp
    +

    Exponent to use for weighting measurements in the exponentially weighted moving average calculations. Defaults to -1.5. This exponent should be negative in order to weight growth measurements closer to the measurement being evaluated more strongly. Exponents that are further from zero (e.g. -3) will increase the relative influence of measurements close in time to the measurement being evaluated compared to -using the default exponent.

    ref.data.path

    Path to reference data. If not supplied, the year 2000 -Centers for Disease Control (CDC) reference data will be used.

    log.path

    Path to log file output when running in parallel (non-quiet mode). Default is ".". A new -directory will be created if necessary. Set to NA to disable log files.

    parallel

    Determines if function runs in parallel. Defaults to FALSE.

    num.batches

    Specify the number of batches to run in parallel. Only +using the default exponent.

    + + +
    ref.data.path
    +

    Path to reference data. If not supplied, the year 2000 +Centers for Disease Control (CDC) reference data will be used.

    + + +
    log.path
    +

    Path to log file output when running in parallel (non-quiet mode). Default is NA. A new +directory will be created if necessary. Set to NA to disable log files.

    + + +
    parallel
    +

    Determines if function runs in parallel. Defaults to FALSE.

    + + +
    num.batches
    +

    Specify the number of batches to run in parallel. Only applies if parallel is set to TRUE. Defaults to the number of workers -returned by the getDoParWorkers function in the foreach package.

    quietly

    Determines if function messages are to be displayed and if log files (parallel only) are to be generated. -Defaults to TRUE

    adult_cutpoint

    Number between 18 and 20, describing ages when the +returned by the getDoParWorkers function in the foreach package.

    + + +
    quietly
    +

    Determines if function messages are to be displayed and if log files (parallel only) are to be generated. +Defaults to TRUE

    + + +
    adult_cutpoint
    +

    Number between 18 and 20, describing the age cutoff between the two algorithms: the pediatric algorithm is applied below it (< adult_cutpoint), and the adult algorithm is applied at or above it (>= adult_cutpoint). Numbers outside this range will be -changed to the closest number within the range. Defaults to 20.

    weight_cap

    Positive number, describing a weight cap in kg (rounded to the +changed to the closest number within the range. Defaults to 20.

    + + +
    weight_cap
    +

    Positive number, describing a weight cap in kg (rounded to the nearest .1, +/- .1) within the adult dataset. If there is no weight cap, set -to Inf. Defaults to Inf.

    adult_columns_filename

    Name of file to save original adult data, with additional output columns to +to Inf. Defaults to Inf.

    + + +
    adult_columns_filename
    +

    Name of file to save original adult data, with additional output columns to as CSV. Defaults to "", for which this data will not be saved. Useful -for post-analysis. For more information on this output, please see README.

    +for post-analysis. For more information on this output, please see README.

    -

    Value

    +
    +
    +

    Value

    + -

    Vector of exclusion codes for each of the input measurements.

    -

    Possible values for each code are:

      -
    • 'Include', 'Unit-Error-High', 'Unit-Error-Low', 'Swapped-Measurements', 'Missing',

    • +

      Vector of exclusion codes for each of the input measurements.

      + + +

      Possible values for each code are:

      • 'Include', 'Unit-Error-High', 'Unit-Error-Low', 'Swapped-Measurements', 'Missing',

      • 'Exclude-Carried-Forward', 'Exclude-SD-Cutoff', 'Exclude-EWMA-Extreme', 'Exclude-EWMA-Extreme-Pair',

      • 'Exclude-Extraneous-Same-Day',

      • 'Exclude-EWMA-8', 'Exclude-EWMA-9', 'Exclude-EWMA-10', 'Exclude-EWMA-11', 'Exclude-EWMA-12', 'Exclude-EWMA-13', 'Exclude-EWMA-14',

      • 'Exclude-Min-Height-Change', 'Exclude-Max-Height-Change',

      • 'Exclude-Pair-Delta-17', 'Exclude-Pair-Delta-18', 'Exclude-Pair-Delta-19',

      • 'Exclude-Single-Outlier', 'Exclude-Too-Many-Errors', 'Exclude-Too-Many-Errors-Other-Parameter'

      • -
      - - -

      Examples

      -
      # Run calculation using a small subset of given data -df_stats <- as.data.frame(syngrowth) -df_stats <- df_stats[df_stats$subjid %in% unique(df_stats[, "subjid"])[1:5], ] - -clean_stats <-cleangrowth(subjid = df_stats$subjid, - param = df_stats$param, - agedays = df_stats$agedays, - sex = df_stats$sex, - measurement = df_stats$measurement) - -# Once processed you can filter data based on result value -df_stats <- cbind(df_stats, "clean_result" = clean_stats) -clean_df_stats <- df_stats[df_stats$clean_result == "Include",] - -# Parallel processing: run using 2 cores and batches -clean_stats <- cleangrowth(subjid = df_stats$subjid, - param = df_stats$param, - agedays = df_stats$agedays, - sex = df_stats$sex, - measurement = df_stats$measurement, - parallel = TRUE, - num.batches = 2) -
      #> [2021-07-06 10:53:04] Writing batch logs to '.'...
      #> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
      #> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
      #> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
      #> Warning: <anonymous>: ... may be used in an incorrect context: ‘.fun(piece, ...)’
      +
    + +
    +

    Examples

    +
    # \donttest{
    +# Run calculation using a small subset of given data
    +df_stats <- as.data.frame(syngrowth)
    +df_stats <- df_stats[df_stats$subjid %in% unique(df_stats[, "subjid"])[1:5], ]
    +
    +clean_stats <-cleangrowth(subjid = df_stats$subjid,
    +                         param = df_stats$param,
    +                         agedays = df_stats$agedays,
    +                         sex = df_stats$sex,
    +                         measurement = df_stats$measurement)
    +
    +# Once processed you can filter data based on result value
    +df_stats <- cbind(df_stats, "clean_result" = clean_stats)
    +clean_df_stats <- df_stats[df_stats$clean_result == "Include",]
    +
    +# Parallel processing: run using 2 cores and batches
    +clean_stats <- cleangrowth(subjid = df_stats$subjid,
    +                           param = df_stats$param,
    +                           agedays = df_stats$agedays,
    +                           sex = df_stats$sex,
    +                           measurement = df_stats$measurement,
    +                           parallel = TRUE,
    +                           num.batches = 2)
    +#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
    +#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
    +#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
    +#> Warning: <anonymous>: ... may be used in an incorrect context: '.fun(piece, ...)'
    +# }
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/ewma.html b/docs/reference/ewma.html index 7e4fa81..bae6a81 100644 --- a/docs/reference/ewma.html +++ b/docs/reference/ewma.html @@ -1,67 +1,12 @@ - - - - - - - -Exponentially Weighted Moving Average (EWMA) — ewma • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Exponentially Weighted Moving Average (EWMA) — ewma • growthcleanr - - - - + + -
    -
    - -
    - -
    +
    @@ -160,83 +93,80 @@

    Exponentially Weighted Moving Average (EWMA)

    ewma calculates the exponentially weighted moving average (EWMA) for a set of numeric observations over time.

    -
    ewma(agedays, z, ewma.exp, ewma.adjacent = T)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    agedays

    Vector of age in days for each z score (potentially transformed to adjust weighting).

    z

    Input vector of numeric z-score data.

    ewma.exp

    Exponent to use for weighting.

    ewma.adjacent

    Specify whether EWMA values excluding adjacent measurements should be calculated. Defaults to TRUE.

    - -

    Value

    - -

    Data frame with 3 variables:

      -
    • The first variable (ewma.all) contains the EWMA at observation time +

      +
      ewma(agedays, z, ewma.exp, ewma.adjacent = TRUE)
      +
      + +
      +

      Arguments

      +
      agedays
      +

      Vector of age in days for each z score (potentially transformed to adjust weighting).

      + + +
      z
      +

      Input vector of numeric z-score data.

      + + +
      ewma.exp
      +

      Exponent to use for weighting.

      + + +
      ewma.adjacent
      +

      Specify whether EWMA values excluding adjacent measurements should be calculated. Defaults to TRUE.

      + +
      +
      +

      Value

      + + +

      Data frame with 3 variables:

      • The first variable (ewma.all) contains the EWMA at observation time excluding only the actual observation for that time point.

      • The second variable (ewma.before) contains the EWMA for each observation excluding both the actual observation and the immediate prior observation.

      • The third variable (ewma.after) contains the EWMA for each observation excluding both the actual observation and the subsequent observation.

      • -
      - - -

      Examples

      -
      # Run on 1 subject, 1 type of parameter -df_stats <- as.data.frame(syngrowth) -df_stats <- df_stats[df_stats$subjid == df_stats$subjid[1] & - df_stats$param == "HEIGHTCM", ] - -# Get the uncentered z-scores -measurement_to_z <- read_anthro(cdc.only = TRUE) -sd <- measurement_to_z(df_stats$param, - df_stats$agedays, - df_stats$sex, - df_stats$measurement, - TRUE) - -# Calculate exponentially weighted moving average -e_df <- ewma(df_stats$agedays, sd, ewma.exp = -1.5) -
      +
    + +
    +

    Examples

    +
    # Run on 1 subject, 1 type of parameter
    +df_stats <- as.data.frame(syngrowth)
    +df_stats <- df_stats[df_stats$subjid == df_stats$subjid[1] &
    +                       df_stats$param == "HEIGHTCM", ]
    +
    +# Get the uncentered z-scores
    +measurement_to_z <- read_anthro(cdc.only = TRUE)
    +sd <- measurement_to_z(df_stats$param,
    +                       df_stats$agedays,
    +                       df_stats$sex,
    +                       df_stats$measurement,
    +                       TRUE)
    +
    +# Calculate exponentially weighted moving average
    +e_df <- ewma(df_stats$agedays, sd, ewma.exp = -1.5)
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/ext_bmiz.html b/docs/reference/ext_bmiz.html index b4023ff..43991e8 100644 --- a/docs/reference/ext_bmiz.html +++ b/docs/reference/ext_bmiz.html @@ -1,72 +1,17 @@ - - - - - - - -Calculate extended BMI measures — ext_bmiz • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Calculate extended BMI measures — ext_bmiz • growthcleanr - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    @@ -170,55 +103,57 @@

    Calculate extended BMI measures

    differ only for children who have a BMI > 95th percentile.

    -
    ext_bmiz(
    -  data,
    -  age = "agem",
    -  wt = "wt",
    -  ht = "ht",
    -  bmi = "bmi",
    -  adjust.integer.age = T,
    -  ref.data.path = ""
    -)
    +
    +
    ext_bmiz(
    +  data,
    +  age = "agem",
    +  wt = "wt",
    +  ht = "ht",
    +  bmi = "bmi",
    +  adjust.integer.age = TRUE,
    +  ref.data.path = ""
    +)
    +
    + +
    +

    Arguments

    +
    data
    +

    Input data frame or data table

    + + +
    age
    +

    Name of input column containing subject age in months in quotes, default "agem"

    + + +
    wt
    +

    Name of input column containing weight (kg) value in quotes, default "wt"

    + + +
    ht
    +

    Name of input column containing height (cm) value in quotes, default "ht"

    -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    data

    Input data frame or data table

    age

    Name of input column containing subject age in months in quotes, default "agem"

    wt

    Name of input column containing weight (kg) value in quotes, default "wt"

    ht

    Name of input column containing height (cm) value in quotes, default "ht"

    bmi

    Name of input column containing calculated BMI in quotes, default "bmi"

    adjust.integer.age

    If age inputs are all integer, add 0.5 if TRUE; -default TRUE

    ref.data.path

    Path to directory containing reference data

    -

    Value

    +
    bmi
    +

    Name of input column containing calculated BMI in quotes, default "bmi"

    -

    Expanded data frame containing computed BMI values

    -

    Details

    +
    adjust.integer.age
    +

    If age inputs are all integer, add 0.5 if TRUE; +default TRUE

    + + +
    ref.data.path
    +

    Path to directory containing reference data

    + +
    +
    +

    Value

    + + +

    Expanded data frame containing computed BMI values

    +
    +
    +

    Details

    This function should produce output equivalent to the SAS macro provided at https://www.cdc.gov/nccdphp/dnpao/growthcharts/resources/sas.htm

    The extended BMIz is the inverse cumulative distribution function (CDF) of @@ -249,58 +184,57 @@

    Details in this package for convenience. If you are developing this package, use ref.data.path to adjust the path to this file from your working directory if necessary.

    +

    -

    Examples

    -
    # Run on a small subset of given data -df <- as.data.frame(syngrowth) -df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ] -df <- cbind(df, - "gcr_result" = cleangrowth(df$subjid, - df$param, - df$agedays, - df$sex, - df$measurement)) -df_wide <- longwide(df) # convert to wide format for ext_bmiz -df_wide_bmi <- simple_bmi(df_wide) # compute simple BMI - -# Calling the function with default column names -df_bmiz <- ext_bmiz(df_wide_bmi) - -# Specifying different column names; note that quotes are used -dfc <- simple_bmi(df_wide) -colnames(dfc)[colnames(dfc) %in% c("agem", "wt", "ht")] <- - c("agemos", "weightkg", "heightcm") -df_bmiz <- ext_bmiz(dfc, age="agemos", wt="weightkg", ht="heightcm") - -# Disabling conversion of all-integer age in months to (age + 0.5) -dfc <- simple_bmi(df_wide) -df_bmiz <- ext_bmiz(dfc, adjust.integer.age=FALSE) -
    +
    +

    Examples

    +
    # Run on a small subset of given data
    +df <- as.data.frame(syngrowth)
    +df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ]
    +df <- cbind(df,
    +            "gcr_result" = cleangrowth(df$subjid,
    +                                       df$param,
    +                                       df$agedays,
    +                                       df$sex,
    +                                       df$measurement))
    +df_wide <- longwide(df) # convert to wide format for ext_bmiz
    +df_wide_bmi <- simple_bmi(df_wide) # compute simple BMI
    +
    +# Calling the function with default column names
    +df_bmiz <- ext_bmiz(df_wide_bmi)
    +
    +# Specifying different column names; note that quotes are used
    +dfc <- simple_bmi(df_wide)
    +colnames(dfc)[colnames(dfc) %in% c("agem", "wt", "ht")] <-
    +  c("agemos", "weightkg", "heightcm")
    +df_bmiz <- ext_bmiz(dfc, age="agemos", wt="weightkg", ht="heightcm")
    +
    +# Disabling conversion of all-integer age in months to (age + 0.5)
    +dfc <- simple_bmi(df_wide)
    +df_bmiz <- ext_bmiz(dfc, adjust.integer.age=FALSE)
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/growth_cdc_ext.html b/docs/reference/growth_cdc_ext.html index 8f4dede..2a5fb3f 100644 --- a/docs/reference/growth_cdc_ext.html +++ b/docs/reference/growth_cdc_ext.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -111,7 +114,7 @@

    growthfile_cdc_ext.csv.gz

    diff --git a/docs/reference/index.html b/docs/reference/index.html index 2fbb4f2..951153b 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -223,7 +226,7 @@

    Datasets @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -111,7 +114,7 @@

    lenanthro.txt.gz

    diff --git a/docs/reference/longwide.html b/docs/reference/longwide.html index 010e12c..eafdcbf 100644 --- a/docs/reference/longwide.html +++ b/docs/reference/longwide.html @@ -1,67 +1,12 @@ - - - - - - - -Transform data in growthcleanr format into wide structure for BMI calculation — longwide • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Transform data in growthcleanr format into wide structure for BMI calculation — longwide • growthcleanr - - - - + + -
    -
    - -
    - -
    +
    @@ -160,112 +93,112 @@

    Transform data in growthcleanr format into wide structure for BMI calculatio

    longwide transforms data from long to wide format. Ideal for transforming output from growthcleanr::cleangrowth() into a format suitable for growthcleanr::ext_bmiz().

    -
    longwide(
    -  long_df,
    -  id = "id",
    -  subjid = "subjid",
    -  sex = "sex",
    -  agedays = "agedays",
    -  param = "param",
    -  measurement = "measurement",
    -  gcr_result = "gcr_result",
    -  include_all = FALSE,
    -  inclusion_types = c("Include")
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    long_df

    A data frame to be transformed. Expects columns: id, subjid, sex, agedays, param, measurement, and gcr_result.

    id

    name of observation ID column

    subjid

    name of subject ID column

    sex

    name of sex descriptor column

    agedays

    name of age (in days) descriptor column

    param

    name of parameter column to identify each type of measurement

    measurement

    name of measurement column containing the actual measurement data

    gcr_result

    name of column of results from growthcleanr::cleangrowth()

    include_all

    Determines whether the function keeps all exclusion codes. If TRUE, all exclusion types are kept and the inclusion_types argument is ignored. Defaults to FALSE.

    inclusion_types

    Vector indicating which exclusion codes from the cleaning algorithm should be included in the data, given that include_all is FALSE. For all options, see growthcleanr::cleangrowth(). Defaults to c("Include").

    - -

    Value

    - -

    Returns a data frame transformed from long to wide. Includes only values flagged with indicated inclusion types. Note that, for each subject, heights without corresponding weights for a given age (and vice versa) will be dropped.

    - -

    Examples

    -
    # Run on a small subset of given data -df <- as.data.frame(syngrowth) -df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ] -df <- cbind(df, - "gcr_result" = cleangrowth(df$subjid, - df$param, - df$agedays, - df$sex, - df$measurement)) -# Convert to wide format -wide_df <- longwide(df) - -# Include all inclusion types -wide_df <- longwide(df, include_all = TRUE) - -# Specify all inclusion codes -wide_df <- longwide(df, inclusion_types = c("Include", "Exclude-Carried-Forward")) -
    +
    +
    longwide(
    +  long_df,
    +  id = "id",
    +  subjid = "subjid",
    +  sex = "sex",
    +  agedays = "agedays",
    +  param = "param",
    +  measurement = "measurement",
    +  gcr_result = "gcr_result",
    +  include_all = FALSE,
    +  inclusion_types = c("Include")
    +)
    +
    + +
    +

    Arguments

    +
    long_df
    +

    A data frame to be transformed. Expects columns: id, subjid, sex, agedays, param, measurement, and gcr_result.

    + + +
    id
    +

    name of observation ID column

    + + +
    subjid
    +

    name of subject ID column

    + + +
    sex
    +

    name of sex descriptor column

    + + +
    agedays
    +

    name of age (in days) descriptor column

    + + +
    param
    +

    name of parameter column to identify each type of measurement

    + + +
    measurement
    +

    name of measurement column containing the actual measurement data

    + + +
    gcr_result
    +

    name of column of results from growthcleanr::cleangrowth()

    + + +
    include_all
    +

    Determines whether the function keeps all exclusion codes. If TRUE, all exclusion types are kept and the inclusion_types argument is ignored. Defaults to FALSE.

    + + +
    inclusion_types
    +

    Vector indicating which exclusion codes from the cleaning algorithm should be included in the data, given that include_all is FALSE. For all options, see growthcleanr::cleangrowth(). Defaults to c("Include").

    + +
    +
    +

    Value

    + + +

    Returns a data frame transformed from long to wide. Includes only values flagged with indicated inclusion types. Note that, for each subject, heights without corresponding weights for a given age (and vice versa) will be dropped.

    +
    + +
    +

    Examples

    +
    # Run on a small subset of given data
    +df <- as.data.frame(syngrowth)
    +df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ]
    +df <- cbind(df,
    +            "gcr_result" = cleangrowth(df$subjid,
    +                                       df$param,
    +                                       df$agedays,
    +                                       df$sex,
    +                                       df$measurement))
    +# Convert to wide format
    +wide_df <- longwide(df)
    +
    +# Include all inclusion types
    +wide_df <- longwide(df, include_all = TRUE)
    +
    +# Specify all inclusion codes
    +wide_df <- longwide(df, inclusion_types = c("Include", "Exclude-Carried-Forward"))
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/nhanes-reference-medians.html b/docs/reference/nhanes-reference-medians.html index b5d5d6c..e646f3f 100644 --- a/docs/reference/nhanes-reference-medians.html +++ b/docs/reference/nhanes-reference-medians.html @@ -18,7 +18,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -60,6 +60,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -109,7 +112,7 @@

    nhanes-reference-medians.csv.gz

    diff --git a/docs/reference/read_anthro.html b/docs/reference/read_anthro.html index 3b79c13..5d761bc 100644 --- a/docs/reference/read_anthro.html +++ b/docs/reference/read_anthro.html @@ -1,67 +1,12 @@ - - - - - - - -Function to calculate z-scores and csd-scores based on anthro tables. — read_anthro • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function to calculate z-scores and csd-scores based on anthro tables. — read_anthro • growthcleanr - - + + - - -
    -
    - -
    - -
    +
    @@ -160,58 +93,58 @@

    Function to calculate z-scores and csd-scores based on anthro tables.

    Function to calculate z-scores and csd-scores based on anthro tables.

    -
    read_anthro(path = "", cdc.only = F)
    - -

    Arguments

    - - - - - - - - - - -
    path

    Path to supplied reference anthro data. Defaults to package anthro tables.

    cdc.only

    Whether or not only CDC data should be used. Defaults to false.

    - -

    Value

    - -

    Function for calculating BMI based on measurement, age in days, sex, and measurement value.

    - -

    Examples

    -
    # Return calculating function with all defaults -afunc <- read_anthro() - -# Return calculating function while specifying a path and using only CDC data -afunc <- read_anthro(path = system.file("extdata", package = "growthcleanr"), - cdc.only = TRUE) -
    +
    +
    read_anthro(path = "", cdc.only = FALSE)
    +
    + +
    +

    Arguments

    +
    path
    +

    Path to supplied reference anthro data. Defaults to package anthro tables.

    + + +
    cdc.only
    +

    Whether or not only CDC data should be used. Defaults to FALSE.

    + +
    +
    +

    Value

    + + +

    Function for calculating z-scores or csd-scores based on measurement type (param), age in days, sex, and measurement value.

    +
    + +
    +

    Examples

    +
    # Return calculating function with all defaults
    +afunc <- read_anthro()
    +
    +# Return calculating function while specifying a path and using only CDC data
    +afunc <- read_anthro(path = system.file("extdata", package = "growthcleanr"),
    +                     cdc.only = TRUE)
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/recode_sex.html b/docs/reference/recode_sex.html index 2532cf5..1f0d798 100644 --- a/docs/reference/recode_sex.html +++ b/docs/reference/recode_sex.html @@ -1,68 +1,13 @@ - - - - - - - -Recode binary sex variable for compatibility — recode_sex • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Recode binary sex variable for compatibility — recode_sex • growthcleanr - + + - - - -
    -
    - -
    - -
    +
    @@ -162,91 +95,91 @@

    Recode binary sex variable for compatibility

    Useful in transforming output from growthcleanr::cleangrowth() into a format suitable for growthcleanr::ext_bmiz().

    -
    recode_sex(
    -  input_data,
    -  sourcecol = "sex",
    -  sourcem = "0",
    -  sourcef = "1",
    -  targetcol = "sex_recoded",
    -  targetm = 1L,
    -  targetf = 2L
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    input_data

    a data frame or data table to be transformed. Expects a source column containing a binary sex variable.

    sourcecol

    name of sex descriptor column. Defaults to "sex"

    sourcem

    variable indicating "male" sex in input data. Defaults to "0"

    sourcef

    variable indicating "female" sex in input data. Defaults to "1"

    targetcol

    desired name of recoded sex descriptor column. Defaults to "sex_recoded"

    targetm

    desired name of recoded sex variable indicating "male" sex in output data. Defaults to 1

    targetf

    desired name of recoded sex variable indicating "female" sex in output data. Defaults to 2

    - -

    Value

    - -

    Returns a data table with recoded sex variables.

    - -

    Examples

    -
    # Run on given data -df <- as.data.frame(syngrowth) - -# Run with all defaults -df_r <- recode_sex(df) - -# Specify different targets -df_rt <- recode_sex(df, targetcol = "sexr", targetm = "Male", targetf = "Female") - -# Specify different inputs -df_ri <- recode_sex(df_rt, sourcecol = "sexr", sourcem = "Male", sourcef = "Female") -
    +
    +
    recode_sex(
    +  input_data,
    +  sourcecol = "sex",
    +  sourcem = "0",
    +  sourcef = "1",
    +  targetcol = "sex_recoded",
    +  targetm = 1L,
    +  targetf = 2L
    +)
    +
    + +
    +

    Arguments

    +
    input_data
    +

    a data frame or data table to be transformed. Expects a source column containing a binary sex variable.

    + + +
    sourcecol
    +

    name of sex descriptor column. Defaults to "sex"

    + + +
    sourcem
    +

    variable indicating "male" sex in input data. Defaults to "0"

    + + +
    sourcef
    +

    variable indicating "female" sex in input data. Defaults to "1"

    + + +
    targetcol
    +

    desired name of recoded sex descriptor column. Defaults to "sex_recoded"

    + + +
    targetm
    +

    desired name of recoded sex variable indicating "male" sex in output data. Defaults to 1

    + + +
    targetf
    +

    desired name of recoded sex variable indicating "female" sex in output data. Defaults to 2

    + +
    +
    +

    Value

    + + +

    Returns a data table with recoded sex variables.

    +
    + +
    +

    Examples

    +
    # Run on given data
    +df <- as.data.frame(syngrowth)
    +
    +# Run with all defaults
    +df_r <- recode_sex(df)
    +
    +# Specify different targets
    +df_rt <- recode_sex(df, targetcol = "sexr", targetm = "Male", targetf = "Female")
    +
    +# Specify different inputs
    +df_ri <- recode_sex(df_rt, sourcecol = "sexr", sourcem = "Male", sourcef = "Female")
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/sd_median.html b/docs/reference/sd_median.html index ca7ea26..f3df27f 100644 --- a/docs/reference/sd_median.html +++ b/docs/reference/sd_median.html @@ -1,67 +1,12 @@ - - - - - - - -Calculate median SD score by age for each parameter. — sd_median • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Calculate median SD score by age for each parameter. — sd_median • growthcleanr - - + + - - -
    -
    - -
    - -
    +
    @@ -160,77 +93,77 @@

    Calculate median SD score by age for each parameter.

    Calculate median SD score by age for each parameter.

    -
    sd_median(param, sex, agedays, sd.orig)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    param

    Vector identifying each measurement, may be 'WEIGHTKG', or 'HEIGHTCM'.

    sex

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', 'f' or 1 for females.

    agedays

    Numeric vector containing the age in days at each measurement.

    sd.orig

    Vector of previously calculated standard deviation (SD) scores for each measurement before re-centering.

    - -

    Value

    - -

    Table of data with median SD-scores per day of life by gender and parameter.

    - -

    Examples

    -
    # Run on 1 subject -df_stats <- as.data.frame(syngrowth) -df_stats <- df_stats[df_stats$subjid == df_stats$subjid[1], ] - -# Get the original standard deviations -measurement_to_z <- read_anthro(cdc.only = TRUE) -sd.orig <- measurement_to_z(df_stats$param, - df_stats$agedays, - df_stats$sex, - df_stats$measurement, - TRUE) - -# Calculate median standard deviations -sd.m <- sd_median(df_stats$param, - df_stats$sex, - df_stats$agedays, - sd.orig) -
    +
    +
    sd_median(param, sex, agedays, sd.orig)
    +
    + +
    +

    Arguments

    +
    param
    +

    Vector identifying each measurement, may be 'WEIGHTKG', or 'HEIGHTCM'.

    + + +
    sex
    +

    Vector identifying the gender of the subject, may be 'M', 'm', or 0 for males, vs. 'F', 'f' or 1 for females.

    + + +
    agedays
    +

    Numeric vector containing the age in days at each measurement.

    + + +
    sd.orig
    +

    Vector of previously calculated standard deviation (SD) scores for each measurement before re-centering.

    + +
    +
    +

    Value

    + + +

    Table of data with median SD-scores per day of life by gender and parameter.

    +
    + +
    +

    Examples

    +
    # Run on 1 subject
    +df_stats <- as.data.frame(syngrowth)
    +df_stats <- df_stats[df_stats$subjid == df_stats$subjid[1], ]
    +
    +# Get the original standard deviations
    +measurement_to_z <- read_anthro(cdc.only = TRUE)
    +sd.orig <- measurement_to_z(df_stats$param,
    +                       df_stats$agedays,
    +                       df_stats$sex,
    +                       df_stats$measurement,
    +                       TRUE)
    +
    +# Calculate median standard deviations
    +sd.m <- sd_median(df_stats$param,
    +                  df_stats$sex,
    +                  df_stats$agedays,
    +                  sd.orig)
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/simple_bmi.html b/docs/reference/simple_bmi.html index a34b715..2d679b3 100644 --- a/docs/reference/simple_bmi.html +++ b/docs/reference/simple_bmi.html @@ -1,68 +1,13 @@ - - - - - - - -Compute BMI using standard formula — simple_bmi • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Compute BMI using standard formula — simple_bmi • growthcleanr - - - - + + -
    -
    - -
    - -
    +
    @@ -162,75 +95,75 @@

    Compute BMI using standard formula

    output from longwide().

    -
    simple_bmi(wide_df, wtcol = "wt", htcol = "ht")
    - -

    Arguments

    - - - - - - - - - - - - - - -
    wide_df

    A data frame or data table containing heights and weights in -wide format, e.g., after transformation with longwide()

    wtcol

    name of observation height value column, default 'wt'

    htcol

    name of subject weight value column, default 'ht'

    - -

    Value

    - -

    Returns a data table with the added column "bmi"

    - -

    Examples

    -
    # Simple usage -# Run on a small subset of given data -df <- as.data.frame(syngrowth) -df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ] -df <- cbind(df, - "gcr_result" = cleangrowth(df$subjid, - df$param, - df$agedays, - df$sex, - df$measurement)) -# Convert to wide format -wide_df <- longwide(df) -wide_df_with_bmi <- simple_bmi(wide_df) - -# Specifying different column names; note that quotes are used -colnames(wide_df)[colnames(wide_df) %in% c("wt", "ht")] <- - c("weight", "height") -wide_df_with_bmi <- simple_bmi(wide_df, wtcol = "weight", htcol = "height") -
    +
    +
    simple_bmi(wide_df, wtcol = "wt", htcol = "ht")
    +
    + +
    +

    Arguments

    +
    wide_df
    +

    A data frame or data table containing heights and weights in +wide format, e.g., after transformation with longwide()

    + + +
    wtcol
    +

    name of observation weight value column, default 'wt'

    + + +
    htcol
    +

    name of subject height value column, default 'ht'

    + +
    +
    +

    Value

    + + +

    Returns a data table with the added column "bmi"

    +
    + +
    +

    Examples

    +
    # Simple usage
    +# Run on a small subset of given data
    +df <- as.data.frame(syngrowth)
    +df <- df[df$subjid %in% unique(df[, "subjid"])[1:5], ]
    +df <- cbind(df,
    +            "gcr_result" = cleangrowth(df$subjid,
    +                                       df$param,
    +                                       df$agedays,
    +                                       df$sex,
    +                                       df$measurement))
    +# Convert to wide format
    +wide_df <- longwide(df)
    +wide_df_with_bmi <- simple_bmi(wide_df)
    +
    +# Specifying different column names; note that quotes are used
    +colnames(wide_df)[colnames(wide_df) %in% c("wt", "ht")] <-
    +  c("weight", "height")
    +wide_df_with_bmi <- simple_bmi(wide_df, wtcol = "weight", htcol = "height")
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/splitinput.html b/docs/reference/splitinput.html index e785184..5995a54 100644 --- a/docs/reference/splitinput.html +++ b/docs/reference/splitinput.html @@ -1,71 +1,16 @@ - - - - - - - -Split input data into multiple files — splitinput • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Split input data into multiple files — splitinput • growthcleanr - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    @@ -168,84 +101,87 @@

    Split input data into multiple files

    result.

    -
    splitinput(
    -  df,
    -  fname = deparse(substitute(df)),
    -  fdir = ".",
    -  min_nrow = 10000,
    -  keepcol = "subjid"
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    df

    data frame to split

    fname

    new name for each of the split files to start with

    fdir

    directory to put each of the split files (default working directory)

    min_nrow

    minimum number of rows for each split file (default 10000)

    keepcol

    the column name (default "subjid") to use to keep records with the same values together in the same single split file

    - -

    Value

    - -

    the count number referring to the last split file written

    - -

    Examples

    -
    if (FALSE) { -# Run on given data -df <- as.data.frame(syngrowth) - -# Run with all defaults -splitinput(df) - -# Specifying the name, directory and minimum row size -splitinput(df, fname = "syngrowth", fdir = tempdir(), min_nrow = 5000) - -# Specifying a different subject ID column -colnames(df)[colnames(df) == "subjid"] <- "sub_id" -splitinput(df, keepcol = "sub_id") -} -
    +
    +
    splitinput(
    +  df,
    +  fname = deparse(substitute(df)),
    +  fdir = NA,
    +  min_nrow = 10000,
    +  keepcol = "subjid"
    +)
    +
    + +
    +

    Arguments

    +
    df
    +

    data frame to split

    + + +
    fname
    +

    new name for each of the split files to start with

    + + +
    fdir
    +

    directory in which to write the split files (use "." for the working directory). Must be changed from the default (NA), which triggers an error.

    + + +
    min_nrow
    +

    minimum number of rows for each split file (default 10000)

    + + +
    keepcol
    +

    the column name (default "subjid") to use to keep records with the same values together in the same single split file

    + +
    +
    +

    Value

    + + +

    the count number referring to the last split file written

    +
    + +
    +

    Examples

    +
    # \donttest{
    +# Run on given data
    +df <- as.data.frame(syngrowth)
    +
    +# Run with all defaults (specifying directory)
    +splitinput(df, fdir = tempdir())
    +#> [1] 7
    +
    +# Specifying the name, directory and minimum row size
    +splitinput(df, fname = "syngrowth", fdir = tempdir(), min_nrow = 5000)
    +#> [1] 15
    +
    +# Specifying a different subject ID column
    +colnames(df)[colnames(df) == "subjid"] <- "sub_id"
    +splitinput(df, fdir = tempdir(), keepcol = "sub_id")
    +#> [1] 7
    +# }
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/syngrowth.html b/docs/reference/syngrowth.html index 6f3fbcc..cce3275 100644 --- a/docs/reference/syngrowth.html +++ b/docs/reference/syngrowth.html @@ -1,69 +1,14 @@ - - - - - - - -syngrowth — syngrowth • growthcleanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -syngrowth — syngrowth • growthcleanr - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    @@ -164,42 +97,41 @@

    syngrowth

    for testing with growthcleanr. Contains both pediatric and adult data.

    -
    syngrowth
    - - -

    Format

    +
    +
    syngrowth
    +
    +
    +

    Format

    A data frame with six variables: id, subjid, sex, agedays, param, and measurement

    -

    Details

    - +
    +
    +

    Details

    Example electronic health record (heightcm, weightkg) data.

    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/tanner_ht_vel.html b/docs/reference/tanner_ht_vel.html index f56f399..5b9f6a6 100644 --- a/docs/reference/tanner_ht_vel.html +++ b/docs/reference/tanner_ht_vel.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3 @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -111,7 +114,7 @@

    tanner_ht_vel.csv.gz

    diff --git a/docs/reference/tanner_ht_vel_with_2sd.html b/docs/reference/tanner_ht_vel_with_2sd.html index da34f2f..b877e86 100644 --- a/docs/reference/tanner_ht_vel_with_2sd.html +++ b/docs/reference/tanner_ht_vel_with_2sd.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -112,7 +115,7 @@

    tanner_ht_vel_with_2sd.csv.gz

    diff --git a/docs/reference/test_syngrowth_sas_output_compare.html b/docs/reference/test_syngrowth_sas_output_compare.html index 814151f..282acf6 100644 --- a/docs/reference/test_syngrowth_sas_output_compare.html +++ b/docs/reference/test_syngrowth_sas_output_compare.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -107,7 +110,7 @@

    test_syngrowth_sas_output_compare.csv.gz

    diff --git a/docs/reference/test_syngrowth_wide.html b/docs/reference/test_syngrowth_wide.html index 7078c4f..4b5b64a 100644 --- a/docs/reference/test_syngrowth_wide.html +++ b/docs/reference/test_syngrowth_wide.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -107,7 +110,7 @@

    test_syngrowth_wide.csv.gz

    diff --git a/docs/reference/testacf.html b/docs/reference/testacf.html index 2410bd0..c04da1c 100644 --- a/docs/reference/testacf.html +++ b/docs/reference/testacf.html @@ -20,7 +20,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -62,6 +62,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -102,15 +105,15 @@

    Function to test adjust carried forward

    seed = 7, searchtype = "random", grid.length = 9, - writeout = F, + writeout = FALSE, outfile = paste0("test_adjustcarryforward_", format(Sys.time(), "%m-%d-%Y_%H-%M-%S")), - quietly = F, + quietly = FALSE, param = "none", - debug = F, + debug = FALSE, maxrecs = 0, exclude_opt = 0, - add_answers = T + add_answers = TRUE )
    @@ -183,7 +186,7 @@

    Value

    diff --git a/docs/reference/weianthro.html b/docs/reference/weianthro.html index 1e49922..53b1219 100644 --- a/docs/reference/weianthro.html +++ b/docs/reference/weianthro.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -111,7 +114,7 @@

    weianthro.csv.gz

    diff --git a/docs/reference/who_ht_maxvel.html b/docs/reference/who_ht_maxvel.html index a7106e3..54f1907 100644 --- a/docs/reference/who_ht_maxvel.html +++ b/docs/reference/who_ht_maxvel.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -112,7 +115,7 @@

    who_ht_maxvel_3sd.csv.gz

    diff --git a/docs/reference/who_ht_maxvel_2sd.html b/docs/reference/who_ht_maxvel_2sd.html index fb6b169..82f0c98 100644 --- a/docs/reference/who_ht_maxvel_2sd.html +++ b/docs/reference/who_ht_maxvel_2sd.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -112,7 +115,7 @@

    who_ht_maxvel_2sd.csv.gz

    diff --git a/docs/reference/who_ht_vel_2sd.html b/docs/reference/who_ht_vel_2sd.html index f294bdd..875885c 100644 --- a/docs/reference/who_ht_vel_2sd.html +++ b/docs/reference/who_ht_vel_2sd.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -112,7 +115,7 @@

    who_ht_vel_2sd.csv.gz

    diff --git a/docs/reference/who_ht_vel_3sd.html b/docs/reference/who_ht_vel_3sd.html index 8fd6381..4a4630f 100644 --- a/docs/reference/who_ht_vel_3sd.html +++ b/docs/reference/who_ht_vel_3sd.html @@ -17,7 +17,7 @@ growthcleanr - 2.0.1 + 2.0.3
    @@ -59,6 +59,9 @@
  • Next steps
  • +
  • + Developer guidelines +
  • Reference @@ -112,7 +115,7 @@

    who_ht_vel_3sd.csv.gz