Title: | Multilevel and Mixed Effects Model Diagnostics and Effect Sizes |
---|---|
Description: | Effect sizes, diagnostics and performance metrics for multilevel and mixed effects models. Includes marginal and conditional 'R2' estimates for linear mixed effects models based on Johnson (2014) <doi:10.1111/2041-210X.12225>. |
Authors: | Joshua F. Wiley [aut, cre] |
Maintainer: | Joshua F. Wiley <[email protected]> |
License: | GPL (>= 3) |
Version: | 0.2.0 |
Built: | 2025-01-23 04:27:58 UTC |
Source: | https://github.com/jwiley/multileveltools |
This function estimates the autocorrelation over time in a time series by a higher level unit, given by ID.
acfByID( xvar, timevar, idvar, data, lag.max = 10L, na.function = c("na.approx", "na.spline", "na.locf"), ... )
acfByID( xvar, timevar, idvar, data, lag.max = 10L, na.function = c("na.approx", "na.spline", "na.locf"), ... )
xvar |
A character string giving the variable name of the variable to calculate autocorrelations on. |
timevar |
A character string giving the variable name of the time variable. |
idvar |
A character string giving the variable name of the ID variable. Can be missing if only one time series provided, in which case one will be created. |
data |
A data.table containing the variables used in the formula. This is a required argument. If a data.frame, it will silently coerce to a data.table. If not a data.table or data.frame, it will attempt to coerce, with a message. |
lag.max |
An integer of the maximum lag to estimate. Must be equal to or greater than the number of observations for all IDs in the dataset. |
na.function |
A character string giving the name of the function to use to address any missing data. Functions come from the zoo package, and must be one of: “na.approx”, “na.spline”, “na.locf”. |
... |
Additional arguments passed to |
A data.table of the estimated autocorrelations by ID and lag
## example 1 dat <- data.table::data.table( x = sin(1:30), time = 1:30, id = 1) acfByID("x", "time", "id", data = dat) ## example 2 dat2 <- data.table::data.table( x = c(sin(1:30), sin((1:30)/10)), time = c(1:30, 1:30), id = rep(1:2, each = 30)) dat2$x[4] <- NA res <- acfByID("x", "time", "id", data = dat2, na.function = "na.approx") ggplot2::ggplot(res, ggplot2::aes(factor(Lag), AutoCorrelation)) + ggplot2::geom_boxplot() ## clean up rm(dat, dat2, res)
## example 1 dat <- data.table::data.table( x = sin(1:30), time = 1:30, id = 1) acfByID("x", "time", "id", data = dat) ## example 2 dat2 <- data.table::data.table( x = c(sin(1:30), sin((1:30)/10)), time = c(1:30, 1:30), id = rep(1:2, each = 30)) dat2$x[4] <- NA res <- acfByID("x", "time", "id", data = dat2, na.function = "na.approx") ggplot2::ggplot(res, ggplot2::aes(factor(Lag), AutoCorrelation)) + ggplot2::geom_boxplot() ## clean up rm(dat, dat2, res)
Format results from a linear mixed model
## S3 method for class 'modelTest.merMod' APAStyler( object, format = list(FixedEffects = c("%s%s [%s, %s]"), RandomEffects = c("%s", "%s [%s, %s]"), EffectSizes = c("%s/%s, %s")), digits = 2, pcontrol = list(digits = 3, stars = TRUE, includeP = FALSE, includeSign = FALSE, dropLeadingZero = TRUE), ... )
## S3 method for class 'modelTest.merMod' APAStyler( object, format = list(FixedEffects = c("%s%s [%s, %s]"), RandomEffects = c("%s", "%s [%s, %s]"), EffectSizes = c("%s/%s, %s")), digits = 2, pcontrol = list(digits = 3, stars = TRUE, includeP = FALSE, includeSign = FALSE, dropLeadingZero = TRUE), ... )
object |
A list of one (or more) models estimated from lmer |
format |
A list giving the formatting style to be used for the fixed effecvts, random effects, and effect sizes. For the random effects, must be two options, one for when the random effects do not have confidence intervals and one when the random effects do have confidence intervals. |
digits |
A numeric value indicating the number of digits to print. This is still in early implementation stages and currently does not change all parts of the output (which default to 2 decimals per APA style). |
pcontrol |
A list controlling how p values are formatted. |
... |
Additional arguments passed to |
a data table of character data
library(JWileymisc) data(sleepstudy, package = "lme4") m1 <- lme4::lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) m2 <- lme4::lmer(Reaction ~ Days + I(Days^2) + (1 + Days | Subject), data = sleepstudy) testm1 <- modelTest(m1) testm2 <- modelTest(m2) APAStyler(testm1) APAStyler(list(Linear = testm1, Quadratic = testm2)) APAStyler(testm1, format = list( FixedEffects = "%s, %s (%s, %s)", RandomEffects = c("%s", "%s (%s, %s)"), EffectSizes = "%s, %s; %s"), pcontrol = list(digits = 3, stars = FALSE, includeP = TRUE, includeSign = TRUE, dropLeadingZero = TRUE)) testm1 <- modelTest(m1, method = "profile") testm2 <- modelTest(m2, method = "profile") APAStyler(testm1) APAStyler(list(Linear = testm1, Quadratic = testm2)) APAStyler(testm1, format = list( FixedEffects = "%s, %s (%s, %s)", RandomEffects = c("%s", "%s (%s, %s)"), EffectSizes = "%s, %s; %s"), pcontrol = list(digits = 3, stars = FALSE, includeP = TRUE, includeSign = TRUE, dropLeadingZero = TRUE)) rm(m1, m2, testm1, testm2)
library(JWileymisc) data(sleepstudy, package = "lme4") m1 <- lme4::lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) m2 <- lme4::lmer(Reaction ~ Days + I(Days^2) + (1 + Days | Subject), data = sleepstudy) testm1 <- modelTest(m1) testm2 <- modelTest(m2) APAStyler(testm1) APAStyler(list(Linear = testm1, Quadratic = testm2)) APAStyler(testm1, format = list( FixedEffects = "%s, %s (%s, %s)", RandomEffects = c("%s", "%s (%s, %s)"), EffectSizes = "%s, %s; %s"), pcontrol = list(digits = 3, stars = FALSE, includeP = TRUE, includeSign = TRUE, dropLeadingZero = TRUE)) testm1 <- modelTest(m1, method = "profile") testm2 <- modelTest(m2, method = "profile") APAStyler(testm1) APAStyler(list(Linear = testm1, Quadratic = testm2)) APAStyler(testm1, format = list( FixedEffects = "%s, %s (%s, %s)", RandomEffects = c("%s", "%s (%s, %s)"), EffectSizes = "%s, %s; %s"), pcontrol = list(digits = 3, stars = FALSE, includeP = TRUE, includeSign = TRUE, dropLeadingZero = TRUE)) rm(m1, m2, testm1, testm2)
This function creates the desired number of lags and tests consecutive models from a model with no lags (lag 0), lag 0 + lag1, etc. and reports model performance. This helps evaluate how many lags are needed.
evaluateLags(formula, lagvar, nlags = 0L, idvar, data, ...)
evaluateLags(formula, lagvar, nlags = 0L, idvar, data, ...)
formula |
A |
lagvar |
A |
nlags |
An |
idvar |
A |
data |
A |
... |
Additional arguments passed to |
Currently only linear mixed effects models are allowed.
## these examples are slow to run data(aces_daily, package = "JWileymisc") evaluateLags( "NegAff ~ Female + Age + BornAUS + (1 | UserID)", "STRESS", 4L, "UserID", aces_daily) ## not run, more complex example with random slope, fails to converge evaluateLags( "NegAff ~ Female + Age + BornAUS + (1 + STRESS | UserID)", "STRESS", 5L, "UserID", aces_daily) ## use different control to fit model and now converges strictControl <- lme4::lmerControl(optCtrl = list( algorithm = "NLOPT_LN_NELDERMEAD", xtol_abs = 1e-10, ftol_abs = 1e-10)) evaluateLags( "NegAff ~ Female + Age + BornAUS + (1 + STRESS | UserID)", "STRESS", 5L, "UserID", aces_daily, control = strictControl)
## these examples are slow to run data(aces_daily, package = "JWileymisc") evaluateLags( "NegAff ~ Female + Age + BornAUS + (1 | UserID)", "STRESS", 4L, "UserID", aces_daily) ## not run, more complex example with random slope, fails to converge evaluateLags( "NegAff ~ Female + Age + BornAUS + (1 + STRESS | UserID)", "STRESS", 5L, "UserID", aces_daily) ## use different control to fit model and now converges strictControl <- lme4::lmerControl(optCtrl = list( algorithm = "NLOPT_LN_NELDERMEAD", xtol_abs = 1e-10, ftol_abs = 1e-10)) evaluateLags( "NegAff ~ Female + Age + BornAUS + (1 + STRESS | UserID)", "STRESS", 5L, "UserID", aces_daily, control = strictControl)
This function estimates the ICC from mixed effects models estimated using lme4.
iccMixed(dv, id, data, family = c("gaussian", "binomial"))
iccMixed(dv, id, data, family = c("gaussian", "binomial"))
dv |
A character string giving the variable name of the dependent variable. |
id |
A character vector of length one or more giving the ID variable(s). Can be more than one. |
data |
A data.table containing the variables used in the formula. This is a required argument. If a data.frame, it will silently coerce to a data.table. If not a data.table or data.frame, it will attempt to coerce, with a message. |
family |
A character vector giving the family to use for the model. Currently only supports “gaussian” or “binomial”. |
A data table of the ICCs
For details, see Campbell, M. K., Mollison, J., and Grimshaw, J. M. (2001) <doi:10.1002/1097-0258(20010215)20:3 "Cluster trials in implementation research: estimation of intracluster correlation coefficients and sample size."
iccMixed("mpg", "cyl", mtcars) iccMixed("mpg", "cyl", data.table::as.data.table(mtcars)) iccMixed("mpg", "cyl", data.table::as.data.table(mtcars), family = "gaussian") iccMixed("mpg", c("cyl", "am"), data.table::as.data.table(mtcars)) iccMixed("am", "cyl", data.table::as.data.table(mtcars), family = "binomial")
iccMixed("mpg", "cyl", mtcars) iccMixed("mpg", "cyl", data.table::as.data.table(mtcars)) iccMixed("mpg", "cyl", data.table::as.data.table(mtcars), family = "gaussian") iccMixed("mpg", c("cyl", "am"), data.table::as.data.table(mtcars)) iccMixed("am", "cyl", data.table::as.data.table(mtcars), family = "binomial")
This function decomposes a variable in a long data set by grouping factors, such as by ID. If multiple grouping factors are listed, the decomposition is in order from left to right. Residuals from the lowest level are returned.
meanDecompose(formula, data)
meanDecompose(formula, data)
formula |
A formula of the variables to be used in the analysis. Should have the form: variable ~ groupingfactors. |
data |
A data table or data frame containing the variables used in the formula. This is a required argument. |
A list of data tables with the means or residuals
meanDecompose(mpg ~ vs, data = mtcars) meanDecompose(mpg ~ vs + cyl, data = mtcars) ## Example plotting the results tmp <- meanDecompose(Sepal.Length ~ Species, data = iris) do.call(ggpubr::ggarrange, c(lapply(names(tmp), function(x) { plot(JWileymisc::testDistribution(tmp[[x]]$X), plot = FALSE, varlab = x)$Density }), ncol = 1)) rm(tmp)
meanDecompose(mpg ~ vs, data = mtcars) meanDecompose(mpg ~ vs + cyl, data = mtcars) ## Example plotting the results tmp <- meanDecompose(Sepal.Length ~ Species, data = iris) do.call(ggpubr::ggarrange, c(lapply(names(tmp), function(x) { plot(JWileymisc::testDistribution(tmp[[x]]$X), plot = FALSE, varlab = x)$Density }), ncol = 1)) rm(tmp)
Tiny helper function to calculate the mean and deviations from the mean, both returned as a list. Works nicely with data.table to calculate a between and within variable.
meanDeviations(x, na.rm = TRUE)
meanDeviations(x, na.rm = TRUE)
x |
A vector, appropriate for the |
na.rm |
A logical, whether to remove missing
or not. Defaults to |
A list of the mean (first element) and deviations from the mean (second element).
## simple example showing what it does meanDeviations(1:10) ## example use case, applied to a data.table library(data.table) d <- as.data.table(iris) d[, c("BSepal.Length", "WSepal.Length") := meanDeviations(Sepal.Length), by = Species] str(d) rm(d)
## simple example showing what it does meanDeviations(1:10) ## example use case, applied to a data.table library(data.table) d <- as.data.table(iris) d[, c("BSepal.Length", "WSepal.Length") := meanDeviations(Sepal.Length), by = Species] str(d) rm(d)
This function provides fit statistics and effect sizes for model comparisons. The models must be nested.
## S3 method for class 'merMod' modelCompare(model1, model2, ...)
## S3 method for class 'merMod' modelCompare(model1, model2, ...)
model1 |
A model estimated by |
model2 |
A model estimated by |
... |
Additional arguments, not currently used but included to match generic. |
a data table with the fit indices for each model and comparing models to each other.
For estimating the marginal and conditional R-squared values, see: Nakagawa, S. and Schielzeth, H. (2013). A general and simple method for obtaining R2 from generalized linear mixed-effects models. Methods in Ecology and Evolution, 4(2), 133-142. as well as: Johnson, P. C. (2014). Extension of Nakagawa & Schielzeth's R2GLMM to random slopes models. Methods in Ecology and Evolution, 5(9), 944-946.
library(JWileymisc) data(aces_daily, package = "JWileymisc") m1 <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) m2 <- lme4::lmer(PosAff ~ STRESS + (1 | UserID), data = aces_daily) m3 <- lme4::lmer(PosAff ~ STRESS + Female + (1 | UserID), data = aces_daily) modelCompare(m1, m2) modelCompare(m2, m3) rm(m1, m2, m3)
library(JWileymisc) data(aces_daily, package = "JWileymisc") m1 <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) m2 <- lme4::lmer(PosAff ~ STRESS + (1 | UserID), data = aces_daily) m3 <- lme4::lmer(PosAff ~ STRESS + Female + (1 | UserID), data = aces_daily) modelCompare(m1, m2) modelCompare(m2, m3) rm(m1, m2, m3)
This function creates a number of diagnostics
for lme
models.
## S3 method for class 'lme' modelDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
## S3 method for class 'lme' modelDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
object |
A fitted model object of class lme. |
ev.perc |
A real number between 0 and 1 indicating the proportion of the theoretical distribution beyond which values are considered extreme values (possible outliers). Defaults to .001. |
robust |
Whether to use robust mean and standard deviation estimates for normal distribution |
distr |
A character string given the assumed distribution.
Passed on to |
standardized |
A logical whether to use standardized pearson residuals.
Defaults to |
... |
Additional arguments, not currently used. |
A logical (is.modelDiagnostics
) or
a modelDiagnostics object (list) for
as.modelDiagnostics
and modelDiagnostics
.
library(JWileymisc) sleep[1,1] <- NA m <- nlme::lme(extra ~ group, data = sleep, random = ~ 1 | ID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues plot(md) data(aces_daily, package = "JWileymisc") m <- nlme::lme(PosAff ~ STRESS, data = aces_daily, random = ~ 1 + STRESS | UserID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .001) md$extremeValues plot(md) m <- nlme::lme(extra ~ 1, data = sleep, random = ~ 1 | ID/group, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues plot(md) rm(m, md, sleep)
library(JWileymisc) sleep[1,1] <- NA m <- nlme::lme(extra ~ group, data = sleep, random = ~ 1 | ID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues plot(md) data(aces_daily, package = "JWileymisc") m <- nlme::lme(PosAff ~ STRESS, data = aces_daily, random = ~ 1 + STRESS | UserID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .001) md$extremeValues plot(md) m <- nlme::lme(extra ~ 1, data = sleep, random = ~ 1 | ID/group, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues plot(md) rm(m, md, sleep)
This function creates a number of diagnostics
for merMod
models.
## S3 method for class 'merMod' modelDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
## S3 method for class 'merMod' modelDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
object |
A fitted model object, either of class merMod from the lme4 package or merModLmerTest from the lmerTest package. |
ev.perc |
A real number between 0 and 1 indicating the proportion of the theoretical distribution beyond which values are considered extreme values (possible outliers). Defaults to .001. |
robust |
Whether to use robust mean and standard deviation estimates for normal distribution |
distr |
A character string given the assumed distribution.
Passed on to |
standardized |
A logical whether to use standardized residuals.
Defaults to |
... |
Additional arguments, not currently used. |
A logical (is.modelDiagnostics
) or
a modelDiagnostics object (list) for
as.modelDiagnostics
and modelDiagnostics
.
library(JWileymisc) sleep[1,1] <- NA m <- lme4::lmer(extra ~ group + (1 | ID), data = sleep) md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues class(md) plot(md) data(aces_daily, package = "JWileymisc") m <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) md <- modelDiagnostics(m, ev.perc = .1) # gm1 <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd), # data = lme4::cbpp, family = binomial) # modelDiagnostics(gm1) ## should be an error rm(m, md, sleep)
library(JWileymisc) sleep[1,1] <- NA m <- lme4::lmer(extra ~ group + (1 | ID), data = sleep) md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues class(md) plot(md) data(aces_daily, package = "JWileymisc") m <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) md <- modelDiagnostics(m, ev.perc = .1) # gm1 <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd), # data = lme4::cbpp, family = binomial) # modelDiagnostics(gm1) ## should be an error rm(m, md, sleep)
For pseudo R2 by cluster, the squared correlation between observed and predicted values for each cluster unit is returned. For the overall model, the marginal and conditional R2 are calculated as described in the references.
## S3 method for class 'merMod' modelPerformance(object, ...)
## S3 method for class 'merMod' modelPerformance(object, ...)
object |
A model from |
... |
Additional arguments, not currently used. |
a named vector with the marginal and conditional R2 values,
if CLUSTER = FALSE
, otherwise, a data table with the pseudo R2
for each cluster unit.
A list with a data.table
with the following elements:
A character string indicating the model type, here merMod
A character string indicating whether the model was estimated with REML or ML
The number of observations
A character string indicating the number of unique units in each grouping/clustering variable.
Akaike Information Criterion
Bayesian Information Criterion
log likelihood
log likelihood degrees of freedom
Residual standard deviation
in sample variance explained by the fixed effects
in sample variance explained by the fixed and random effects
Cohen's F2 effect size R2 / (1 - R2) based off the Marginal R2
Cohen's F2 effect size R2 / (1 - R2) based off the Conditional R2
For estimating the marginal and conditional R-squared values, see: Nakagawa, S. and Schielzeth, H. (2013) <doi:10.1111/j.2041-210x.2012.00261.x> "A general and simple method for obtaining R2 from generalized linear mixed-effects models" and also: Johnson, P. C. (2014) <doi:10.1111/2041-210X.12225> "Extension of Nakagawa & Schielzeth's R2GLMM to random slopes models"
library(JWileymisc) data(aces_daily, package = "JWileymisc") m1 <- lme4::lmer(PosAff ~ 1 + (1 | UserID), data = aces_daily) modelPerformance(m1) m1 <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) modelPerformance(m1) rm(m1)
library(JWileymisc) data(aces_daily, package = "JWileymisc") m1 <- lme4::lmer(PosAff ~ 1 + (1 | UserID), data = aces_daily) modelPerformance(m1) m1 <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) modelPerformance(m1) rm(m1)
This function extends the current drop1
method for
merMod
class objects from the lme4 package. Where
the default method to be able to drop both fixed and random
effects at once.
## S3 method for class 'merMod' modelTest(object, method = c("Wald", "profile", "boot"), control, ...)
## S3 method for class 'merMod' modelTest(object, method = c("Wald", "profile", "boot"), control, ...)
object |
A |
method |
A character vector indicating the types of confidence intervals to calculate. One of “Wald”, “profile”, or “boot”. |
control |
A |
... |
Additional arguments passed to |
At the moment, the function is aimed to lmer
models
and has very few features for glmer
or nlmer
models. The primary motivation was to provide a way to
provide an overall test of whether a variable
“matters”. In multilevel data, a variable may be
included in both the fixed and random effects. To provide
an overall test of whether it matters requires jointly testing
the fixed and random effects. This also is needed to provide
an overall effect size.
The function works by generating a formula with one specific
variable or “term” removed at all levels. A model is then
fit on this reduced formula and compared to the full model passed
in. This is a complex operation for mixed effects models for several
reasons. Firstly, R
has no default mechanism for dropping
terms from both the fixed and random portions. Secondly,
mixed effects models do not accomodate all types of models. For example,
if a model includes only a random slope with no random intercept,
if the random slope was dropped, there would be no more random effects,
and at that point, lmer
or glmer
will not run the model.
It is theoretically possible to instead fit the model using
lm
or glm
but this becomes more complex for certain
model comparisons and calculations and is not currently implemented.
Marginal and conditional R2 values are calculated for each term,
and these are used also to calculate something akin to an
f-squared effect size.
This is a new function and it is important to carefully evaluate
the results and check that they are accurate and that they are
sensible. Check accuracy by viewing the model formulae for each
reduced model and checking that those are indeed accurate.
In terms of checking whether a result is sensible or not,
there is a large literature on the difficulty interpretting
main effect tests in the presence of interactions. As it is
challenging to detect all interactions, especially ones that are
made outside of R
formulae, all terms are tested. However,
it likely does not make sense to report results from dropping a
main effect but keeping the interaction term, so present
and interpret these with caution.
## these examples are slow to run library(JWileymisc) m1 <- lme4::lmer(extra ~ group + (1 | ID), data = sleep, REML=FALSE) modelTest(m1) data(aces_daily, package = "JWileymisc") strictControl <- lme4::lmerControl(optCtrl = list( algorithm = "NLOPT_LN_NELDERMEAD", xtol_abs = 1e-10, ftol_abs = 1e-10)) m1 <- lme4::lmer(NegAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily, control = strictControl) modelTest(m1, method = "profile") m2 <- lme4::lmer(NegAff ~ STRESS + I(STRESS^2) + (1 + STRESS | UserID), data = aces_daily, control = strictControl) ## might normally use more bootstraps but keeping low for faster run modelTest(m2, method = "boot", nsim = 100)
## these examples are slow to run library(JWileymisc) m1 <- lme4::lmer(extra ~ group + (1 | ID), data = sleep, REML=FALSE) modelTest(m1) data(aces_daily, package = "JWileymisc") strictControl <- lme4::lmerControl(optCtrl = list( algorithm = "NLOPT_LN_NELDERMEAD", xtol_abs = 1e-10, ftol_abs = 1e-10)) m1 <- lme4::lmer(NegAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily, control = strictControl) modelTest(m1, method = "profile") m2 <- lme4::lmer(NegAff ~ STRESS + I(STRESS^2) + (1 + STRESS | UserID), data = aces_daily, control = strictControl) ## might normally use more bootstraps but keeping low for faster run modelTest(m2, method = "boot", nsim = 100)
This function estimates the (approximate) effective sample size.
nEffective(n, k, icc, dv, id, data, family = c("gaussian", "binomial"))
nEffective(n, k, icc, dv, id, data, family = c("gaussian", "binomial"))
n |
The number of unique/indepedent units of observation |
k |
The (average) number of observations per unit |
icc |
The estimated ICC. If missing, will estimate (and requires that the family argument be correctly specified). |
dv |
A character string giving the variable name of the dependent variable. |
id |
A character vector of length one giving the ID variable. |
data |
A data.table containing the variables used in the formula. This is a required argument. If a data.frame, it will silently coerce to a data.table. If not a data.table or data.frame, it will attempt to coerce, with a message. |
family |
A character vector giving the family to use for the model. Currently only supports “gaussian” or “binomial”. |
A data.table including the effective sample size.
For details, see Campbell, M. K., Mollison, J., and Grimshaw, J. M. (2001) <doi:10.1002/1097-0258(20010215)20:3 "Cluster trials in implementation research: estimation of intracluster correlation coefficients and sample size."
## example where n, k, and icc are estimated from the data ## provided, partly using iccMixed function nEffective(dv = "mpg", id = "cyl", data = mtcars) ## example where n, k, and icc are known (or being 'set') ## useful for sensitivity analyses nEffective(n = 60, k = 10, icc = .6)
## example where n, k, and icc are estimated from the data ## provided, partly using iccMixed function nEffective(dv = "mpg", id = "cyl", data = mtcars) ## example where n, k, and icc are known (or being 'set') ## useful for sensitivity analyses nEffective(n = 60, k = 10, icc = .6)
This function uses multilevel structural equation modelling to calculate between and within reliability using coefficient omega.
omegaSEM(items, id, data, savemodel = FALSE)
omegaSEM(items, id, data, savemodel = FALSE)
items |
A character vector giving the variables that map to the items in the scale. Note that these should be reverse scored prior to running this function. |
id |
A character string giving the name of the variable that indicates which rows of the dataset belong to the same person or group for the multilevel analysis. |
data |
A data table or data frame to be used for analysis. |
savemodel |
A logical value indicating whether the underlying model
should be saved and returned. Defaults to |
a list with two elements, the first, “Results” contains the
estimates for coefficient omega at the within and between level. The
next element, “Fit” contains the entire fitted model from lavaan, if
savemodel = TRUE
.
Geldhof, G. J., Preacher, K. J., & Zyphur, M. J. (2014) <doi:10.1037/a0032138> "Reliability estimation in a multilevel confirmatory factor analysis framework"
data(aces_daily, package = "JWileymisc") omegaSEM( items = c("COPEPrb", "COPEPrc", "COPEExp"), id = "UserID", data = aces_daily, savemodel = FALSE)
data(aces_daily, package = "JWileymisc") omegaSEM( items = c("COPEPrb", "COPEPrc", "COPEExp"), id = "UserID", data = aces_daily, savemodel = FALSE)
This function creates a number of diagnostic plots from lme models.
## S3 method for class 'modelDiagnostics.lme' plot(x, y, plot = TRUE, ask = TRUE, ncol = 1, nrow = 1, ...)
## S3 method for class 'modelDiagnostics.lme' plot(x, y, plot = TRUE, ask = TRUE, ncol = 1, nrow = 1, ...)
x |
A fitted model object from |
y |
Included to match the generic. Not used. |
plot |
A logical value whether or not to plot the results or simply return the graaphical objects. |
ask |
A logical whether to ask before changing plots. Only applies to interactive environments. |
ncol |
The number of columns to use for plots. Defaults to 1. |
nrow |
The number of rows to use for plots. Defaults to 1. |
... |
Included to match the generic. Not used. |
a list including plots of the residuals, residuals versus fitted values, and one list for plots of all random effects
library(JWileymisc) sleep[1,1] <- NA m <- nlme::lme(extra ~ group, data = sleep, random = ~ 1 | ID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues plot(md) data(aces_daily, package = "JWileymisc") m <- nlme::lme(PosAff ~ STRESS, data = aces_daily, random = ~ 1 + STRESS | UserID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .001) md$extremeValues plot(md$modelDiagnostics[[2]][[2]]) plot(md, ncol = 2, nrow = 2) plot(md, ncol = 2, nrow = 3) rm(m, md, sleep)
library(JWileymisc) sleep[1,1] <- NA m <- nlme::lme(extra ~ group, data = sleep, random = ~ 1 | ID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues plot(md) data(aces_daily, package = "JWileymisc") m <- nlme::lme(PosAff ~ STRESS, data = aces_daily, random = ~ 1 + STRESS | UserID, na.action = "na.omit") md <- modelDiagnostics(m, ev.perc = .001) md$extremeValues plot(md$modelDiagnostics[[2]][[2]]) plot(md, ncol = 2, nrow = 2) plot(md, ncol = 2, nrow = 3) rm(m, md, sleep)
This function creates a number of diagnostic plots from lmer models.
## S3 method for class 'modelDiagnostics.merMod' plot(x, y, plot = TRUE, ask = TRUE, ncol = 1, nrow = 1, ...)
## S3 method for class 'modelDiagnostics.merMod' plot(x, y, plot = TRUE, ask = TRUE, ncol = 1, nrow = 1, ...)
x |
A fitted model object from |
y |
Included to match the generic. Not used. |
plot |
A logical value whether or not to plot the results or simply return the graaphical objects. |
ask |
A logical whether to ask before changing plots. Only applies to interactive environments. |
ncol |
The number of columns to use for plots. Defaults to 1. |
nrow |
The number of rows to use for plots. Defaults to 1. |
... |
Included to match the generic. Not used. |
a list including plots of the residuals, residuals versus fitted values, and one list for plots of all random effects
library(JWileymisc) sleep[1,1] <- NA m <- lme4::lmer(extra ~ group + (1 | ID), data = sleep) md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues data(aces_daily, package = "JWileymisc") m <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) md <- modelDiagnostics(m, ev.perc = .001) md$extremeValues plot(md$modelDiagnostics[[2]][[2]]) plot(md, ncol = 2, nrow = 2) plot(md, ncol = 2, nrow = 3) rm(m, md, sleep)
library(JWileymisc) sleep[1,1] <- NA m <- lme4::lmer(extra ~ group + (1 | ID), data = sleep) md <- modelDiagnostics(m, ev.perc = .1) md$extremeValues data(aces_daily, package = "JWileymisc") m <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) md <- modelDiagnostics(m, ev.perc = .001) md$extremeValues plot(md$modelDiagnostics[[2]][[2]]) plot(md, ncol = 2, nrow = 2) plot(md, ncol = 2, nrow = 3) rm(m, md, sleep)
For pseudo R2 by cluster, the squared correlation between observed and predicted values for each cluster unit is returned. For the overall model, the marginal and conditional R2 are calculated as described in the references.
## S3 method for class 'merMod' R2(object, cluster = FALSE, ...)
## S3 method for class 'merMod' R2(object, cluster = FALSE, ...)
object |
A model estimated by |
cluster |
A logical whether to calculate individual pseudo R2 values by
cluster unit (if |
... |
Added for consistency with generic. Not currently used. |
a named vector with the marginal and conditional R2 values,
if CLUSTER = FALSE
, otherwise, a data table with the pseudo R2
for each cluster unit.
For estimating the marginal and conditional R-squared values, see: Nakagawa, S. and Schielzeth, H. (2013) <doi:10.1111/j.2041-210x.2012.00261.x> "A general and simple method for obtaining R2 from generalized linear mixed-effects models" and also: Johnson, P. C. (2014) <doi:10.1111/2041-210X.12225> "Extension of Nakagawa & Schielzeth's R2GLMM to random slopes models"
library(JWileymisc) data(aces_daily, package = "JWileymisc") m1 <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) R2(m1) R2(m1, cluster = TRUE) hist(R2(m1, cluster = TRUE)$R2) rm(m1)
library(JWileymisc) data(aces_daily, package = "JWileymisc") m1 <- lme4::lmer(PosAff ~ STRESS + (1 + STRESS | UserID), data = aces_daily) R2(m1) R2(m1, cluster = TRUE) hist(R2(m1, cluster = TRUE)$R2) rm(m1)
Create data and plots for brms random effect models
ranefdata(object, usevars, newdata, idvar)
ranefdata(object, usevars, newdata, idvar)
object |
a |
usevars |
a character vector of random effects to plot |
newdata |
a data.table object with the data used to generate the random effects, this is used as an anchor for the random intercepts so they have a meaningful 0 point |
idvar |
a character string specifying the grouping variable name for the random effects |
a list with the following components:
plota list of ggplot objects
plotdata list of data.table objects with the data used to generate the plots
relonga data.table object with the random effects in long format
yhata list of data.table objects with the expected values for the random effects
usevarsa character vector of the random effects to plot
idvara character string specifying the grouping variable name for the random effects
if (FALSE) { library(data.table) library(brms) library(cmdstanr) library(ggpubr) dmixed <- withr::with_seed( seed = 12345, code = { nGroups <- 100 nObs <- 20 theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) theta.location[, 1] <- theta.location[, 1] - 2.5 theta.location[, 2] <- theta.location[, 2] + 1 dmixed <- data.table( x = rep(rep(0:1, each = nObs / 2), times = nGroups)) dmixed[, ID := rep(seq_len(nGroups), each = nObs)] for (i in seq_len(nGroups)) { dmixed[ID == i, y := rnorm( n = nObs, mean = theta.location[i, 1] + theta.location[i, 2] * x, sd = exp(1 + theta.location[i, 1] + theta.location[i, 2] * x)) ] } copy(dmixed) }) ls.me <- brm(bf( y ~ 1 + x + (1 + x | p | ID), sigma ~ 1 + x + (1 + x | p | ID)), family = "gaussian", data = dmixed, seed = 1234, silent = 2, refresh = 0, iter = 2000, warmup = 1000, thin = 1, chains = 4L, cores = 4L, backend = "cmdstanr") out <- ranefdata( ls.me, newdata = data.table(ID = dmixed$ID[1], x = 0), usevars = c("Intercept", "x", "sigma_Intercept", "sigma_x"), idvar = "ID") do.call(ggarrange, c(out$replots, ncol=2,nrow=2)) do.call(ggarrange, c(out$scatterplots, ncol=2,nrow=3)) }
if (FALSE) { library(data.table) library(brms) library(cmdstanr) library(ggpubr) dmixed <- withr::with_seed( seed = 12345, code = { nGroups <- 100 nObs <- 20 theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) theta.location[, 1] <- theta.location[, 1] - 2.5 theta.location[, 2] <- theta.location[, 2] + 1 dmixed <- data.table( x = rep(rep(0:1, each = nObs / 2), times = nGroups)) dmixed[, ID := rep(seq_len(nGroups), each = nObs)] for (i in seq_len(nGroups)) { dmixed[ID == i, y := rnorm( n = nObs, mean = theta.location[i, 1] + theta.location[i, 2] * x, sd = exp(1 + theta.location[i, 1] + theta.location[i, 2] * x)) ] } copy(dmixed) }) ls.me <- brm(bf( y ~ 1 + x + (1 + x | p | ID), sigma ~ 1 + x + (1 + x | p | ID)), family = "gaussian", data = dmixed, seed = 1234, silent = 2, refresh = 0, iter = 2000, warmup = 1000, thin = 1, chains = 4L, cores = 4L, backend = "cmdstanr") out <- ranefdata( ls.me, newdata = data.table(ID = dmixed$ID[1], x = 0), usevars = c("Intercept", "x", "sigma_Intercept", "sigma_x"), idvar = "ID") do.call(ggarrange, c(out$replots, ncol=2,nrow=2)) do.call(ggarrange, c(out$scatterplots, ncol=2,nrow=3)) }
residualDiagnostics methods for lme objects
## S3 method for class 'lme' residualDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
## S3 method for class 'lme' residualDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
object |
An object with class |
ev.perc |
The extreme value percentile to use. Defaults to .001. |
robust |
A logical value, whether to use robust estimates or not.
Defaults to |
distr |
A character string specifying the assumed distribution. Currently “normal”, but future options may be supported in the future. |
standardized |
A logical value whether to use standardized pearson
residual values or not. Defaults to |
... |
Additional arguments. Not currently used. |
A logical (is.residualDiagnostics
) or
a residualDiagnostics object (list) for
as.residualDiagnostics
and residualDiagnostics
.
library(JWileymisc) sleep[1,1] <- NA m <- nlme::lme(extra ~ group, data = sleep, random = ~ 1 | ID, na.action = na.omit) residualDiagnostics(m)$Residuals m <- nlme::lme(hp ~ mpg, data = mtcars, random = ~ 1 | cyl, na.action = na.omit) residualDiagnostics(m)$Residuals rm(m, sleep)
library(JWileymisc) sleep[1,1] <- NA m <- nlme::lme(extra ~ group, data = sleep, random = ~ 1 | ID, na.action = na.omit) residualDiagnostics(m)$Residuals m <- nlme::lme(hp ~ mpg, data = mtcars, random = ~ 1 | cyl, na.action = na.omit) residualDiagnostics(m)$Residuals rm(m, sleep)
residualDiagnostics methods for merMod objects
## S3 method for class 'merMod' residualDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
## S3 method for class 'merMod' residualDiagnostics( object, ev.perc = 0.001, robust = FALSE, distr = "normal", standardized = TRUE, ... )
object |
An object with class |
ev.perc |
The extreme value percentile to use. Defaults to .001. |
robust |
A logical value, whether to use robust estimates or not.
Defaults to |
distr |
A character string specifying the assumed distribution.
Currently “normal”, but may expand in the future if
|
standardized |
A logical value whether to use standardized
residual values or not. Defaults to |
... |
Additional arguments. Not currently used. |
A logical (is.residualDiagnostics
) or
a residualDiagnostics object (list) for
as.residualDiagnostics
and residualDiagnostics
.
library(JWileymisc) sleep[1,1] <- NA m <- lme4::lmer(extra ~ group + (1 | ID), data = sleep) residualDiagnostics(m)$Residuals # gm1 <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd), # data = lme4::cbpp, family = binomial) # residualDiagnostics(gm1) ## should be an error rm(m, sleep)
library(JWileymisc) sleep[1,1] <- NA m <- lme4::lmer(extra ~ group + (1 | ID), data = sleep) residualDiagnostics(m)$Residuals # gm1 <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd), # data = lme4::cbpp, family = binomial) # residualDiagnostics(gm1) ## should be an error rm(m, sleep)
This function estimates the simple moving average for a specific window and weights it with a variety of optional decays (e.g., exponential, linear, none). Whether to omit missing data or not is based on the missing threshold, which is a proportion and indicates the tolerance. If the weighted proportion missing exceeds this threshold, then that observvation is missing, otherwise, missing data are excluded and the weighted simple moving average calculated on the non missing data.
weighted.sma( x, window, decay = c("exponential", "linear", "none"), alpha, missThreshold = 0 )
weighted.sma( x, window, decay = c("exponential", "linear", "none"), alpha, missThreshold = 0 )
x |
Time series data on which to calculate the weighted simple moving average. It is assumed that these data are in the correct order and that time is equally spaced. Any missing data should be filled in with NAs. |
window |
An integer indicating the size of the window to use. This window will include the current value. |
decay |
A character string indicating the type of decay to use on the weights. |
alpha |
An optional value. Not needed for |
missThreshold |
A numeric value indicating the proportion of data that can be missing for a given window before the resulting simple moving average is set to missing. This is a proportion of the weighted data, so not all data points will necessarily be equally weighted. |
A numeric vector of the weighted simple moving averages
dweights <- expand.grid( time = 0:10, alpha = seq(0, 1, by = .1)) library(ggplot2) ggplot(dweights, aes(time, (1 - alpha)^time, colour = factor(alpha))) + geom_line() + geom_point() + theme_bw() + scale_x_reverse() + theme(legend.position = "bottom") + ggtitle("Exponential Decay in Weights") ggplot(dweights, aes(time, pmax(1 - alpha * time, 0), colour = factor(alpha))) + geom_line() + geom_point() + theme_bw() + scale_x_reverse() + theme(legend.position = "bottom") + ggtitle("Linear Decay in Weights") weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "none", missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "exponential", alpha = 0, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "linear", alpha = 0, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "exponential", alpha = .1, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "exponential", alpha = .5, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "linear", alpha = .1, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "linear", alpha = .3, missThreshold = 0) weighted.sma(c(1, NA, NA, 4, 5), window = 4L, decay = "exponential", alpha = .4, missThreshold = .4) ## clean up rm(dweights)
dweights <- expand.grid( time = 0:10, alpha = seq(0, 1, by = .1)) library(ggplot2) ggplot(dweights, aes(time, (1 - alpha)^time, colour = factor(alpha))) + geom_line() + geom_point() + theme_bw() + scale_x_reverse() + theme(legend.position = "bottom") + ggtitle("Exponential Decay in Weights") ggplot(dweights, aes(time, pmax(1 - alpha * time, 0), colour = factor(alpha))) + geom_line() + geom_point() + theme_bw() + scale_x_reverse() + theme(legend.position = "bottom") + ggtitle("Linear Decay in Weights") weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "none", missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "exponential", alpha = 0, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "linear", alpha = 0, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "exponential", alpha = .1, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "exponential", alpha = .5, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "linear", alpha = .1, missThreshold = 0) weighted.sma(c(1, 2, 3, 4, 5), window = 3L, decay = "linear", alpha = .3, missThreshold = 0) weighted.sma(c(1, NA, NA, 4, 5), window = 4L, decay = "exponential", alpha = .4, missThreshold = .4) ## clean up rm(dweights)