For each individual predictor variable in the model, this function constructs a reduced model that drops all terms involving that predictor (i.e. its main effect and every interaction involving it) and returns the test of the full model against the reduced model.
This function provides one solution to the difficulties of
testing terms in a statistical model while respecting marginality.
Consider a model (in Wilkinson-Rogers notation) y~x+z+x:z (which can also be written as y~x*z).
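For example, dropping every term involving x from this model leaves y~z; a minimal illustration, using update() on the formula itself:
## removing both the main effect of x and the x:z interaction
update(y ~ x + z + x:z, . ~ . - x - x:z)
## expected result: y ~ z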
set.seed(101)
d <- data.frame(x = runif(100), y = runif(100), z = runif(100),
    f1 = sample(LETTERS[1:2], 100, replace = TRUE),
    f2 = sample(letters[1:2], 100, replace = TRUE))
m0 <- lm(y ~ x + z + x:z, data = d)
m1 <- lm(y ~ f1 + f2 + f1:f2, data = d)
If we want to test the effect of x, we have (at least) the following choices:
Drop x:z only, testing the fit of y~x+z+x:z against y~x+z. This is the default behaviour of drop1 in R:
drop1(m0)
## Single term deletions
##
## Model:
## y ~ x + z + x:z
## Df Sum of Sq RSS AIC
## <none> 8.66 -237
## x:z 1 0.22 8.88 -236
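The same comparison can be written out by hand, fitting the reduced model explicitly and calling anova() (m0_red is just a throwaway name for this check):
## the default drop1() test compares y ~ x + z + x:z against y ~ x + z
m0_red <- update(m0, . ~ . - x:z)
anova(m0_red, m0)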
Test x in the presence of x:z; this is also referred to as a "type III" test in honour of SAS's "type III sums of squares". This can be done via drop1(model, . ~ .):
drop1(m0, . ~ .)
## Single term deletions
##
## Model:
## y ~ x + z + x:z
## Df Sum of Sq RSS AIC
## <none> 8.66 -237
## x 1 0.374 9.03 -234
## z 1 0.079 8.74 -238
## x:z 1 0.220 8.88 -236
drop1(update(m0, data = transform(d, x = x - mean(x), z = z - mean(z))), . ~ .)
## Single term deletions
##
## Model:
## y ~ x + z + x:z
## Df Sum of Sq RSS AIC
## <none> 8.66 -237
## x 1 0.1254 8.78 -237
## z 1 0.0779 8.73 -238
## x:z 1 0.2203 8.88 -236
drop1(m1, . ~ .)
## Single term deletions
##
## Model:
## y ~ f1 + f2 + f1:f2
## Df Sum of Sq RSS AIC
## <none> 9.04 -232
## f1 1 0.02480 9.07 -234
## f2 1 0.00051 9.04 -234
## f1:f2 1 0.00423 9.05 -234
drop1(update(m1, contrasts = list(f2 = contr.sum)), . ~ .)
## Single term deletions
##
## Model:
## y ~ f1 + f2 + f1:f2
## Df Sum of Sq RSS AIC
## <none> 9.04 -232
## f1 1 0.02760 9.07 -234
## f2 1 0.00051 9.04 -234
## f1:f2 1 0.00423 9.05 -234
drop1(update(m1, contrasts = list(f2 = contr.SAS)), . ~ .)
## Single term deletions
##
## Model:
## y ~ f1 + f2 + f1:f2
## Df Sum of Sq RSS AIC
## <none> 9.04 -232
## f1 1 0.00553 9.05 -234
## f2 1 0.00051 9.04 -234
## f1:f2 1 0.00423 9.05 -234
The main-effect tests depend on the centering (for continuous variables) or on the contrasts (for categorical predictors); the interaction test does not change.
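A quick way to see why: with the interaction in the model, the coefficient on x is the slope of x at z = 0, so re-centering z shifts that coefficient (and hence its test) while leaving the interaction coefficient alone. A small check:
## coefficients from the original fit and from a fit with z centered:
## the x coefficient changes, the x:z coefficient does not
coef(m0)[c("x", "x:z")]
coef(update(m0, data = transform(d, z = z - mean(z))))[c("x", "x:z")]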
Drop all terms involving x (here, both x and x:z) from the model. This is what we develop here.
## utility function to drop offset() terms from a formula
## (not yet hooked into dropvar() below)
dropOffset <- function(term) {
    is_off <- function(x) is.call(x) && identical(x[[1]], quote(offset))
    if (!is.call(term)) return(term)
    if (identical(term[[1]], quote(`~`))) {
        ## recurse into the right-hand side of the formula
        term[[length(term)]] <- dropOffset(term[[length(term)]])
    } else if (identical(term[[1]], quote(`+`)) && length(term) == 3) {
        ## strip offset() terms from sums: x + offset(z) becomes x
        if (is_off(term[[3]])) return(dropOffset(term[[2]]))
        if (is_off(term[[2]])) return(dropOffset(term[[3]]))
        term[[2]] <- dropOffset(term[[2]])
    }
    term
}
dropvar <- function(object, ..., debug = FALSE) {
    form0 <- form <- formula(object)
    ## hacks for lme4: don't include variables from random-effects terms;
    ## somewhat unstable due to the development status of lme4 ...
    if (inherits(object, "merMod")) {
        form <- formula(object, fixed.only = TRUE)
    }
    if (inherits(object, "mer")) {
        merpkg <- attr(class(object), "package")
        form <- (getFromNamespace("nobars", merpkg))(form)
    }
    vars <- all.vars(form[[3]])
    res <- setNames(vector(length(vars), mode = "list"), vars)
    labs <- attr(terms(object), "term.labels")
    ## if (debug) cat('labs:',labs,'\n')
    for (i in seq_along(res)) {
        v <- vars[i]
        ## match every term label that mentions the variable as a whole word
        ## tdrop <- grep(paste0('(^|:)',v,'(:|$)'),labs,value=TRUE)
        tdrop <- grep(paste0("\\b", v, "\\b"), labs, value = TRUE)
        newform <- reformulate(paste(". -", paste(tdrop, collapse = "-")), response = ".")
        if (debug)
            cat(i, v, "**", tdrop, "**", deparse(newform), "\n")
        m_reduced <- update(object, newform)
        res[[i]] <- anova(object, m_reduced, ...)
    }
    ## allow switch to return res here?
    res2 <- do.call(rbind, c(list(head(res[[1]], 1)), lapply(res, tail, 1)))
    rownames(res2) <- c("<none>", vars)
    attr(res2, "heading") <- c("Complete deletions of variables", "\nModel:",
        deparse(form0))
    res2
}
Example:
library("brglm") ## just for lizards data set
data(lizards)
m1 <- glm(grahami ~ (height + diameter + light + time)^3, data = lizards, family = "poisson")
dropvar(m1, test = "Chisq")
## Complete deletions of variables
##
## Model:
## grahami ~ (height + diameter + light + time)^3
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## <none> 1 3.1
## height 11 33.2 -10 -30.1 0.00081 ***
## diameter 11 60.2 -10 -57.1 1.3e-08 ***
## light 11 182.5 -10 -179.4 < 2e-16 ***
## time 15 146.1 -14 -143.0 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
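As a cross-check, the height row above is the same comparison we would get by dropping all seven terms that involve height by hand (m1_noheight is just a throwaway name for this check):
## explicit reduced model with every height term removed
m1_noheight <- update(m1, . ~ . - height - height:diameter - height:light - height:time -
    height:diameter:light - height:diameter:time - height:light:time)
anova(m1_noheight, m1, test = "Chisq")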
It should work for a fairly broad range of models; for example,
it works on brglm models as well as glm models.
m2 <- brglm(cbind(opalinus, grahami) ~ (height + diameter + light + time)^3,
data = lizards, family = "binomial")
dropvar(m2, test = "Chisq")
## Complete deletions of variables
##
## Model:
## cbind(opalinus, grahami) ~ (height + diameter + light + time)^3
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## <none> 1 4.0
## height 11 35.4 -10 -31.4 0.00051 ***
## diameter 11 22.8 -10 -18.8 0.04336 *
## light 11 15.0 -10 -10.9 0.36135
## time 15 22.4 -14 -18.4 0.18891
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
And it works for lme4 etc., too, with some minor hacks.
library(lme4.0)
gm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
    family = binomial, data = cbpp)
dropvar(gm1)
## Complete deletions of variables
##
## Model:
## cbind(incidence, size - incidence) ~ period + (1 | herd)
## Df AIC BIC logLik Chisq Chi Df Pr(>Chisq)
## <none> 2 130 134 -62.9
## period 5 110 120 -50.0 25.6 3 1.2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lme4)
gm2 <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd),
    family = binomial, data = cbpp)
dropvar(gm2)
## Complete deletions of variables
##
## Model:
## cbind(incidence, size - incidence) ~ period + (1 | herd)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## <none> 2 214 218 -105 210
## period 5 194 204 -92 184 25.6 3 1.2e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
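The "minor hacks" are the merMod and mer branches at the top of dropvar(): the variable list is taken from the fixed-effect part of the formula only, so grouping variables such as herd are never candidates for deletion. For current lme4 this relies on:
## fixed-effects-only formula used by dropvar() for merMod fits
formula(gm2, fixed.only = TRUE)
## expected: cbind(incidence, size - incidence) ~ period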
set.seed(101)
d <- data.frame(x = runif(100), y = runif(100), z = runif(100))
m3 <- lm(y ~ log(x) + z, d)
dropvar(m3)
## Complete deletions of variables
##
## Model:
## y ~ log(x) + z
## Res.Df RSS Df Sum of Sq F Pr(>F)
## <none> 97 8.72
## x 98 9.04 -1 -0.319 3.55 0.062 .
## z 98 8.78 -1 -0.067 0.75 0.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(splines)
m4 <- lm(y ~ ns(x, 3) + z, d)
dropvar(m4)
## Complete deletions of variables
##
## Model:
## y ~ ns(x, 3) + z
## Res.Df RSS Df Sum of Sq F Pr(>F)
## <none> 95 8.43
## x 98 9.04 -3 -0.604 2.27 0.086 .
## z 96 8.52 -1 -0.090 1.02 0.316
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m5 <- lm(y ~ ns(x, 3) + x:z, d)
dropvar(m5, debug = TRUE)
## 1 x ** ns(x, 3) x:z ** . ~ . - ns(x, 3) - x:z
## 2 z ** x:z ** . ~ . - x:z
## Complete deletions of variables
##
## Model:
## y ~ ns(x, 3) + x:z
## Res.Df RSS Df Sum of Sq F Pr(>F)
## <none> 95 8.25
## x 99 9.08 -4 -0.833 2.40 0.055 .
## z 96 8.52 -1 -0.277 3.19 0.077 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m6 <- lm(y ~ ns(x, 3) + x * z, d)
dropvar(m6, debug = TRUE)
## 1 x ** ns(x, 3) x x:z ** . ~ . - ns(x, 3) - x - x:z
## 2 z ** z x:z ** . ~ . - z - x:z
## Complete deletions of variables
##
## Model:
## y ~ ns(x, 3) + x * z
## Res.Df RSS Df Sum of Sq F Pr(>F)
## <none> 94 8.17
## x 98 9.04 -4 -0.869 2.50 0.048 *
## z 96 8.52 -2 -0.355 2.04 0.135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
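The debug traces show how the matching works: all.vars() reduces the right-hand side of the formula to its underlying variables, and the word-boundary regex then collects every term label that mentions a given variable, so ns(x, 3), x and x:z are all dropped when x is tested. A small illustration with m6's formula:
## the two matching steps used inside dropvar()
all.vars((y ~ ns(x, 3) + x * z)[[3]])
## expected: "x" "z"
grep("\\bx\\b", attr(terms(y ~ ns(x, 3) + x * z), "term.labels"), value = TRUE)
## expected: "ns(x, 3)" "x" "x:z"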
## not yet (offset() terms are not handled):
## m7 <- lm(y ~ ns(x, 3) + offset(z), d)
## dropvar(m7, debug = TRUE)