A utility function to drop variables from statistical models

The purpose of this function is, for each individual predictor variable in the model, to construct a reduced model that drops all terms involving that predictor (i.e. main effect and all interactions involving it) and return the test between the full model and the reduced model.

This function provides one solution to the difficulties of testing terms in a statistical model while respecting marginality. Consider a model (in Wilkinson-Rogers notation) y~x+z+x:z (which can also be written as y~x*z).

set.seed(101)
d <- data.frame(x = runif(100), y = runif(100), z = runif(100), f1 = sample(LETTERS[1:2], 
    100, replace = TRUE), f2 = sample(letters[1:2], 100, replace = TRUE))
m0 <- lm(y ~ x + z + x:z, data = d)
m1 <- lm(y ~ f1 + f2 + f1:f2, data = d)

If we want to test the effect of x, we have (at least) the following choices:

drop1(m0)
## Single term deletions
## 
## Model:
## y ~ x + z + x:z
##        Df Sum of Sq  RSS  AIC
## <none>              8.66 -237
## x:z     1      0.22 8.88 -236
drop1(m0, . ~ .)
## Single term deletions
## 
## Model:
## y ~ x + z + x:z
##        Df Sum of Sq  RSS  AIC
## <none>              8.66 -237
## x       1     0.374 9.03 -234
## z       1     0.079 8.74 -238
## x:z     1     0.220 8.88 -236
drop1(update(m0, data = transform(d, x = x - mean(x), z = z - mean(z))), . ~ 
    .)
## Single term deletions
## 
## Model:
## y ~ x + z + x:z
##        Df Sum of Sq  RSS  AIC
## <none>              8.66 -237
## x       1    0.1254 8.78 -237
## z       1    0.0779 8.73 -238
## x:z     1    0.2203 8.88 -236
drop1(m1, . ~ .)
## Single term deletions
## 
## Model:
## y ~ f1 + f2 + f1:f2
##        Df Sum of Sq  RSS  AIC
## <none>              9.04 -232
## f1      1   0.02480 9.07 -234
## f2      1   0.00051 9.04 -234
## f1:f2   1   0.00423 9.05 -234
drop1(update(m1, contrasts = list(f2 = contr.sum)), . ~ .)
## Single term deletions
## 
## Model:
## y ~ f1 + f2 + f1:f2
##        Df Sum of Sq  RSS  AIC
## <none>              9.04 -232
## f1      1   0.02760 9.07 -234
## f2      1   0.00051 9.04 -234
## f1:f2   1   0.00423 9.05 -234
drop1(update(m1, contrasts = list(f2 = contr.SAS)), . ~ .)
## Single term deletions
## 
## Model:
## y ~ f1 + f2 + f1:f2
##        Df Sum of Sq  RSS  AIC
## <none>              9.04 -232
## f1      1   0.00553 9.05 -234
## f2      1   0.00051 9.04 -234
## f1:f2   1   0.00423 9.05 -234

The main effects depend on the centering (for continuous variables) or contrasts (for categorical predictors).

## (INCOMPLETE/NOT WORKING YET) utility function to drop offsets from a
## formula
dropOffset <- function(term) {
    browser()
    if (deparse(term[[1]]) == "~") {
        term[[3]] <- dropOffset(term[[3]])
        term
    } else return(term)
}
dropvar <- function(object, ..., debug = FALSE) {
    form0 <- form <- formula(object)
    ## hacks for lme4: don't include vars from random-effects terms somewhat
    ## unstable due to development status ...
    if (inherits(object, "merMod")) {
        form <- formula(object, fixed.only = TRUE)
    }
    if (inherits(object, "mer")) {
        merpkg <- attr(class(object), "package")
        form <- (getFromNamespace("nobars", merpkg))(form)
    }
    vars <- all.vars(form[[3]])
    res <- setNames(vector(length(vars), mode = "list"), vars)
    labs <- attr(terms(object), "term.labels")
    ## if (debug) cat('labs:',labs,'\n')
    for (i in seq_along(res)) {
        v <- vars[i]
        ## tdrop <- grep(paste0('(^|:)',v,'(:|$)'),labs,value=TRUE)
        tdrop <- grep(paste0("\\b", v, "\\b"), labs, value = TRUE)
        newform <- reformulate(paste(". -", paste(tdrop, collapse = "-")), response = ".")
        if (debug) 
            cat(i, v, "**", tdrop, "**", deparse(newform), "\n")
        m_reduced <- update(object, newform)
        res[[i]] <- anova(object, m_reduced, ...)
    }
    ## allow switch to return res here?
    res2 <- do.call(rbind, c(list(head(res[[1]], 1)), lapply(res, tail, 1)))
    rownames(res2) <- c("<none>", vars)
    attr(res2, "heading") <- c("Complete deletions of variables", "\nModel:", 
        deparse(form0))
    res2
}

Example:

library("brglm")  ## just for lizards data set
data(lizards)
m1 <- glm(grahami ~ (height + diameter + light + time)^3, data = lizards, family = "poisson")
dropvar(m1, test = "Chisq")
## Complete deletions of variables
## 
## Model:
## grahami ~ (height + diameter + light + time)^3
##          Resid. Df Resid. Dev  Df Deviance Pr(>Chi)    
## <none>           1        3.1                          
## height          11       33.2 -10    -30.1  0.00081 ***
## diameter        11       60.2 -10    -57.1  1.3e-08 ***
## light           11      182.5 -10   -179.4  < 2e-16 ***
## time            15      146.1 -14   -143.0  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

It should work for a fairly broad range of models – for example it works on brglm models as well as glm models.

m2 <- brglm(cbind(opalinus, grahami) ~ (height + diameter + light + time)^3, 
    data = lizards, family = "binomial")
dropvar(m2, test = "Chisq")
## Complete deletions of variables
## 
## Model:
## cbind(opalinus, grahami) ~ (height + diameter + light + time)^3
##          Resid. Df Resid. Dev  Df Deviance Pr(>Chi)    
## <none>           1        4.0                          
## height          11       35.4 -10    -31.4  0.00051 ***
## diameter        11       22.8 -10    -18.8  0.04336 *  
## light           11       15.0 -10    -10.9  0.36135    
## time            15       22.4 -14    -18.4  0.18891    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

And it works for lme4 etc, too, with some minor hacks.

library(lme4.0)
gm1 <- glmer(cbind(incidence, size - incidence) ~ period + (1 | herd), family = binomial, 
    data = cbpp)
dropvar(gm1)
## Complete deletions of variables
## 
## Model:
## cbind(incidence, size - incidence) ~ period + (1 | herd)
##        Df AIC BIC logLik Chisq Chi Df Pr(>Chisq)    
## <none>  2 130 134  -62.9                            
## period  5 110 120  -50.0  25.6      3    1.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lme4)
gm2 <- lme4::glmer(cbind(incidence, size - incidence) ~ period + (1 | herd), 
    family = binomial, data = cbpp)
dropvar(gm2)
## Complete deletions of variables
## 
## Model:
## cbind(incidence, size - incidence) ~ period + (1 | herd)
##        Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## <none>  2 214 218   -105      210                            
## period  5 194 204    -92      184  25.6      3    1.2e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(101)
d <- data.frame(x = runif(100), y = runif(100), z = runif(100))
m3 <- lm(y ~ log(x) + z, d)
dropvar(m3)
## Complete deletions of variables
## 
## Model:
## y ~ log(x) + z
##        Res.Df  RSS Df Sum of Sq    F Pr(>F)  
## <none>     97 8.72                           
## x          98 9.04 -1    -0.319 3.55  0.062 .
## z          98 8.78 -1    -0.067 0.75  0.389  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(splines)
m4 <- lm(y ~ ns(x, 3) + z, d)
dropvar(m4)
## Complete deletions of variables
## 
## Model:
## y ~ ns(x, 3) + z
##        Res.Df  RSS Df Sum of Sq    F Pr(>F)  
## <none>     95 8.43                           
## x          98 9.04 -3    -0.604 2.27  0.086 .
## z          96 8.52 -1    -0.090 1.02  0.316  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m5 <- lm(y ~ ns(x, 3) + x:z, d)
dropvar(m5, debug = TRUE)
## 1 x ** ns(x, 3) x:z ** . ~ . - ns(x, 3) - x:z 
## 2 z ** x:z ** . ~ . - x:z
## Complete deletions of variables
## 
## Model:
## y ~ ns(x, 3) + x:z
##        Res.Df  RSS Df Sum of Sq    F Pr(>F)  
## <none>     95 8.25                           
## x          99 9.08 -4    -0.833 2.40  0.055 .
## z          96 8.52 -1    -0.277 3.19  0.077 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m6 <- lm(y ~ ns(x, 3) + x * z, d)
dropvar(m6, debug = TRUE)
## 1 x ** ns(x, 3) x x:z ** . ~ . - ns(x, 3) - x - x:z 
## 2 z ** z x:z ** . ~ . - z - x:z
## Complete deletions of variables
## 
## Model:
## y ~ ns(x, 3) + x * z
##        Res.Df  RSS Df Sum of Sq    F Pr(>F)  
## <none>     94 8.17                           
## x          98 9.04 -4    -0.869 2.50  0.048 *
## z          96 8.52 -2    -0.355 2.04  0.135  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## not yet: m5 <- lm(y~ns(x,3)+offset(z),d) dropvar(m5,debug=TRUE)

To do