Understanding simple regression models with additive linear terms in OLS is a relatively incomplicated affair. However, what about when there are interactions or nonlinear terms? Then suddenly, it can get difficult and the outputted numerical values for do not give a proper understanding of the model workings. A very good way to gain an understanding of a regression model is to look at the marginal predictions, i.e. predictions generated by a model when one holds the non-focal variables constant and varies the focal variable(s). The ggeffects package provides excellent ability to do this. Below I give a number of example. For more, see:
options(digits = 3)
library(pacman)
#install latest dev versions
devtools::install_dev("insight")
## Skipping install of 'insight' from a github remote, the SHA1 (b9bbed5a) has not changed since last install.
## Use `force = TRUE` to force installation
devtools::install_dev("ggeffects")
## Skipping install of 'ggeffects' from a github remote, the SHA1 (6af0d309) has not changed since last install.
## Use `force = TRUE` to force installation
#load
p_load(tidyverse, ggeffects, splines, rms)
#iris flower data
iris
#simple model using iris flower data
model1 = lm(Sepal.Length ~ Petal.Length * Species, data = iris)
model1 %>% summary()
##
## Call:
## lm(formula = Sepal.Length ~ Petal.Length * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7348 -0.2279 -0.0313 0.2437 0.9361
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.213 0.407 10.34 < 2e-16 ***
## Petal.Length 0.542 0.277 1.96 0.052 .
## Speciesversicolor -1.806 0.598 -3.02 0.003 **
## Speciesvirginica -3.154 0.634 -4.97 1.8e-06 ***
## Petal.Length:Speciesversicolor 0.286 0.295 0.97 0.334
## Petal.Length:Speciesvirginica 0.453 0.290 1.56 0.120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.336 on 144 degrees of freedom
## Multiple R-squared: 0.84, Adjusted R-squared: 0.835
## F-statistic: 152 on 5 and 144 DF, p-value: <2e-16
#plot marginal effect using ggplot manually
#data generated
ggpredict(model1, terms = "Petal.Length")
#plot it simply
ggpredict(model1, terms = "Petal.Length") %>%
ggplot(aes(x, predicted)) +
geom_line()
#using built-in plot() for better results
ggeffect(model1, terms = "Petal.Length") %>%
plot()
#with interaction effect
ggeffect(model1, terms = c("Petal.Length", "Species")) %>%
plot()
#fit a nonlinear model
model2a = lm(Sepal.Length ~ bs(Petal.Length, knots = 3), data = iris)
model2a %>% summary()
##
## Call:
## lm(formula = Sepal.Length ~ bs(Petal.Length, knots = 3), data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0540 -0.2483 0.0039 0.1845 0.9313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.731 0.194 24.44 < 2e-16 ***
## bs(Petal.Length, knots = 3)1 0.508 0.389 1.31 0.19
## bs(Petal.Length, knots = 3)2 0.446 0.278 1.60 0.11
## bs(Petal.Length, knots = 3)3 1.827 0.336 5.43 2.3e-07 ***
## bs(Petal.Length, knots = 3)4 3.112 0.261 11.93 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.364 on 145 degrees of freedom
## Multiple R-squared: 0.812, Adjusted R-squared: 0.807
## F-statistic: 157 on 4 and 145 DF, p-value: <2e-16
#with interaction
model2b = lm(Sepal.Length ~ bs(Petal.Width, knots = 2) * Species, data = iris)
model2b %>% summary()
##
## Call:
## lm(formula = Sepal.Length ~ bs(Petal.Width, knots = 2) * Species,
## data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.323 -0.298 -0.032 0.249 1.221
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 4.852 0.209 23.25
## bs(Petal.Width, knots = 2)1 0.285 2.301 0.12
## bs(Petal.Width, knots = 2)2 11.025 22.954 0.48
## bs(Petal.Width, knots = 2)3 -120.398 204.900 -0.59
## bs(Petal.Width, knots = 2)4 -28.306 41.379 -0.68
## Speciesversicolor 6.833 12.231 0.56
## Speciesvirginica 29.999 41.429 0.72
## bs(Petal.Width, knots = 2)1:Speciesversicolor -12.807 19.251 -0.67
## bs(Petal.Width, knots = 2)2:Speciesversicolor -10.035 23.907 -0.42
## bs(Petal.Width, knots = 2)3:Speciesversicolor 109.869 205.587 0.53
## bs(Petal.Width, knots = 2)4:Speciesversicolor NA NA NA
## bs(Petal.Width, knots = 2)1:Speciesvirginica -32.877 46.678 -0.70
## bs(Petal.Width, knots = 2)2:Speciesvirginica -39.120 46.211 -0.85
## bs(Petal.Width, knots = 2)3:Speciesvirginica 92.491 209.089 0.44
## bs(Petal.Width, knots = 2)4:Speciesvirginica NA NA NA
## Pr(>|t|)
## (Intercept) <2e-16 ***
## bs(Petal.Width, knots = 2)1 0.90
## bs(Petal.Width, knots = 2)2 0.63
## bs(Petal.Width, knots = 2)3 0.56
## bs(Petal.Width, knots = 2)4 0.50
## Speciesversicolor 0.58
## Speciesvirginica 0.47
## bs(Petal.Width, knots = 2)1:Speciesversicolor 0.51
## bs(Petal.Width, knots = 2)2:Speciesversicolor 0.68
## bs(Petal.Width, knots = 2)3:Speciesversicolor 0.59
## bs(Petal.Width, knots = 2)4:Speciesversicolor NA
## bs(Petal.Width, knots = 2)1:Speciesvirginica 0.48
## bs(Petal.Width, knots = 2)2:Speciesvirginica 0.40
## bs(Petal.Width, knots = 2)3:Speciesvirginica 0.66
## bs(Petal.Width, knots = 2)4:Speciesvirginica NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.478 on 137 degrees of freedom
## Multiple R-squared: 0.693, Adjusted R-squared: 0.667
## F-statistic: 25.8 on 12 and 137 DF, p-value: <2e-16
#plot nonlinear
model2a %>%
ggeffect(terms = "Petal.Length") %>%
plot()
## Warning in bs(Petal.Length, degree = 3L, knots = 3, Boundary.knots = c(1, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
#plot nonlinear with interaction
model2b %>%
ggeffect(terms = c("Petal.Width", "Species")) %>%
plot()
## Warning in bs(Petal.Width, degree = 3L, knots = 2, Boundary.knots =
## c(0.1, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning: Removed 6 rows containing missing values (geom_path).
#mtcars data
mtcars
#fit model with double interaction
lm(mpg ~ disp * cyl * gear, data = mtcars) %>%
ggeffect(terms = c("disp", "cyl", "gear")) %>%
plot()
#can also leave out terms to get 3 individual plots
lm(mpg ~ disp * cyl * gear, data = mtcars) %>%
ggeffect() %>%
plot()
## $disp
##
## $cyl
##
## $gear
Restricted cubic spline via rcs()
.
#simple model
ols(mpg ~ cyl * disp, data = mtcars) %>%
ggeffect(terms = c("cyl", "disp")) %>%
plot()
#spline
ols(mpg ~ rcs(hp, 3), data = mtcars) %>%
ggeffect(terms = c("hp")) %>%
plot()
#spline and main
ols(mpg ~ rcs(hp, 3) + cyl, data = mtcars) %>%
ggeffect(terms = c("hp", "cyl")) %>%
plot()
#simple
lrm(vs==1 ~ cyl, data = mtcars) %>%
ggeffect(terms = c("cyl")) %>%
plot()
#simple: 2 main effects
lrm(vs==1 ~ cyl + hp, data = mtcars) %>%
ggeffect(terms = c("cyl", "hp")) %>%
plot()
#interaction
lrm(vs==1 ~ cyl * disp, data = mtcars) %>%
ggeffect(terms = c("disp", "cyl")) %>%
plot()