The slides for Lab 9 session are now available to review here.

9 Interaction Effects

Before adding interaction effects to the model, test the model for potential violations of OLS assumptions. To correct for violations: center age, add a squared age term, and log transform income variable in a new temporary dataset.

options(scipen=999, digits = 2)
library(ggplot2)

Create a clean data frame with the selected variables first

good <- read.csv("good.csv")
goodMini <- na.omit(good[,c("CHID","AGE97","faminc97","HOME97","CHRACE","read97","WICpreg")])
str(goodMini)
## 'data.frame':    1384 obs. of  7 variables:
##  $ CHID    : int  4180 5032 6034 7041 7039 7040 10034 10033 14030 14031 ...
##  $ AGE97   : int  12 12 13 6 9 7 6 7 11 8 ...
##  $ faminc97: num  5828 89109 115002 24635 12778 ...
##  $ HOME97  : num  19.1 21 23.1 13.5 18.6 16.6 25.4 20.4 20.6 20.7 ...
##  $ CHRACE  : int  1 1 1 1 1 1 2 2 1 1 ...
##  $ read97  : int  68 81 79 16 41 38 54 36 70 58 ...
##  $ WICpreg : int  0 1 0 1 1 1 1 1 0 0 ...
##  - attr(*, "na.action")= 'omit' Named int  1 2 3 4 5 6 7 10 13 14 ...
##   ..- attr(*, "names")= chr  "1" "2" "3" "4" ...

9.1 Variable Transformation and Centering

Log-Transformation and/or Centering of Continuous Predictors: AGE97, faminc97, HOME97

goodMini$ageC <- goodMini$AGE97 - 8
goodMini$ageC2 <- goodMini$ageC^2

goodMini$logfaminc <- ifelse(goodMini$faminc97 <= 1, 1, goodMini$faminc97)
goodMini$logfaminc <- log(goodMini$logfaminc)
goodMini$incomeC <- goodMini$logfaminc - mean(goodMini$logfaminc)

goodMini$homeC <- goodMini$HOME97 - mean(goodMini$HOME97)

Handling Categorical Predictors: race and WICpreg

Keep dummy or categorical variables (e.g., race) intact, do not transform or center them.

# recoding Race from 9 categories to 2 categories
  # -.5 = black
  # .5 = white
goodMini$race <- ifelse(goodMini$CHRACE == 1, .5,
                    ifelse(goodMini$CHRACE == 2, -.5, NA))
summary(goodMini$race)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       0       0       0       0       0       0     128
# We can choose the base category as we want. here `black` is the base category:
goodMini$race<-factor(goodMini$race, levels=c(-.5, .5), 
                         labels=c("black","white"))  
table(goodMini$race)
## 
## black white 
##   601   655
# We can choose the base category as we want. Here, we set up `0` i.e., "non-participant" as the base category:
goodMini$WICpreg<-factor(goodMini$WICpreg, levels=c(0,1), 
                         labels=c("non-participant","participant"))  
table(goodMini$WICpreg)
## 
## non-participant     participant 
##             848             536

Cleaned Data Ready for looking into Interaction Effects.

goodMini <- na.omit(goodMini)
summary(goodMini)
##       CHID             AGE97       faminc97          HOME97    
##  Min.   :   4180   Min.   : 6   Min.   :     0   Min.   : 8.8  
##  1st Qu.:1311783   1st Qu.: 7   1st Qu.: 20697   1st Qu.:18.7  
##  Median :2509036   Median : 9   Median : 43938   Median :21.0  
##  Mean   :3338092   Mean   : 9   Mean   : 55053   Mean   :20.7  
##  3rd Qu.:5643781   3rd Qu.:11   3rd Qu.: 71464   3rd Qu.:23.0  
##  Max.   :6872031   Max.   :13   Max.   :646743   Max.   :27.0  
##      CHRACE         read97              WICpreg         ageC   
##  Min.   :1.00   Min.   : 2   non-participant:787   Min.   :-2  
##  1st Qu.:1.00   1st Qu.:45   participant    :469   1st Qu.:-1  
##  Median :1.00   Median :61                         Median : 1  
##  Mean   :1.48   Mean   :56                         Mean   : 1  
##  3rd Qu.:2.00   3rd Qu.:71                         3rd Qu.: 3  
##  Max.   :2.00   Max.   :97                         Max.   : 5  
##      ageC2        logfaminc       incomeC          homeC          race    
##  Min.   : 0.0   Min.   : 0.0   Min.   :-10.5   Min.   :-11.9   black:601  
##  1st Qu.: 1.0   1st Qu.: 9.9   1st Qu.: -0.5   1st Qu.: -2.0   white:655  
##  Median : 4.0   Median :10.7   Median :  0.2   Median :  0.3              
##  Mean   : 5.2   Mean   :10.5   Mean   :  0.0   Mean   :  0.0              
##  3rd Qu.: 9.0   3rd Qu.:11.2   3rd Qu.:  0.7   3rd Qu.:  2.3              
##  Max.   :25.0   Max.   :13.4   Max.   :  2.9   Max.   :  6.3
attach(goodMini)

9.2 Creating Interaction Terms between two or more variables

One way is to create separate interaction terms stored in the dataset. Alternatively, we can include them in the regression model directly as shown in 9.3.

# goodMini$WICpreg <- as.numeric(goodMini$WICpreg)
# goodMini$race <- as.numeric(goodMini$race)
# `HOME97` x `faminc`
goodMini$home_inc <- goodMini$HOME97 * goodMini$logfaminc 
goodMini$homeC_incomeC <- goodMini$homeC * goodMini$incomeC  
#  `race` x `AGE97`
goodMini$ageC_race <- goodMini$ageC * goodMini$race
goodMini$ageC2_race <- goodMini$ageC2 * goodMini$race  
# `race` x `faminc`
goodMini$income_race <- goodMini$logfaminc * goodMini$race
goodMini$incomeC_race <- goodMini$incomeC * goodMini$race 
# `race` x `HOME97`
goodMini$homeC_race <- goodMini$homeC * goodMini$race  # need to be numeric
# `WICpreg` x other variables
goodMini$homeC_WIC <- goodMini$homeC * goodMini$WICpreg # need to be numeric
goodMini$incomeC_WIC <- goodMini$incomeC * goodMini$WICpreg
goodMini$ageC_WIC <- goodMini$ageC * goodMini$WICpreg
goodMini$ageC2WIC <- goodMini$ageC2 * goodMini$WICpreg

# Three-way Interactions
# HOME97` x `faminc` x `race`
goodMini$homeC_incomeC_race <- goodMini$homeC * goodMini$incomeC * goodMini$race 

9.3 Regression Model with an Interaction term and Centered Predictors

Because we have centered all of our variables, the intercept term in the main-effect only model tells us the expected reading score (54)for the “average” child (i.e. a child who comes from a family with the average income in the sample, a child having the average age of children in the sample, a child with parents having the average score on the home97 scale) whose mothers did NOT participate in WIC. Given the contrast coding for RACE, the intercept is the unweighted mean for the two groups (Black and White).

Run two regressions: one without interaction terms (main effects only) and one regression with interaction terms, controlling for incomeC, ageC, and ageC2

Main Effects Model

For a model without interaction, we assume that the effect of HOME97 on read97 is the same regardless of whether children’s mother participated in the WIC program.

\[\hat{read97}= \alpha + \beta_1 {incomeC} + \beta_2{ageC} + \beta_3{ageC2} + \beta_4{homeC} + \beta_5{WICpreg} + \epsilon\]

lm1<-lm(read97~incomeC + ageC + ageC2 + homeC + WICpreg, data=goodMini)
summary(lm1)
## 
## Call:
## lm(formula = read97 ~ incomeC + ageC + ageC2 + homeC + WICpreg, 
##     data = goodMini)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -41.78  -6.79   0.51   7.54  45.68 
## 
## Coefficients:
##                    Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)         54.4373     0.5306  102.59 < 0.0000000000000002 ***
## incomeC              1.1886     0.2980    3.99             0.000070 ***
## ageC                 9.6368     0.2372   40.64 < 0.0000000000000002 ***
## ageC2               -1.3040     0.0866  -15.05 < 0.0000000000000002 ***
## homeC                1.0307     0.1201    8.58 < 0.0000000000000002 ***
## WICpregparticipant  -3.0998     0.7441   -4.17             0.000033 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11 on 1250 degrees of freedom
## Multiple R-squared:  0.67,   Adjusted R-squared:  0.669 
## F-statistic:  508 on 5 and 1250 DF,  p-value: <0.0000000000000002

Interaction Model: Include a two-way interaction term of HOME x WICpreg

For a model with interaction, we want to examine if HOME97 effects on read97 depend on WICpreg.

Model with interactions: fit \(read97\) vs \(homeC\) by \(WICpreg\).

\[\hat{read97}= \alpha + \beta_1 {incomeC} + \beta_2{ageC} + \beta_3{ageC2} + \beta_4{homeC} + \beta_5{WICpreg} + \beta_6{WICpreg \cdot homeC} + \epsilon\]

# We can include both main effects of homeC and WICpreg, as well as the interaction term using `homeC*WICpreg`:
lm2<-lm(read97~incomeC + ageC + ageC2 + homeC*WICpreg, data=goodMini)
summary(lm2)
## 
## Call:
## lm(formula = read97 ~ incomeC + ageC + ageC2 + homeC * WICpreg, 
##     data = goodMini)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -42.69  -6.77   0.37   7.57  44.22 
## 
## Coefficients:
##                          Estimate Std. Error t value             Pr(>|t|)
## (Intercept)               54.0801     0.5372  100.67 < 0.0000000000000002
## incomeC                    1.1431     0.2968    3.85              0.00012
## ageC                       9.6951     0.2366   40.98 < 0.0000000000000002
## ageC2                     -1.3092     0.0862  -15.18 < 0.0000000000000002
## homeC                      1.3827     0.1540    8.98 < 0.0000000000000002
## WICpregparticipant        -3.5414     0.7504   -4.72            0.0000026
## homeC:WICpregparticipant  -0.8306     0.2290   -3.63              0.00030
##                             
## (Intercept)              ***
## incomeC                  ***
## ageC                     ***
## ageC2                    ***
## homeC                    ***
## WICpregparticipant       ***
## homeC:WICpregparticipant ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11 on 1249 degrees of freedom
## Multiple R-squared:  0.674,  Adjusted R-squared:  0.672 
## F-statistic:  430 on 6 and 1249 DF,  p-value: <0.0000000000000002

9.4 Plotting the Interaction Effect

a) Create a new dataset with predicted fit values

First, we create a new data frame with all of the WIC and homeC variables used in lm2 as well as the reading scores, CHILD ID numbers.

goodMiniplot<-goodMini[,c("CHID", "ageC","ageC2","WICpreg","homeC",
                          "read97","incomeC")]

b) Control Other Variables at a Specified Value

We want to control for several other covariates. However, we have to choose at what value/level to hold these covariates constant at. In this code, incomeC = 0 (or mean), ageC/ageC2 = 0 (8 year old child)

#Keep this in mind when interpretting the plot
goodMiniplot$incomeC<-0
goodMiniplot$ageC<-0
goodMiniplot$ageC2<-0

c) Predict the Fitted Value of read97

Now we can use the predict function to produce fitted values for our goodMiniplot data set using coefficients from the lm2 model (above).

goodMiniplot$lm2.fit<-predict(lm2,goodMiniplot)

d) Plot the Model With Interaction between homeC and WICpreg

Note: we are plotting the centered parenting practice (homeC) against the predicted Y values (fit).

library(ggplot2)
summary(goodMiniplot$homeC)  # Min = -11.9 ; Max = 6.3
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   -11.9    -2.0     0.3     0.0     2.3     6.3
summary(goodMiniplot$read97)  # Min = 2 ; Max = 97
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2      45      61      56      71      97
ggplot(goodMiniplot,aes(x = homeC, y = lm2.fit)) + 
  geom_smooth(method="lm", aes(colour= WICpreg))+    # or "loess"
  #the scale x/y continuous funcitons allow you to set
  #the minimum/maximum values and major units the x and
  #y axes
  scale_x_continuous(breaks=c(-12,-10,-8,-6,-4,-2,0,2,4,6,8))+
  scale_y_continuous(breaks=c(40,45,50,55,60,65,70,75,80,85,90))+
  #labs is used to set the x/y axis labels
  labs(x="Centered Home Cognitive Stimulation", 
       y="Reading Scores in 1997")+
  #ggtitle is used to set the title of the plot
  #the final line centers the title
  ggtitle("Model With Interaction between WIC and Centered Home on Reading Scores") +
  theme(plot.title = element_text(hjust = 0.5))