In this section we formalize the multi-level model used to analyze the SAT data. In this case the first level of this model consists of the linear relationship between each participant’s individual scores and that score’s reporting status (MODE: self- or official-report) and type (VM: verbal or math) and their interaction: \[ score_{ij} = \beta_{0j} + \beta_{ij}*VM_{ij} + \beta_{2j}*MODE_{ij} + \beta_{3j}*VM_{ij}*MODE{ij} + \epsilon_{ij} \]
The \(\beta 's\) in this model can in turn be modeled at the second-level with the following equations, which included participant sex (SEX: male or female) as a between-unit predictor: \[ \beta_{0j} = \alpha_{00} + \alpha_{01}*sex_{j} + \mu_{0j} \] \[ \beta_{1j} = \alpha_{10} + \alpha_{11}*sex_{j} + \mu_{1j} \] \[ \beta_{2j} = \alpha_{20} + \alpha_{21}*sex_{j} + \mu_{2j} \] \[ \beta_{3j} = \alpha_{30} + \alpha_{31}*sex_{j} + \mu_{3j} \]
Substituting the second-level equations into the first-level equations yields the following full model: \[ score_{ij} = \alpha_{00} + \alpha_{01}*sex_{j} + \mu_{0j} + (\alpha_{10} + \alpha_{11}*sex_{j} + \mu_{1j})*VM_{ij} + (\alpha_{20} + \alpha_{21}*sex_{j} + \mu_{2j})*MODE_{ij} + (\alpha_{30} + \alpha_{31}*sex_{j} + \mu_{3j})*VM_{ij}*MODE_{ij} + \epsilon_{ij} \]
The following sections will estimate the fixed effects (e.g. all \(\alpha's\)) and random effects (e.g. all \(\mu 's\)) in this model and a repeated measures ANOVA using the sat dataset.
In this section we test both the linear mixed model formalized above and the repeated measures ANOVA from last week’s homework.
The code below estimates parameters for the linear mixed model approach:
sat = read.csv("sat_long.csv", header = TRUE)
#recode SEX as contrast code - 1 if male, -1 if female
sat$SEX = 1*(sat$SEX==1)-1*(sat$SEX==2)
#VM*MODE interaction code
sat$inter = sat$VM*sat$MODE
LMM_sat <- lmer(SCORE ~ VM + MODE + inter + SEX + VM:SEX + MODE:SEX + inter:SEX + (VM + MODE + inter || P), data = sat)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00471223
## (tol = 0.002, component 1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
summary(LMM_sat)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## SCORE ~ VM + MODE + inter + SEX + VM:SEX + MODE:SEX + inter:SEX +
## ((1 | P) + (0 + VM | P) + (0 + MODE | P) + (0 + inter | P))
## Data: sat
##
## REML criterion at convergence: 3965.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.80727 -0.19478 0.01683 0.20858 2.68902
##
## Random effects:
## Groups Name Variance Std.Dev.
## P (Intercept) 34.11868 5.8411
## P.1 VM 17.14463 4.1406
## P.2 MODE 0.08135 0.2852
## P.3 inter 0.75143 0.8669
## Residual 2.50103 1.5815
## Number of obs: 680, groups: P, 170
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 54.74635 0.45286 120.889
## VM -3.09340 0.32387 -9.551
## MODE -0.18420 0.06458 -2.852
## inter -0.31111 0.09015 -3.451
## SEX 1.16302 0.45286 2.568
## VM:SEX -1.28785 0.32387 -3.976
## MODE:SEX 0.09358 0.06458 1.449
## inter:SEX 0.21111 0.09015 2.342
##
## Correlation of Fixed Effects:
## (Intr) VM MODE inter SEX VM:SEX MODE:S
## VM 0.000
## MODE 0.000 0.000
## inter 0.000 0.000 0.000
## SEX 0.059 0.000 0.000 0.000
## VM:SEX 0.000 0.059 0.000 0.000 0.000
## MODE:SEX 0.000 0.000 0.059 0.000 0.000 0.000
## inter:SEX 0.000 0.000 0.000 0.059 0.000 0.000 0.000
## convergence code: 0
## Model failed to converge with max|grad| = 0.00471223 (tol = 0.002, component 1)
## Model is nearly unidentifiable: large eigenvalue ratio
## - Rescale variables?
The code below estimates parameters for the repeated measures approach:
sat = read.csv("sat1.csv", header = TRUE)
repSATs = c(sat$RSATV, sat$RSATM)
actSATs = c(sat$SATV, sat$SATM)
mathSATs = c(sat$SATM, sat$RSATM)
verbalSATs = c(sat$SATV, sat$RSATV)
mathverbMeans = mean(mathSATs)-mean(verbalSATs)
#####################
#construct data frame
#####################
sat_wide = data.frame(
partID = seq(1, 170),
sex = sat$SEX,
satv = sat$SATV,
satm = sat$SATM,
rsatv = sat$RSATV,
rsatm = sat$RSATM)
######################
#create contrast codes
######################
#ave: +1 for all sat variables each participant
#rep: +1 if actual, -1 if reported
#mathverb: +1 if math, -1 if verbal
#mathverbXrep: -1 if math rep/verbal act, +1 if verbal rep/math act
#sex: +1 if male, -1 if female
sat_wide$ave = (sat_wide$satv + sat_wide$satm + sat_wide$rsatv + sat_wide$rsatm)/4
sat_wide$rep = (-sat_wide$satv - sat_wide$satm + sat_wide$rsatv + sat_wide$rsatm)/4
sat_wide$mathverb = (-sat_wide$satm - sat_wide$rsatm + sat_wide$satv + sat_wide$rsatv)/4
sat_wide$mathverbXrep = (sat_wide$rsatv + sat_wide$satm - sat_wide$rsatm - sat_wide$satv)/4
sat_wide$sexOnes = +1*(sat_wide$sex==1)-1*(sat_wide$sex==2)
model.sex = lm(sat_wide$ave ~ sat_wide$sexOnes)
mcSummary(model.sex)
## Loading required package: car
## Warning: package 'car' was built under R version 3.6.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.6.3
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## lm(formula = sat_wide$ave ~ sat_wide$sexOnes)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 229.149 1 229.149 0.038 6.595 0.011
## Error 5837.030 168 34.744
## Corr Total 6066.180 169 35.895
##
## RMSE AdjEtaSq
## 5.894 0.032
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5
## (Intercept) 54.746 0.453 120.889 507754.723 0.989 NA 53.852 55.640
## sat_wide$sexOnes 1.163 0.453 2.568 229.149 0.038 NA 0.269 2.057
## p
## (Intercept) 0.000
## sat_wide$sexOnes 0.011
model.rep = lm(sat_wide$rep ~ 1 + sat_wide$sexOnes)
mcSummary(model.rep)
## lm(formula = sat_wide$rep ~ 1 + sat_wide$sexOnes)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 1.483 1 1.483 0.012 2.099 0.149
## Error 118.711 168 0.707
## Corr Total 120.194 169 0.711
##
## RMSE AdjEtaSq
## 0.841 0.006
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) -0.184 0.065 -2.852 5.748 0.046 NA -0.312 -0.057 0.005
## sat_wide$sexOnes 0.094 0.065 1.449 1.483 0.012 NA -0.034 0.221 0.149
model.mathverb = lm(sat_wide$mathverb ~ 1 + sat_wide$sexOnes)
mcSummary(model.mathverb)
## lm(formula = sat_wide$mathverb ~ 1 + sat_wide$sexOnes)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 280.978 1 280.978 0.086 15.812 0
## Error 2985.344 168 17.770
## Corr Total 3266.322 169 19.327
##
## RMSE AdjEtaSq
## 4.215 0.081
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) -3.093 0.324 -9.551 1621.125 0.352 NA -3.733 -2.454 0
## sat_wide$sexOnes -1.288 0.324 -3.976 280.978 0.086 NA -1.927 -0.648 0
model.mathverbXrep = lm(sat_wide$mathverbXrep ~ 1 + sat_wide$sexOnes)
mcSummary(model.mathverbXrep)
## lm(formula = sat_wide$mathverbXrep ~ 1 + sat_wide$sexOnes)
##
## Omnibus ANOVA
## SS df MS EtaSq F p
## Model 7.550 1 7.550 0.032 5.484 0.02
## Error 231.281 168 1.377
## Corr Total 238.831 169 1.413
##
## RMSE AdjEtaSq
## 1.173 0.026
##
## Coefficients
## Est StErr t SSR(3) EtaSq tol CI_2.5 CI_97.5 p
## (Intercept) -0.311 0.09 -3.451 16.397 0.066 NA -0.489 -0.133 0.001
## sat_wide$sexOnes 0.211 0.09 2.342 7.550 0.032 NA 0.033 0.389 0.020
In this section we compare the results of the LMM and RM estimation procedures. As can be seen in the tables below, the fixed-effect t-values are identical for these two approaches:
#################
#display t-values
#################
T_intercept_LMM = 120.889
T_VM_LMM = -9.551
T_MODE_LMM = -2.852
T_inter_LMM = -3.451
T_SEX_LMM = 2.568
T_VMxSEX_LMM = -3.976
T_MODExSEX_LMM = 1.449
T_interxSEX_LMM = 2.342
T_intercept_RM = 120.889
T_VM_RM = -9.551
T_MODE_RM = -2.852
T_inter_RM = -3.451
T_SEX_RM = 2.568
T_VMxSEX_RM = -3.976
T_MODExSEX_RM = 1.449
T_interxSEX_RM = 2.342
sexLabel = c("LMM", "RM")
intercept_t = c(T_intercept_LMM, T_intercept_RM)
VM_t = c(T_VM_LMM, T_VM_RM)
mode_t = c(T_MODE_LMM, T_MODE_RM)
inter_t = c(T_inter_LMM, T_inter_RM)
sex_t = c(T_SEX_LMM, T_SEX_RM)
VMxSex_t = c(T_VMxSEX_LMM, T_VMxSEX_RM)
modexSex_t = c(T_MODExSEX_LMM, T_MODExSEX_RM)
interxSex_t = c(T_interxSEX_LMM, T_interxSEX_RM)
tvalues = data.frame(sexLabel, intercept_t, VM_t, mode_t, inter_t, sex_t, VMxSex_t, modexSex_t, interxSex_t)
colnames(tvalues) = c("", "intercept_t", "VM_t", "mode_t", "inter_t", "sex_t", "VMxSex_t", "modexSex_t", "interxSex_t")
kable(tvalues, digits = 4, align = "lccc", caption = "Fixed effect t-values estimated with linear mixed model (LMM) and repeated measures (RM) approaches") %>% kable_styling("striped")
| intercept_t | VM_t | mode_t | inter_t | sex_t | VMxSex_t | modexSex_t | interxSex_t | |
|---|---|---|---|---|---|---|---|---|
| LMM | 120.889 | -9.551 | -2.852 | -3.451 | 2.568 | -3.976 | 1.449 | 2.342 |
| RM | 120.889 | -9.551 | -2.852 | -3.451 | 2.568 | -3.976 | 1.449 | 2.342 |
Likewise, the means for each of the eight groups (2 sex * 2 test type * 2 reporting mode) are identical between the modeling approaches:
#compute predicted means from LMM
#VM: V = verbal (+1), M = math (-1)
#MODE: A = actual (+1), S = self-report (-1)
#SEX: male (+1), female (-1)
mean_V_A_male = 54.746-3.09*(1)-.184*(1)-.3111*(1) + 1.16*(1)-1.28*(1)+.0935*(1)+.2111*(1) #51.34
#mean(sat[sat$VM==1 & sat$MODE==1 & sat$SEX==1, ]$SCORE) #51.33
mean_V_S_male = 54.746-3.09*(1)-.184*(-1)-.3111*(-1) + 1.16*(1)-1.28*(1)+.0935*(-1)+.2111*(-1) #51.718
#mean(sat[sat$VM==1 & sat$MODE==-1 & sat$SEX==1, ]$SCORE) #51.72
mean_M_A_male = 54.746-3.09*(-1)-.184*(1)-.3111*(-1) + 1.16*(1)-1.28*(-1)+.0935*(1)+.2111*(-1) #60.2855
#mean(sat[sat$VM==-1 & sat$MODE==1 & sat$SEX==1, ]$SCORE) #60.3
mean_M_S_male = 54.746-3.09*(-1)-.184*(-1)-.3111*(1) + 1.16*(1)-1.28*(-1)+.0935*(-1)+.2111*(1) #60.28
#mean(sat[sat$VM==-1 & sat$MODE==-1 & sat$SEX==1, ]$SCORE) #60.266
mean_V_A_female = 54.746-3.09*(1)-.184*(1)-.3111*(1) + 1.16*(-1)-1.28*(-1)+.0935*(-1)+.2111*(-1) #59.977
#mean(sat[sat$VM==1 & sat$MODE==1 & sat$SEX==-1, ]$SCORE) #50.97
mean_V_S_female = 54.746-3.09*(1)-.184*(-1)-.3111*(-1) + 1.16*(-1)-1.28*(-1)+.0935*(1)+.2111*(1) #52.577
#mean(sat[sat$VM==1 & sat$MODE==-1 & sat$SEX==-1, ]$SCORE) #52.57
mean_M_A_female = 54.746-3.09*(-1)-.184*(1)-.3111*(-1) + 1.16*(-1)-1.28*(1)+.0935*(-1)+.2111*(1) #55.633
#mean(sat[sat$VM==-1 & sat$MODE==1 & sat$SEX==-1, ]$SCORE) #55.64
mean_M_S_female = 54.746-3.09*(-1)-.184*(-1)-.3111*(1) + 1.16*(-1)-1.28*(1)+.0935*(1)+.2111*(-1) #55.144
#mean(sat[sat$VM==-1 & sat$MODE==-1 & sat$SEX==-1, ]$SCORE) #55.15
##################
################################
#display means from LMM approach
################################
sexLabel = c("Male", "Female")
SAT_V_A = c(mean_V_A_male, mean_V_A_female)
SAT_V_S = c(mean_V_S_male, mean_V_S_female)
SAT_M_A = c(mean_M_A_male, mean_M_A_female)
SAT_M_S = c(mean_M_S_male, mean_M_S_female)
scoreTable1 = data.frame(sexLabel, SAT_V_A, SAT_V_S, SAT_M_A, SAT_M_S)
colnames(scoreTable1) = c("", "SAT_V_A", "SAT_V_S", "SAT_M_A", "SAT_M_S")
kable(scoreTable1, digits = 4, align = "lccc", caption = "Group means by reporting status, test type and sex - mixed model approach") %>% kable_styling("striped")
| SAT_V_A | SAT_V_S | SAT_M_A | SAT_M_S | |
|---|---|---|---|---|
| Male | 51.3455 | 51.7265 | 60.2855 | 60.2665 |
| Female | 50.9763 | 52.5757 | 55.6407 | 55.1513 |
################################
######################################
#means from repeated measures appraoch
#(copied from HW18)
mean_V_A_male = 51.33
mean_V_S_male = 51.72
mean_M_A_male = 60.3
mean_M_S_male = 60.28
mean_V_A_female = 50.97
mean_V_S_female = 52.57
mean_M_A_female = 55.63
mean_M_S_female = 55.14
sexLabel = c("Male", "Female")
SAT_V_A = c(mean_V_A_male, mean_V_A_female)
SAT_V_S = c(mean_V_S_male, mean_V_S_female)
SAT_M_A = c(mean_M_A_male, mean_M_A_female)
SAT_M_S = c(mean_M_S_male, mean_M_S_female)
scoreTable2 = data.frame(sexLabel, SAT_V_A, SAT_V_S, SAT_M_A, SAT_M_S)
colnames(scoreTable2) = c("", "SAT_V_A", "SAT_V_S", "SAT_M_A", "SAT_M_S")
kable(scoreTable2, digits = 4, align = "lccc", caption = "Group means by reporting status, test type and sex - repeated measures approach") %>% kable_styling("striped")
| SAT_V_A | SAT_V_S | SAT_M_A | SAT_M_S | |
|---|---|---|---|---|
| Male | 51.33 | 51.72 | 60.30 | 60.28 |
| Female | 50.97 | 52.57 | 55.63 | 55.14 |
Therefore, since the fixed-effect results of the two approaches are identical, the LMM can be understood as a valuable alternative to the repeated measures ANOVA design.