New and improved method for calculating predicted scores.

Here's an example of a mixed model with age and sex. We're interested in whether women and men change at the different rates (is the age by sex interaction significant?).

First, get data and mixed-mode package (lme4).

library(lme4)
(load('/prj/hnd/git/dataMgr/Projects/chapter for Shari/zrdata/HEPESE.rdata'))
[1] "HEPESE"
zQuick(HEPESE)
Dimensions: 11238 18 

     HEPid              Visit               Age                AgeCen                DOV1                EthBck     
 Min.   :10001.00   Min.   :1.000000   Min.   : 65.00000   Min.   :-1.5000000   Min.   :1993-07-01   Mexican:10532  
 1st Qu.:10766.00   1st Qu.:1.000000   1st Qu.: 71.00000   1st Qu.:-0.9000000   1st Qu.:1993-11-01   Hispano:  706  
 Median :11804.00   Median :3.000000   Median : 77.00000   Median :-0.3000000   Median :1994-01-17                  
 Mean   :12325.81   Mean   :2.843477   Mean   : 77.00018   Mean   :-0.2999822   Mean   :1993-12-30                  
 3rd Qu.:12545.75   3rd Qu.:4.000000   3rd Qu.: 82.00000   3rd Qu.: 0.2000000   3rd Qu.:1994-03-03                  
 Max.   :20282.00   Max.   :6.000000   Max.   :111.00000   Max.   : 3.1000000   Max.   :1994-07-17                  

    Sex        BornUS         Grade                 compEng      Smoke1          Marital        Diabetes       
 Men  :4506   No  :4910   Min.   : 0.000000   No        :3297   No  :4620   Married  :5537   Min.   :0.000000  
 Women:6732   Yes :6324   1st Qu.: 2.000000   notWell   :3232   Yes :6576   Separated: 281   1st Qu.:1.000000  
              NA's:   4   Median : 4.000000   prettyWell:1906   NA's:  42   Divorced : 450   Median :2.000000  
                          Mean   : 4.869122   veryWell  :2672               Widowed  :4448   Mean   :2.020208  
                          3rd Qu.: 7.000000   NA's      : 131               Never    : 513   3rd Qu.:3.000000  
                          Max.   :20.000000                                 NA's     :   9   Max.   :9.000000  
                          NA's   :159                                                        NA's   :5         
      CESD            Smoke           SBP                DBP                 MAP           
 Min.   : 0.000000   No  :1088   Min.   : 70.0000   Min.   : 15.00000   Min.   : 51.33333  
 1st Qu.: 2.000000   Yes :7249   1st Qu.:122.0000   1st Qu.: 70.00000   1st Qu.: 89.33333  
 Median : 6.000000   NA's:2901   Median :132.0000   Median : 78.00000   Median : 95.66667  
 Mean   : 8.518208               Mean   :133.9994   Mean   : 77.30569   Mean   : 96.20310  
 3rd Qu.:13.000000               3rd Qu.:142.0000   3rd Qu.: 85.00000   3rd Qu.:103.00000  
 Max.   :57.000000               Max.   :238.0000   Max.   :158.00000   Max.   :165.33333  
 NA's   :955                     NA's   :1178       NA's   :1182        NA's   :1182       

Do the mixed model. Notice that age is measured by AgeCen, so age is in decade units centered at 80 (something I calculated when I created the file).

mer.1 = lmer(MAP ~ AgeCen*Sex + I(AgeCen^2)*Sex + (AgeCen|HEPid), data=HEPESE, na.action=na.omit)
summary(mer.1)
Linear mixed model fit by REML ['lmerMod']
Formula: MAP ~ AgeCen * Sex + I(AgeCen^2) * Sex + (AgeCen | HEPid)
   Data: HEPESE

REML criterion at convergence: 75879.8

Scaled residuals: 
       Min         1Q     Median         3Q        Max 
-3.6557626 -0.5993939 -0.0367944  0.5523970  6.8109108 

Random effects:
 Groups   Name        Variance  Std.Dev. Corr   
 HEPid    (Intercept) 24.630835 4.962946        
          AgeCen       1.783732 1.335564 0.01389
 Residual             91.835480 9.583083        
Number of obs: 10056, groups:  HEPid, 2933

Fixed effects:
                       Estimate Std. Error  t value
(Intercept)          95.2206852  0.2707477 351.6953
AgeCen               -3.7807774  0.3096099 -12.2114
SexWomen             -0.1684614  0.3490108  -0.4827
I(AgeCen^2)           0.3872820  0.3046037   1.2714
AgeCen:SexWomen       0.9261614  0.3904005   2.3723
SexWomen:I(AgeCen^2) -0.2120562  0.3821331  -0.5549

Correlation of Fixed Effects:
            (Intr) AgeCen SexWmn I(AC^2 AgC:SW
AgeCen       0.114                            
SexWomen    -0.776 -0.088                     
I(AgeCen^2) -0.473  0.455  0.367              
AgeCn:SxWmn -0.090 -0.793  0.108 -0.361       
SxW:I(AC^2)  0.377 -0.363 -0.477 -0.797  0.432

The significance of individual coefficients is controversial in lmer (and mixed models in general). Coefficients with abs(t)>2 are usually significant at p<.05. There are other ways to get 'exact' p values in R.

Although it's straightforward to visualize the results in a simple model, plotting the predicted values is usually the only way to see the direction of the associations especially with lots of covariates or higher-order interactions.

Notice zHat, the new way to compute predicted values.

plotData = zHat(HEPESE, mer.1, ID='HEPid', xAxis='AgeCen', xIncr='.1', factors='Sex')

par(las=1, lwd=2)       # Y labels horizontal, line widths=2x)

with(plotData[plotData$Sex=='Women',], plot (AgeCen, Hat, lty=1, col='red',  typ='l', ylim=c(80,110), ylab='MAP', xlab='Age'))
with(plotData[plotData$Sex=='Men',],   lines(AgeCen, Hat, lty=1, col='blue'))

legend(-2, 110, zQ(Women,Men), lty=1, col=zQ(red,blue))

plot of chunk plt.1a

This looks great, except that age is censored. To return age to its actual values:

plotData$Age = 10 * plotData$AgeCen + 80

par(las=1, lwd=2)

with(plotData[plotData$Sex=='Women',], plot (Age, Hat, lty=1, col='red',  typ='l', ylim=c(80,110), ylab='MAP', xlab='Age'))
with(plotData[plotData$Sex=='Men',],   lines(Age, Hat, lty=1, col='blue'))

legend(60, 110, zQ(Women,Men), lty=1, col=zQ(red,blue))

plot of chunk plt.1b

Add covariates high school education status (HSeduc) and *CESD. Plot the results.

HEPESE$HSeduc = factor(HEPESE$Grade>=12, levels=c(F,T), labels=c('<HS','HS+'))

mer.2 = lmer(MAP ~ HSeduc*AgeCen*Sex + CESD + (AgeCen|HEPid), data=HEPESE, na.action=na.omit)
summary(mer.2)
Linear mixed model fit by REML ['lmerMod']
Formula: MAP ~ HSeduc * AgeCen * Sex + CESD + (AgeCen | HEPid)
   Data: HEPESE

REML criterion at convergence: 73330.1

Scaled residuals: 
       Min         1Q     Median         3Q        Max 
-3.6696725 -0.5965510 -0.0368395  0.5515201  6.8144570 

Random effects:
 Groups   Name        Variance  Std.Dev. Corr    
 HEPid    (Intercept) 24.644941 4.964367         
          AgeCen       1.485765 1.218920 -0.01865
 Residual             91.394845 9.560065         
Number of obs: 9724, groups:  HEPid, 2864

Fixed effects:
                              Estimate   Std. Error  t value
(Intercept)               95.583618463  0.274787300 347.8458
HSeducHS+                 -1.814475304  0.835971668  -2.1705
AgeCen                    -3.881785192  0.297330262 -13.0555
SexWomen                  -0.358479421  0.329579042  -1.0877
CESD                       0.006072622  0.012818879   0.4737
HSeducHS+:AgeCen          -0.544977590  0.964381936  -0.5651
HSeducHS+:SexWomen         0.455808639  1.104227294   0.4128
AgeCen:SexWomen            1.037208557  0.377646538   2.7465
HSeducHS+:AgeCen:SexWomen -0.350361765  1.258661784  -0.2784

Correlation of Fixed Effects:
            (Intr) HSdHS+ AgeCen SexWmn CESD   HSdHS+:AC HSHS+:S AgC:SW
HSeducHS+   -0.295                                                     
AgeCen       0.403 -0.129                                              
SexWomen    -0.688  0.235 -0.319                                       
CESD        -0.363  0.026 -0.043 -0.099                                
HSdcHS+:AgC -0.117  0.539 -0.307  0.100 -0.006                         
HSdcHS+:SxW  0.211 -0.756  0.096 -0.297  0.013 -0.408                  
AgeCn:SxWmn -0.308  0.101 -0.786  0.391  0.008  0.242    -0.117        
HSHS+:AC:SW  0.089 -0.413  0.235 -0.118  0.007 -0.766     0.509  -0.300
plotData = zHat(HEPESE, mer.2, ID='HEPid', xAxis='AgeCen', xIncr='.1', factors=zQ(Sex,HSeduc))
plotData$Age = 10 * plotData$AgeCen + 80

par(las=1, lwd=2)

with(plotData[plotData$Sex=='Women' & plotData$HSeduc=='<HS',], plot (Age, Hat, lty=1, col='red',  typ='l', ylim=c(75,110), ylab='MAP', xlab='Age'))
with(plotData[plotData$Sex=='Men'   & plotData$HSeduc=='<HS',], lines(Age, Hat, lty=1, col='blue'))

with(plotData[plotData$Sex=='Women' & plotData$HSeduc=='HS+',], lines(Age, Hat, lty=2, col='red'))
with(plotData[plotData$Sex=='Men'   & plotData$HSeduc=='HS+',], lines(Age, Hat, lty=2, col='blue'))

legend(100, 110, c('Women,<HS','Men,<HS','Women,HS+','Men,HS+'), lty=c(1,2,1,2), col=zQ(red,blue))

plot of chunk mer.2

Notice that CESD isn't in the zHat call. Any predictor variables not mentioned as added as mean values in evaluating the regression coefficients.