1) Estimate the random intercept model with just the higher level units as your predictor (NULL model)

For this HW I’ll be using the 2017 PSID data using state as my grouping variable. I’m going to investigate predicting income by state using demographic variables:
age^2 continuous
sex dichotomous
health categorical (very good, good, fair and poor)
race standard four (nhw, nhb, hisp, other) educ continuous number of yrs of school

All numerical values will be z-scaled.

h7 <- read_excel("~/Google Drive/Stat 2/hw 7/J257751.xlsx")
#table(h7$sex)
myscale <- function(x){as.numeric(scale(x))}
h7 <- h7 %>% 
  transmute(
    state = ER66004,
    age = ER66017,
    sex = ER66018,
    health = ER68420,
    race = ifelse(ER70881 >= 1 & ER70881 <= 7, "hisp",
                  ifelse(ER70882 == 1, "nhw",
                         ifelse(ER70882 == 2, "nhb",
                                ifelse(ER70882 >= 4 & ER70882 <= 7, "other", NA)))),
    hhinc = ER71426,
    educ = ER71538,
    wt = ER71570
  ) %>% 
  filter(
    state <= 52,
    hhinc  <= 999999,
    age <=120,
    educ <= 17,
    health <= 5
  )
wt <- h7[,8 ]
h7 <- h7[,1:7] %>% 
  transmute(
    state = state,
    hhinc = hhinc,
    age2 = age^2,
    sex = ifelse(sex == 1, "male", "female"),
    health = ifelse(health <= 2, "very good",
                    ifelse(health == 3, "good",
                           ifelse(health == 4, "fair",
                                  ifelse(health == 5, "poor", NA)))),
    educ = educ,
    race = race
  ) %>% 
  mutate_if(is.numeric, myscale)
h7 <- cbind(h7,wt)
#str(h7)
h7 <- h7[complete.cases(h7), ]
a)Report the variance components and ICC
fit.an <- lm(hhinc~as.factor(state), h7)
summary(fit.an)
## 
## Call:
## lm(formula = hhinc ~ as.factor(state), data = h7)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2814 -0.6033 -0.2470  0.3120 12.7003 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                          0.0993090  0.1388858   0.715  0.47460
## as.factor(state)-1.7571028333034    -0.1954940  0.1621053  -1.206  0.22786
## as.factor(state)-1.68960151483192    0.5174985  0.2969504   1.743  0.08142
## as.factor(state)-1.55459887788897   -0.1034111  0.1624220  -0.637  0.52435
## as.factor(state)-1.48709755941749   -0.2524552  0.1569949  -1.608  0.10786
## as.factor(state)-1.41959624094601    0.0060279  0.1428390   0.042  0.96634
## as.factor(state)-1.28459360400306    0.1205276  0.1597236   0.755  0.45051
## as.factor(state)-1.21709228553158   -0.1765834  0.1910807  -0.924  0.35544
## as.factor(state)-1.1495909670601    -0.2026948  0.3401994  -0.596  0.55132
## as.factor(state)-1.08208964858862   -0.3966216  0.1853619  -2.140  0.03240
## as.factor(state)-1.01458833011715   -0.1337970  0.1469578  -0.910  0.36261
## as.factor(state)-0.947087011645669  -0.1782021  0.1481922  -1.203  0.22920
## as.factor(state)-0.812084374702714  -0.3812917  0.4243032  -0.899  0.36887
## as.factor(state)-0.744583056231236  -0.2731333  0.2757217  -0.991  0.32190
## as.factor(state)-0.677081737759759  -0.1694257  0.1506561  -1.125  0.26079
## as.factor(state)-0.609580419288281  -0.2399895  0.1507773  -1.592  0.11149
## as.factor(state)-0.542079100816804  -0.0999026  0.1582091  -0.631  0.52776
## as.factor(state)-0.474577782345326   0.0358132  0.1887753   0.190  0.84954
## as.factor(state)-0.407076463873849  -0.1604969  0.1594752  -1.006  0.31425
## as.factor(state)-0.339575145402371  -0.3267992  0.1610613  -2.029  0.04248
## as.factor(state)-0.272073826930894   0.0431501  0.2318068   0.186  0.85233
## as.factor(state)-0.204572508459416  -0.0072835  0.1499077  -0.049  0.96125
## as.factor(state)-0.137071189987939   0.5599835  0.1610613   3.477  0.00051
## as.factor(state)-0.069569871516461  -0.1987590  0.1462549  -1.359  0.17418
## as.factor(state)-0.0020685530449835  0.1981989  0.1661434   1.193  0.23292
## as.factor(state)0.065432765426494   -0.4600021  0.1485018  -3.098  0.00196
## as.factor(state)0.132934083897972   -0.1998949  0.1511561  -1.322  0.18606
## as.factor(state)0.200435402369449   -0.0782690  0.3270593  -0.239  0.81087
## as.factor(state)0.267936720840927    0.0291544  0.1715853   0.170  0.86508
## as.factor(state)0.335438039312404    0.0727625  0.1793008   0.406  0.68489
## as.factor(state)0.402939357783882    0.3261445  0.2345424   1.391  0.16439
## as.factor(state)0.470440676255359    0.4333485  0.1574817   2.752  0.00594
## as.factor(state)0.537941994726837   -0.0352849  0.3156917  -0.112  0.91101
## as.factor(state)0.605443313198314    0.1072858  0.1487199   0.721  0.47069
## as.factor(state)0.672944631669792   -0.2329328  0.1455476  -1.600  0.10955
## as.factor(state)0.740445950141269    0.5078383  0.3156917   1.609  0.10773
## as.factor(state)0.807947268612747   -0.2062429  0.1474142  -1.399  0.16183
## as.factor(state)0.875448587084224   -0.3630567  0.1910807  -1.900  0.05746
## as.factor(state)0.942949905555702    0.0072576  0.1601076   0.045  0.96385
## as.factor(state)1.01045122402718    -0.1037956  0.1483963  -0.699  0.48429
## as.factor(state)1.14545386097013    -0.1316714  0.3963203  -0.332  0.73972
## as.factor(state)1.21295517944161    -0.3391215  0.1471501  -2.305  0.02121
## as.factor(state)1.28045649791309     0.1011244  0.2245017   0.450  0.65240
## as.factor(state)1.34795781638457    -0.2270014  0.1568073  -1.448  0.14775
## as.factor(state)1.41545913485604     0.0442615  0.1445385   0.306  0.75944
## as.factor(state)1.48296045332752     0.2809761  0.1797848   1.563  0.11812
## as.factor(state)1.550461771799      -0.0004115  0.3556008  -0.001  0.99908
## as.factor(state)1.61796309027048    -0.0467762  0.1500857  -0.312  0.75530
##                                        
## (Intercept)                            
## as.factor(state)-1.7571028333034       
## as.factor(state)-1.68960151483192   .  
## as.factor(state)-1.55459887788897      
## as.factor(state)-1.48709755941749      
## as.factor(state)-1.41959624094601      
## as.factor(state)-1.28459360400306      
## as.factor(state)-1.21709228553158      
## as.factor(state)-1.1495909670601       
## as.factor(state)-1.08208964858862   *  
## as.factor(state)-1.01458833011715      
## as.factor(state)-0.947087011645669     
## as.factor(state)-0.812084374702714     
## as.factor(state)-0.744583056231236     
## as.factor(state)-0.677081737759759     
## as.factor(state)-0.609580419288281     
## as.factor(state)-0.542079100816804     
## as.factor(state)-0.474577782345326     
## as.factor(state)-0.407076463873849     
## as.factor(state)-0.339575145402371  *  
## as.factor(state)-0.272073826930894     
## as.factor(state)-0.204572508459416     
## as.factor(state)-0.137071189987939  ***
## as.factor(state)-0.069569871516461     
## as.factor(state)-0.0020685530449835    
## as.factor(state)0.065432765426494   ** 
## as.factor(state)0.132934083897972      
## as.factor(state)0.200435402369449      
## as.factor(state)0.267936720840927      
## as.factor(state)0.335438039312404      
## as.factor(state)0.402939357783882      
## as.factor(state)0.470440676255359   ** 
## as.factor(state)0.537941994726837      
## as.factor(state)0.605443313198314      
## as.factor(state)0.672944631669792      
## as.factor(state)0.740445950141269      
## as.factor(state)0.807947268612747      
## as.factor(state)0.875448587084224   .  
## as.factor(state)0.942949905555702      
## as.factor(state)1.01045122402718       
## as.factor(state)1.14545386097013       
## as.factor(state)1.21295517944161    *  
## as.factor(state)1.28045649791309       
## as.factor(state)1.34795781638457       
## as.factor(state)1.41545913485604       
## as.factor(state)1.48296045332752       
## as.factor(state)1.550461771799         
## as.factor(state)1.61796309027048       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9821 on 9010 degrees of freedom
## Multiple R-squared:  0.03677,    Adjusted R-squared:  0.03175 
## F-statistic: 7.319 on 47 and 9010 DF,  p-value: < 2.2e-16

From the ANOVA we can that there is indeed variation between the states, so we will proceed to multi level modeling

fit <- lmer(hhinc~(1|state), data = h7, na.action = na.omit)
arm::display(fit, detail = T)
## lmer(formula = hhinc ~ (1 | state), data = h7, na.action = na.omit)
##             coef.est coef.se t value
## (Intercept) 0.03     0.03    1.02   
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  state    (Intercept) 0.20    
##  Residual             0.98    
## ---
## number of obs: 9058, groups: state, 48
## AIC = 25472.1, DIC = 25456.1
## deviance = 25461.1
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: hhinc ~ (1 | state)
##    Data: h7
## 
## REML criterion at convergence: 25466.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2980 -0.6180 -0.2550  0.3201 12.9447 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  state    (Intercept) 0.04036  0.2009  
##  Residual             0.96432  0.9820  
## Number of obs: 9058, groups:  state, 48
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)  0.03446    0.03365 42.49450   1.024    0.312
sjstats::icc(fit)
## 
## Intraclass Correlation Coefficient for Linear mixed model
## 
## Family : gaussian (identity)
## Formula: hhinc ~ (1 | state)
## 
##   ICC (state): 0.0402

Residual 0.964
ICC: 0.0402
And from here we can see that the variance within the states does not appear to be significant, and the variance between the states is about 4%. nonethe less we will press forward

2) Now fit the mulltilevel model for your outcome and include both the higher level term and appropriate individual level predictors

fit2 <- lmer(hhinc ~ age2 + sex + health + educ + race + (1 | state), data =h7, na.action = na.omit)
arm::display(fit2)
## lmer(formula = hhinc ~ age2 + sex + health + educ + race + (1 | 
##     state), data = h7, na.action = na.omit)
##                 coef.est coef.se
## (Intercept)     -0.60     0.05  
## age2             0.05     0.01  
## sexmale          0.53     0.02  
## healthgood       0.13     0.03  
## healthpoor      -0.12     0.05  
## healthvery good  0.26     0.03  
## educ             0.29     0.01  
## racenhb         -0.14     0.03  
## racenhw          0.21     0.03  
## raceother        0.15     0.07  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  state    (Intercept) 0.13    
##  Residual             0.85    
## ---
## number of obs: 9058, groups: state, 48
## AIC = 22995.1, DIC = 22856.8
## deviance = 22914.0
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: hhinc ~ age2 + sex + health + educ + race + (1 | state)
##    Data: h7
## 
## REML criterion at convergence: 22971.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1191 -0.5596 -0.1453  0.3312 14.1315 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  state    (Intercept) 0.01771  0.1331  
##  Residual             0.73015  0.8545  
## Number of obs: 9058, groups:  state, 48
## 
## Fixed effects:
##                   Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)     -5.966e-01  4.522e-02  4.778e+02 -13.192  < 2e-16 ***
## age2             5.314e-02  9.473e-03  9.044e+03   5.609 2.09e-08 ***
## sexmale          5.317e-01  1.997e-02  9.030e+03  26.617  < 2e-16 ***
## healthgood       1.323e-01  2.874e-02  9.026e+03   4.603 4.22e-06 ***
## healthpoor      -1.205e-01  4.950e-02  9.027e+03  -2.435   0.0149 *  
## healthvery good  2.570e-01  2.789e-02  9.031e+03   9.213  < 2e-16 ***
## educ             2.905e-01  9.796e-03  9.048e+03  29.659  < 2e-16 ***
## racenhb         -1.364e-01  3.424e-02  8.060e+03  -3.984 6.83e-05 ***
## racenhw          2.061e-01  3.372e-02  8.666e+03   6.111 1.03e-09 ***
## raceother        1.489e-01  6.552e-02  9.030e+03   2.273   0.0230 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) age2   sexmal hlthgd hlthpr hlthvg educ   racnhb racnhw
## age2        -0.009                                                        
## sexmale     -0.289  0.042                                                 
## healthgood  -0.437  0.079 -0.028                                          
## healthpoor  -0.241 -0.083  0.025  0.380                                   
## healthvrygd -0.440  0.162 -0.072  0.732  0.381                            
## educ         0.197  0.076  0.036 -0.080  0.064 -0.146                     
## racenhb     -0.615 -0.033  0.103  0.016  0.001  0.012 -0.150              
## racenhw     -0.588 -0.143 -0.041 -0.013 -0.020 -0.039 -0.278  0.775       
## raceother   -0.269 -0.061 -0.038  0.002 -0.034 -0.018 -0.173  0.368  0.404
sjstats::icc(fit2)
## 
## Intraclass Correlation Coefficient for Linear mixed model
## 
## Family : gaussian (identity)
## Formula: hhinc ~ age2 + sex + health + educ + race + (1 | state)
## 
##   ICC (state): 0.0237
a)Report the variance components of the model and describe the results of the model

Residual 0.7315
ICC: 0.0237

It appears that all variables are significant. Not suprisingly it appears as if the strongest positive outcomes are being male, nhw or other race, in good or very good health and have more education than less. The largest negative effects were being in poor health and nhb. While age2 was significant and positive, the coefficient was fairly minimal.

b) Report the results for the fixed effects

see above

3) Compare the models from 1 and 2 using LRT. Does the inclusion of teh individual level variables increase the overall model fit?

anova(fit, fit2, test = "Chisq")
## refitting model(s) with ML (instead of REML)
## Data: h7
## Models:
## fit: hhinc ~ (1 | state)
## fit2: hhinc ~ age2 + sex + health + educ + race + (1 | state)
##      Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit   3 25467 25488 -12731    25461                             
## fit2 12 22938 23023 -11457    22914 2547.1      9  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Absolutely it does. We see an AIC drop by approx 2,500 and a significantt result from the Chi Sq test.