For this HW I’ll be using the 2017 PSID data using state as my grouping variable. I’m going to investigate predicting income by state using demographic variables:
age^2 continuous
sex dichotomous
health categorical (very good, good, fair and poor)
race standard four (nhw, nhb, hisp, other) educ continuous number of yrs of school
All numerical values will be z-scaled.
h7 <- read_excel("~/Google Drive/Stat 2/hw 7/J257751.xlsx")
#table(h7$sex)
myscale <- function(x){as.numeric(scale(x))}
h7 <- h7 %>%
transmute(
state = ER66004,
age = ER66017,
sex = ER66018,
health = ER68420,
race = ifelse(ER70881 >= 1 & ER70881 <= 7, "hisp",
ifelse(ER70882 == 1, "nhw",
ifelse(ER70882 == 2, "nhb",
ifelse(ER70882 >= 4 & ER70882 <= 7, "other", NA)))),
hhinc = ER71426,
educ = ER71538,
wt = ER71570
) %>%
filter(
state <= 52,
hhinc <= 999999,
age <=120,
educ <= 17,
health <= 5
)
wt <- h7[,8 ]
h7 <- h7[,1:7] %>%
transmute(
state = state,
hhinc = hhinc,
age2 = age^2,
sex = ifelse(sex == 1, "male", "female"),
health = ifelse(health <= 2, "very good",
ifelse(health == 3, "good",
ifelse(health == 4, "fair",
ifelse(health == 5, "poor", NA)))),
educ = educ,
race = race
) %>%
mutate_if(is.numeric, myscale)
h7 <- cbind(h7,wt)
#str(h7)
h7 <- h7[complete.cases(h7), ]
fit.an <- lm(hhinc~as.factor(state), h7)
summary(fit.an)
##
## Call:
## lm(formula = hhinc ~ as.factor(state), data = h7)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2814 -0.6033 -0.2470 0.3120 12.7003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0993090 0.1388858 0.715 0.47460
## as.factor(state)-1.7571028333034 -0.1954940 0.1621053 -1.206 0.22786
## as.factor(state)-1.68960151483192 0.5174985 0.2969504 1.743 0.08142
## as.factor(state)-1.55459887788897 -0.1034111 0.1624220 -0.637 0.52435
## as.factor(state)-1.48709755941749 -0.2524552 0.1569949 -1.608 0.10786
## as.factor(state)-1.41959624094601 0.0060279 0.1428390 0.042 0.96634
## as.factor(state)-1.28459360400306 0.1205276 0.1597236 0.755 0.45051
## as.factor(state)-1.21709228553158 -0.1765834 0.1910807 -0.924 0.35544
## as.factor(state)-1.1495909670601 -0.2026948 0.3401994 -0.596 0.55132
## as.factor(state)-1.08208964858862 -0.3966216 0.1853619 -2.140 0.03240
## as.factor(state)-1.01458833011715 -0.1337970 0.1469578 -0.910 0.36261
## as.factor(state)-0.947087011645669 -0.1782021 0.1481922 -1.203 0.22920
## as.factor(state)-0.812084374702714 -0.3812917 0.4243032 -0.899 0.36887
## as.factor(state)-0.744583056231236 -0.2731333 0.2757217 -0.991 0.32190
## as.factor(state)-0.677081737759759 -0.1694257 0.1506561 -1.125 0.26079
## as.factor(state)-0.609580419288281 -0.2399895 0.1507773 -1.592 0.11149
## as.factor(state)-0.542079100816804 -0.0999026 0.1582091 -0.631 0.52776
## as.factor(state)-0.474577782345326 0.0358132 0.1887753 0.190 0.84954
## as.factor(state)-0.407076463873849 -0.1604969 0.1594752 -1.006 0.31425
## as.factor(state)-0.339575145402371 -0.3267992 0.1610613 -2.029 0.04248
## as.factor(state)-0.272073826930894 0.0431501 0.2318068 0.186 0.85233
## as.factor(state)-0.204572508459416 -0.0072835 0.1499077 -0.049 0.96125
## as.factor(state)-0.137071189987939 0.5599835 0.1610613 3.477 0.00051
## as.factor(state)-0.069569871516461 -0.1987590 0.1462549 -1.359 0.17418
## as.factor(state)-0.0020685530449835 0.1981989 0.1661434 1.193 0.23292
## as.factor(state)0.065432765426494 -0.4600021 0.1485018 -3.098 0.00196
## as.factor(state)0.132934083897972 -0.1998949 0.1511561 -1.322 0.18606
## as.factor(state)0.200435402369449 -0.0782690 0.3270593 -0.239 0.81087
## as.factor(state)0.267936720840927 0.0291544 0.1715853 0.170 0.86508
## as.factor(state)0.335438039312404 0.0727625 0.1793008 0.406 0.68489
## as.factor(state)0.402939357783882 0.3261445 0.2345424 1.391 0.16439
## as.factor(state)0.470440676255359 0.4333485 0.1574817 2.752 0.00594
## as.factor(state)0.537941994726837 -0.0352849 0.3156917 -0.112 0.91101
## as.factor(state)0.605443313198314 0.1072858 0.1487199 0.721 0.47069
## as.factor(state)0.672944631669792 -0.2329328 0.1455476 -1.600 0.10955
## as.factor(state)0.740445950141269 0.5078383 0.3156917 1.609 0.10773
## as.factor(state)0.807947268612747 -0.2062429 0.1474142 -1.399 0.16183
## as.factor(state)0.875448587084224 -0.3630567 0.1910807 -1.900 0.05746
## as.factor(state)0.942949905555702 0.0072576 0.1601076 0.045 0.96385
## as.factor(state)1.01045122402718 -0.1037956 0.1483963 -0.699 0.48429
## as.factor(state)1.14545386097013 -0.1316714 0.3963203 -0.332 0.73972
## as.factor(state)1.21295517944161 -0.3391215 0.1471501 -2.305 0.02121
## as.factor(state)1.28045649791309 0.1011244 0.2245017 0.450 0.65240
## as.factor(state)1.34795781638457 -0.2270014 0.1568073 -1.448 0.14775
## as.factor(state)1.41545913485604 0.0442615 0.1445385 0.306 0.75944
## as.factor(state)1.48296045332752 0.2809761 0.1797848 1.563 0.11812
## as.factor(state)1.550461771799 -0.0004115 0.3556008 -0.001 0.99908
## as.factor(state)1.61796309027048 -0.0467762 0.1500857 -0.312 0.75530
##
## (Intercept)
## as.factor(state)-1.7571028333034
## as.factor(state)-1.68960151483192 .
## as.factor(state)-1.55459887788897
## as.factor(state)-1.48709755941749
## as.factor(state)-1.41959624094601
## as.factor(state)-1.28459360400306
## as.factor(state)-1.21709228553158
## as.factor(state)-1.1495909670601
## as.factor(state)-1.08208964858862 *
## as.factor(state)-1.01458833011715
## as.factor(state)-0.947087011645669
## as.factor(state)-0.812084374702714
## as.factor(state)-0.744583056231236
## as.factor(state)-0.677081737759759
## as.factor(state)-0.609580419288281
## as.factor(state)-0.542079100816804
## as.factor(state)-0.474577782345326
## as.factor(state)-0.407076463873849
## as.factor(state)-0.339575145402371 *
## as.factor(state)-0.272073826930894
## as.factor(state)-0.204572508459416
## as.factor(state)-0.137071189987939 ***
## as.factor(state)-0.069569871516461
## as.factor(state)-0.0020685530449835
## as.factor(state)0.065432765426494 **
## as.factor(state)0.132934083897972
## as.factor(state)0.200435402369449
## as.factor(state)0.267936720840927
## as.factor(state)0.335438039312404
## as.factor(state)0.402939357783882
## as.factor(state)0.470440676255359 **
## as.factor(state)0.537941994726837
## as.factor(state)0.605443313198314
## as.factor(state)0.672944631669792
## as.factor(state)0.740445950141269
## as.factor(state)0.807947268612747
## as.factor(state)0.875448587084224 .
## as.factor(state)0.942949905555702
## as.factor(state)1.01045122402718
## as.factor(state)1.14545386097013
## as.factor(state)1.21295517944161 *
## as.factor(state)1.28045649791309
## as.factor(state)1.34795781638457
## as.factor(state)1.41545913485604
## as.factor(state)1.48296045332752
## as.factor(state)1.550461771799
## as.factor(state)1.61796309027048
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9821 on 9010 degrees of freedom
## Multiple R-squared: 0.03677, Adjusted R-squared: 0.03175
## F-statistic: 7.319 on 47 and 9010 DF, p-value: < 2.2e-16
From the ANOVA we can that there is indeed variation between the states, so we will proceed to multi level modeling
fit <- lmer(hhinc~(1|state), data = h7, na.action = na.omit)
arm::display(fit, detail = T)
## lmer(formula = hhinc ~ (1 | state), data = h7, na.action = na.omit)
## coef.est coef.se t value
## (Intercept) 0.03 0.03 1.02
##
## Error terms:
## Groups Name Std.Dev.
## state (Intercept) 0.20
## Residual 0.98
## ---
## number of obs: 9058, groups: state, 48
## AIC = 25472.1, DIC = 25456.1
## deviance = 25461.1
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: hhinc ~ (1 | state)
## Data: h7
##
## REML criterion at convergence: 25466.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2980 -0.6180 -0.2550 0.3201 12.9447
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.04036 0.2009
## Residual 0.96432 0.9820
## Number of obs: 9058, groups: state, 48
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 0.03446 0.03365 42.49450 1.024 0.312
sjstats::icc(fit)
##
## Intraclass Correlation Coefficient for Linear mixed model
##
## Family : gaussian (identity)
## Formula: hhinc ~ (1 | state)
##
## ICC (state): 0.0402
Residual 0.964
ICC: 0.0402
And from here we can see that the variance within the states does not appear to be significant, and the variance between the states is about 4%. nonethe less we will press forward
fit2 <- lmer(hhinc ~ age2 + sex + health + educ + race + (1 | state), data =h7, na.action = na.omit)
arm::display(fit2)
## lmer(formula = hhinc ~ age2 + sex + health + educ + race + (1 |
## state), data = h7, na.action = na.omit)
## coef.est coef.se
## (Intercept) -0.60 0.05
## age2 0.05 0.01
## sexmale 0.53 0.02
## healthgood 0.13 0.03
## healthpoor -0.12 0.05
## healthvery good 0.26 0.03
## educ 0.29 0.01
## racenhb -0.14 0.03
## racenhw 0.21 0.03
## raceother 0.15 0.07
##
## Error terms:
## Groups Name Std.Dev.
## state (Intercept) 0.13
## Residual 0.85
## ---
## number of obs: 9058, groups: state, 48
## AIC = 22995.1, DIC = 22856.8
## deviance = 22914.0
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: hhinc ~ age2 + sex + health + educ + race + (1 | state)
## Data: h7
##
## REML criterion at convergence: 22971.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1191 -0.5596 -0.1453 0.3312 14.1315
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 0.01771 0.1331
## Residual 0.73015 0.8545
## Number of obs: 9058, groups: state, 48
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -5.966e-01 4.522e-02 4.778e+02 -13.192 < 2e-16 ***
## age2 5.314e-02 9.473e-03 9.044e+03 5.609 2.09e-08 ***
## sexmale 5.317e-01 1.997e-02 9.030e+03 26.617 < 2e-16 ***
## healthgood 1.323e-01 2.874e-02 9.026e+03 4.603 4.22e-06 ***
## healthpoor -1.205e-01 4.950e-02 9.027e+03 -2.435 0.0149 *
## healthvery good 2.570e-01 2.789e-02 9.031e+03 9.213 < 2e-16 ***
## educ 2.905e-01 9.796e-03 9.048e+03 29.659 < 2e-16 ***
## racenhb -1.364e-01 3.424e-02 8.060e+03 -3.984 6.83e-05 ***
## racenhw 2.061e-01 3.372e-02 8.666e+03 6.111 1.03e-09 ***
## raceother 1.489e-01 6.552e-02 9.030e+03 2.273 0.0230 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age2 sexmal hlthgd hlthpr hlthvg educ racnhb racnhw
## age2 -0.009
## sexmale -0.289 0.042
## healthgood -0.437 0.079 -0.028
## healthpoor -0.241 -0.083 0.025 0.380
## healthvrygd -0.440 0.162 -0.072 0.732 0.381
## educ 0.197 0.076 0.036 -0.080 0.064 -0.146
## racenhb -0.615 -0.033 0.103 0.016 0.001 0.012 -0.150
## racenhw -0.588 -0.143 -0.041 -0.013 -0.020 -0.039 -0.278 0.775
## raceother -0.269 -0.061 -0.038 0.002 -0.034 -0.018 -0.173 0.368 0.404
sjstats::icc(fit2)
##
## Intraclass Correlation Coefficient for Linear mixed model
##
## Family : gaussian (identity)
## Formula: hhinc ~ age2 + sex + health + educ + race + (1 | state)
##
## ICC (state): 0.0237
Residual 0.7315
ICC: 0.0237
It appears that all variables are significant. Not suprisingly it appears as if the strongest positive outcomes are being male, nhw or other race, in good or very good health and have more education than less. The largest negative effects were being in poor health and nhb. While age2 was significant and positive, the coefficient was fairly minimal.
see above
anova(fit, fit2, test = "Chisq")
## refitting model(s) with ML (instead of REML)
## Data: h7
## Models:
## fit: hhinc ~ (1 | state)
## fit2: hhinc ~ age2 + sex + health + educ + race + (1 | state)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit 3 25467 25488 -12731 25461
## fit2 12 22938 23023 -11457 22914 2547.1 9 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Absolutely it does. We see an AIC drop by approx 2,500 and a significantt result from the Chi Sq test.