##For this homework I again use the public version of nationally representative cohort of U.S. children enrolled in the Early Childhood Longitudinal Study, Kindergarten Class of 2010–11 (ECLS-K:2011). However, the research question now concerns the association between child's health and such predictors as race/ethnicity, income and health insurance coverage. By the means of non-proportionate odds ordinal and multinomial logistic regression models I examine the probabilities of different groups according to race/ethnicity, income level and health insurance status to be in excellent health. Therefore the dependent variable is child's health, while predictors include race/ethnicity, income and health insurance coverage.
## Dependent variable
#Child's health
sub$ch_health<-Recode(sub$p2hscale,recodes="1=1;2:3=2;4=3;else=NA",as.factor=TRUE) #1 - excellent, 2 - good, 3 - fair/poor
head(sub$ch_health)
## [1] 2 1 1 1 1 2
## Levels: 1 2 3
levels(sub$ch_health)
## [1] "1" "2" "3"
## Predictors
#Race(NH-White,NH-Black,Hispanic,Other)
sub$race<-ifelse(sub$x2par1rac==1 ,"NH-White",ifelse(sub$x2par1rac==2,"NH-Black",
ifelse(sub$x2par1rac==3 | sub$x2par1rac==4,"Hispanic","Other")))
sub$race<-as.factor(sub$race)
levels(sub$race)
## [1] "Hispanic" "NH-Black" "NH-White" "Other"
head(sub$race)
## [1] NH-White NH-White NH-White NH-White NH-White NH-White
## Levels: Hispanic NH-Black NH-White Other
#Income
sub$income<-Recode(sub$p2hilow,recodes="1='less_eq25000';2='more25000';else=NA",as.factor=TRUE)
head(sub$income)
## [1] more25000 more25000 more25000 more25000 more25000 more25000
## Levels: less_eq25000 more25000
#Health insurance
sub$h_insure<-Recode(sub$p2cover,recodes="2=0;1=1;else=NA",as.factor=TRUE) #0 uninsured, 1 insured
head(sub$h_insure)
## [1] 1 1 1 1 0 1
## Levels: 0 1
levels(sub$h_insure)
## [1] "0" "1"
##Selecting variables
sub<-sub%>%
filter( complete.cases(.))
#Survey design object
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~w2p0psu, strata=~w2p0str, weights=~w2p0, data =sub , nest=T)
##Ordinal logit - Non-proportional odds
#We assume coefficients not to be the same across transitions
fit.vgam<-vglm(formula = as.ordered(ch_health)~income+race++h_insure, sub, weights=w2p0/mean(w2p0, na.rm=T), family=cumulative(parallel = F, reverse = T))
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(ch_health) ~ income + race + +h_insure,
## family = cumulative(parallel = F, reverse = T), data = sub,
## weights = w2p0/mean(w2p0, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -1.9756 -0.7711 -0.5819 1.04646 2.666
## logitlink(P[Y>=3]) -0.6067 -0.1603 -0.1132 -0.08188 18.059
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.31527 0.08716 3.617 0.000298 ***
## (Intercept):2 -2.88396 0.26219 -10.999 < 2e-16 ***
## incomemore25000:1 -0.55171 0.04346 -12.696 < 2e-16 ***
## incomemore25000:2 -1.17763 0.13514 -8.714 < 2e-16 ***
## raceNH-Black:1 -0.23753 0.06683 -3.554 0.000379 ***
## raceNH-Black:2 -0.37091 0.18521 -2.003 0.045219 *
## raceNH-White:1 -0.38077 0.04912 -7.752 9.04e-15 ***
## raceNH-White:2 -0.73477 0.15189 -4.838 1.31e-06 ***
## raceOther:1 -0.00125 0.07519 -0.017 0.986741
## raceOther:2 -0.14952 0.21979 -0.680 0.496313
## h_insure1:1 -0.13007 0.08410 -1.547 0.121929
## h_insure1:2 0.18553 0.26520 0.700 0.484189
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 18169.61 on 24614 degrees of freedom
##
## Log-likelihood: -9084.807 on 24614 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):2'
##
##
## Exponentiated coefficients:
## incomemore25000:1 incomemore25000:2 raceNH-Black:1 raceNH-Black:2
## 0.5759617 0.3080068 0.7885748 0.6901055
## raceNH-White:1 raceNH-White:2 raceOther:1 raceOther:2
## 0.6833335 0.4796180 0.9987513 0.8611176
## h_insure1:1 h_insure1:2
## 0.8780309 1.2038546
#If we look at different income groups, we can see that as expected respondents with higher income are less likely to report good health that excellent health than low income respondents, and even less likely to report poor helth compared to low income respondents. As for ethnic groups, results show us that Blacks and Whites are less likely to report good or poor health than excellent health that Hispanics. The coefficient for health insurance coverage are not significant in this model.
##Multinomial logistic regression
mfit<-vglm(formula=ch_health~income+race++h_insure,
family=multinomial(refLevel = 1),
data=sub,
weights=w2p0/mean(w2p0, na.rm=T))
summary(mfit)
##
## Call:
## vglm(formula = ch_health ~ income + race + +h_insure, family = multinomial(refLevel = 1),
## data = sub, weights = w2p0/mean(w2p0, na.rm = T))
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## log(mu[,2]/mu[,1]) -1.8515 -0.7568 -0.5836 1.07356 2.675
## log(mu[,3]/mu[,1]) -0.7881 -0.1710 -0.1214 -0.03369 18.057
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 0.21521 0.08846 2.433 0.01499 *
## (Intercept):2 -2.08394 0.26614 -7.830 4.86e-15 ***
## incomemore25000:1 -0.49837 0.04433 -11.243 < 2e-16 ***
## incomemore25000:2 -1.38388 0.13626 -10.157 < 2e-16 ***
## raceNH-Black:1 -0.21894 0.06820 -3.210 0.00133 **
## raceNH-Black:2 -0.47401 0.18838 -2.516 0.01186 *
## raceNH-White:1 -0.34882 0.05004 -6.971 3.16e-12 ***
## raceNH-White:2 -0.87734 0.15313 -5.729 1.01e-08 ***
## raceOther:1 0.01704 0.07649 0.223 0.82370
## raceOther:2 -0.17940 0.22580 -0.795 0.42690
## h_insure1:1 -0.14430 0.08521 -1.693 0.09037 .
## h_insure1:2 0.11050 0.26871 0.411 0.68090
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: log(mu[,2]/mu[,1]), log(mu[,3]/mu[,1])
##
## Residual deviance: 18170.22 on 24614 degrees of freedom
##
## Log-likelihood: -9085.112 on 24614 degrees of freedom
##
## Number of Fisher scoring iterations: 7
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Reference group is level 1 of the response
#The interpretation of the results for this model turns out to be very similar to the previous model. Again , the soefficients are significant for income and race/ethnicity but not significant at 5% significant level for health insurance coverage. Since we code the unisured as a reference group, we can say that those who are insured are slightly less likely to have good health than excellent health (10% significance level), those who are insured are more likely to report poor health, however the coefficient is not significant.
##Calculating the odds ratios for each covariate's effect on each model
round(exp(coef(mfit)), 3)
## (Intercept):1 (Intercept):2 incomemore25000:1 incomemore25000:2
## 1.240 0.124 0.608 0.251
## raceNH-Black:1 raceNH-Black:2 raceNH-White:1 raceNH-White:2
## 0.803 0.623 0.706 0.416
## raceOther:1 raceOther:2 h_insure1:1 h_insure1:2
## 1.017 0.836 0.866 1.117
#confidence intervals
round(exp(confint(mfit)), 3)
## 2.5 % 97.5 %
## (Intercept):1 1.043 1.475
## (Intercept):2 0.074 0.210
## incomemore25000:1 0.557 0.663
## incomemore25000:2 0.192 0.327
## raceNH-Black:1 0.703 0.918
## raceNH-Black:2 0.430 0.901
## raceNH-White:1 0.640 0.778
## raceNH-White:2 0.308 0.561
## raceOther:1 0.876 1.182
## raceOther:2 0.537 1.301
## h_insure1:1 0.732 1.023
## h_insure1:2 0.660 1.891
##Geting Fitted Probabilities from models
dat<-expand.grid(income=levels(sub$income),race=levels(sub$race),h_insure=levels(sub$h_insure))
##Generating the fitted values
fitm<-predict(mfit, newdat=dat,type="response")
##Adding the values to the data
dat<-cbind(dat, round(fitm,3))
##Printing the fitted probabilities
head(dat, n=10)
## income race h_insure 1 2 3
## 1 less_eq25000 Hispanic 0 0.423 0.524 0.053
## 2 more25000 Hispanic 0 0.560 0.422 0.017
## 3 less_eq25000 NH-Black 0 0.482 0.480 0.037
## 4 more25000 NH-Black 0 0.616 0.373 0.012
## 5 less_eq25000 NH-White 0 0.519 0.454 0.027
## 6 more25000 NH-White 0 0.647 0.344 0.008
## 7 less_eq25000 Other 0 0.423 0.533 0.044
## 8 more25000 Other 0 0.558 0.428 0.015
## 9 less_eq25000 Hispanic 1 0.452 0.485 0.063
## 10 more25000 Hispanic 1 0.593 0.387 0.021
#Interpretation of odds ratios. Respondents in higher income group are 40% less likely to report good health rather than excellent health than low income group respondents, and 75% less likely to report poor health vs excellent health than low income individuals. NH Blacks are 20% less likely to report good health vs excellent health than Hispanic respondents, (NH WHites are 30% less likely to report good vs excellent health compared to Hispanics.
#Model comparison
AIC(fit.vgam) #Non-proportional odds
## [1] 18193.61
AIC(mfit) #Multinomial
## [1] 18194.22
#According to AIC first model is better, but the difference in AIC is too small to give preference to this model (<10).