Using ECLS-K data, I will assess the relationship between perceived child health and poverty status in children in the United States, using parent education, parent marital status, and child race/ethnicity as control variables.
#load required packages
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tableone)
library(VGAM)
## Loading required package: stats4
## Loading required package: splines
##
## Attaching package: 'VGAM'
## The following object is masked from 'package:survey':
##
## calibrate
## The following object is masked from 'package:car':
##
## logit
#load eclsk data
load("~/OneDrive/UTSA/7283 Stats II/eclsk_k5.Rdata")
#extract variables for assignment
mydat<-c("childid", "x_chsex_r", "x_raceth_r", "p2hscale", "x2povty", "x12par1ed_i", "p2curmar", "w2p0", "w2p0str", "w2p0psu", "w1c0psu", "w1c0str" )
eclsk<-data.frame(eclskk5[,mydat])
rm(eclskk5)
#recode sex
eclsk$chsex<-Recode(eclsk$x_chsex_r, recodes = "1=1; 2=2; -9=NA")
#recode perceived child health
eclsk$chhealth<-Recode(eclsk$p2hscale, recodes = "1:2='0Exc/VGood'; 3='1Good'; 4='2Poor'; else=NA", as.factor=TRUE)
#parent 1 education
eclsk$educ<-Recode(eclsk$x12par1ed_i, recodes = "1='0Prim'; 2='1somehs'; 3='2hsgrad'; 4:5='3somecol'; 6:9='4colgrad'; -9=NA", as.factor=TRUE)
eclsk$educ<-relevel(eclsk$educ, ref='2hsgrad')
#race
eclsk$raceth<-Recode(eclsk$x_raceth_r, recodes="1='nh white'; 2='nh black'; 3:4='hispanic'; 5:7='nh other'; 8='nh multirace'; -9=NA", as.factor=TRUE)
eclsk$raceth<-relevel(eclsk$raceth, ref='nh white')
#poverty
eclsk$pov<-Recode(eclsk$x2povty, recodes ="1=1; 2:3=0; -9=NA", as.factor=TRUE)
#single parent household
eclsk$curmar<-Recode(eclsk$p2curmar, recodes="2:3='Sep/Div'; 4='Single'; -9:-7=NA; else='Union'", as.factor=TRUE)
eclsk$curmar<-relevel(eclsk$curmar, ref='Union')
#child health as number
eclsk$chhealthnum<-Recode(eclsk$p2hscale, recodes = "1:2=1;3=2;4=3;else=NA", as.factor=F)
sub<-eclsk%>%
select(chhealth, educ, raceth, pov, curmar, w2p0, w2p0str, chhealthnum)%>%
filter(complete.cases(.))
#survey design
options(survey.lonely.psu="adjust")
des<-svydesign(ids=~1, strata=~w2p0str, weights=~w2p0, data=sub)
fit.solr1<-svyolr(chhealth~pov+educ+raceth+curmar, des)
summary(fit.solr1)
## Call:
## svyolr(chhealth ~ pov + educ + raceth + curmar, des)
##
## Coefficients:
## Value Std. Error t value
## pov1 0.28132167 0.07237813 3.8868323
## educ0Prim 0.53590369 0.12053176 4.4461616
## educ1somehs 0.22041201 0.10670202 2.0656779
## educ3somecol -0.16897745 0.07951238 -2.1251714
## educ4colgrad -0.52584190 0.09122682 -5.7641149
## racethhispanic 0.44582468 0.07546508 5.9076953
## racethnh black 0.14999078 0.10157982 1.4765805
## racethnh multirace 0.09807859 0.15205969 0.6450006
## racethnh other 0.41414199 0.10870102 3.8099182
## curmarSep/Div 0.10170616 0.08889822 1.1440741
## curmarSingle 0.13857761 0.08400287 1.6496772
##
## Intercepts:
## Value Std. Error t value
## 0Exc/VGood|1Good 2.0312 0.0775 26.2004
## 1Good|2Poor 3.9374 0.0973 40.4839
#calculate AIC
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 11462.13
#Examining the proportional odds assumption by fitting logits for each change
ex1<-svyglm(I(chhealthnum>1)~pov+educ+raceth+curmar, des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
ex2<-svyglm(I(chhealthnum>2)~pov+educ+raceth+curmar, des, family="binomial")
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
plot(coef(ex1)[-1], ylim=c(-1, 1), type="l",xaxt="n",
ylab="Beta", main=c("Comparison of betas for", " proportional odds assumption"))
lines(coef(ex2)[-1], col=1, lty=2)
axis(side=1, at=1:11, labels=F)
text(x=1:11, y=-1.3, srt = 90, pos = 1, xpd = TRUE,cex=.8,
labels = c( "Poverty", "PrimarySch","SomeHS", "SomeColl", "CollGrad",
"Hispanic","Black","Multirace", "Other", "Sep/Div", "Single" ))
legend("bottomright",col=c(1,1),lty=c(1,2), legend=c(">1", ">2"))
lines(coef(fit.solr1)[c(-13:-16)], col=4, lwd=2)
Using a visual graph, it doesn’t appear the proportional odds assumption was met. ## Odds Ratios
round(exp(rbind(coef(ex1)[-1], coef(ex2)[-1])),3)
## pov1 educ0Prim educ1somehs educ3somecol educ4colgrad racethhispanic
## [1,] 1.323 1.630 1.249 0.850 0.593 1.564
## [2,] 1.457 2.703 1.200 0.705 0.473 1.528
## racethnh black racethnh multirace racethnh other curmarSep/Div
## [1,] 1.161 1.096 1.519 1.099
## [2,] 1.310 1.473 1.619 1.452
## curmarSingle
## [1,] 1.136
## [2,] 1.314
#Proportional Odds
fit.vgam<-vglm(as.ordered(chhealth)~pov+educ+raceth+curmar, sub,
weights = w2p0,
family=cumulative(parallel = T, reverse = T)) #<-parallel = T == proportional odds
summary(fit.vgam)
##
## Call:
## vglm(formula = as.ordered(chhealth) ~ pov + educ + raceth + curmar,
## family = cumulative(parallel = T, reverse = T), data = sub,
## weights = w2p0)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -20.69 -6.888 -5.334 -4.083 121.2
## logitlink(P[Y>=3]) -17.27 -2.295 -1.690 -1.335 274.8
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -2.031209 0.003947 -514.62 <2e-16 ***
## (Intercept):2 -3.937355 0.004995 -788.18 <2e-16 ***
## pov1 0.281322 0.003728 75.47 <2e-16 ***
## educ0Prim 0.535902 0.006111 87.69 <2e-16 ***
## educ1somehs 0.220410 0.005376 41.00 <2e-16 ***
## educ3somecol -0.168978 0.004114 -41.08 <2e-16 ***
## educ4colgrad -0.525842 0.004719 -111.43 <2e-16 ***
## racethhispanic 0.445831 0.003933 113.37 <2e-16 ***
## racethnh black 0.149992 0.005169 29.02 <2e-16 ***
## racethnh multirace 0.098076 0.007853 12.49 <2e-16 ***
## racethnh other 0.414142 0.006437 64.34 <2e-16 ***
## curmarSep/Div 0.101706 0.004615 22.04 <2e-16 ***
## curmarSingle 0.138579 0.004318 32.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 3408226 on 25927 degrees of freedom
##
## Log-likelihood: -1704113 on 25927 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
##
##
## Exponentiated coefficients:
## pov1 educ0Prim educ1somehs
## 1.3248801 1.7089896 1.2465880
## educ3somecol educ4colgrad racethhispanic
## 0.8445277 0.5910578 1.5617869
## racethnh black racethnh multirace racethnh other
## 1.1618247 1.1030462 1.5130712
## curmarSep/Div curmarSingle
## 1.1070577 1.1486402
#Non Proportional Odds
fit.vgam2<-vglm(as.ordered(chhealth)~pov+educ+raceth+curmar, sub,
weights = w2p0,
family=cumulative(parallel = F, reverse = T)) #<-parallel = F == non proportional odds
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in Deviance.categorical.data.vgam(mu = mu, y = y, w = w, residuals
## = residuals, : fitted values close to 0 or 1
## Warning in slot(family, "validparams")(eta, y, extra = extra): It seems
## that the nonparallelism assumption has resulted in intersecting linear/
## additive predictors. Try propodds() or fitting a partial nonproportional
## odds model or choosing some other link function, etc.
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, :
## iterations terminated because half-step sizes are very small
## Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 =
## Xm2, : some quantities such as z, residuals, SEs may be inaccurate due to
## convergence at a half-step
## Warning in log(prob): NaNs produced
summary(fit.vgam2)
##
## Call:
## vglm(formula = as.ordered(chhealth) ~ pov + educ + raceth + curmar,
## family = cumulative(parallel = F, reverse = T), data = sub,
## weights = w2p0)
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## logitlink(P[Y>=2]) -5.515e+03 -7.094 -5.532 -4.276 1.102e+09
## logitlink(P[Y>=3]) -1.119e+09 -2.417 -1.677 -1.322 6.107e+03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept):1 -2.0025126 0.0035746 -560.20 <2e-16 ***
## (Intercept):2 -3.9834656 0.0065464 -608.50 <2e-16 ***
## pov1:1 0.3270470 0.0018321 178.51 <2e-16 ***
## pov1:2 0.4803280 0.0017150 280.08 <2e-16 ***
## educ0Prim:1 0.8008928 0.0035263 227.12 <2e-16 ***
## educ0Prim:2 2.5270505 0.0064829 389.81 <2e-16 ***
## educ1somehs:1 0.3213465 0.0051777 62.06 <2e-16 ***
## educ1somehs:2 0.3158788 0.0102057 30.95 <2e-16 ***
## educ3somecol:1 -0.1690064 0.0040388 -41.85 <2e-16 ***
## educ3somecol:2 -0.3345375 0.0089554 -37.36 <2e-16 ***
## educ4colgrad:1 -0.4270146 0.0044465 -96.03 <2e-16 ***
## educ4colgrad:2 -0.4938786 0.0099267 -49.75 <2e-16 ***
## racethhispanic:1 0.4577472 0.0029980 152.69 <2e-16 ***
## racethhispanic:2 0.3662095 0.0029500 124.14 <2e-16 ***
## racethnh black:1 0.1067705 0.0002186 488.39 <2e-16 ***
## racethnh black:2 0.1608509 0.0002073 776.06 <2e-16 ***
## racethnh multirace:1 0.0637587 0.0030761 20.73 <2e-16 ***
## racethnh multirace:2 0.2928314 0.0028275 103.57 <2e-16 ***
## racethnh other:1 0.3696913 0.0036549 101.15 <2e-16 ***
## racethnh other:2 0.3542516 0.0036083 98.18 <2e-16 ***
## curmarSep/Div:1 0.0867624 0.0032014 27.10 <2e-16 ***
## curmarSep/Div:2 0.3932389 0.0030848 127.48 <2e-16 ***
## curmarSingle:1 0.1433863 0.0031969 44.85 <2e-16 ***
## curmarSingle:2 0.3294964 0.0030213 109.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Names of linear predictors: logitlink(P[Y>=2]), logitlink(P[Y>=3])
##
## Residual deviance: 6247308 on 25916 degrees of freedom
##
## Log-likelihood: NA on 25916 degrees of freedom
##
## Number of Fisher scoring iterations: 2
##
## Warning: Hauck-Donner effect detected in the following estimate(s):
## '(Intercept):2', 'pov1:1', 'educ0Prim:1', 'racethhispanic:1', 'racethnh black:1', 'racethnh multirace:1', 'racethnh other:1', 'curmarSep/Div:1', 'curmarSingle:1'
##
##
## Exponentiated coefficients:
## pov1:1 pov1:2 educ0Prim:1
## 1.3868667 1.6166046 2.2275287
## educ0Prim:2 educ1somehs:1 educ1somehs:2
## 12.5165337 1.3789833 1.3714640
## educ3somecol:1 educ3somecol:2 educ4colgrad:1
## 0.8445035 0.7156690 0.6524540
## educ4colgrad:2 racethhispanic:1 racethhispanic:2
## 0.6102548 1.5805094 1.4422574
## racethnh black:1 racethnh black:2 racethnh multirace:1
## 1.1126789 1.1745098 1.0658352
## racethnh multirace:2 racethnh other:1 racethnh other:2
## 1.3402168 1.4472878 1.4251137
## curmarSep/Div:1 curmarSep/Div:2 curmarSingle:1
## 1.0906376 1.4817723 1.1541756
## curmarSingle:2
## 1.3902678
#Ordinal Model AIC calculation
fit.solr1$deviance+2*length(fit.solr1$coefficients)
## [1] 11462.13
AIC(fit.vgam)
## [1] 3408252
AIC(fit.vgam2)
## [1] NaN
#fit.vgam2$deviance+2*length(fitvgam2$coefficients) <- this didn't work
I’m not sure how to fix this the non proportional odds model, but I am not able to get an AIC from it, and I get a very long error message. Based on the two AICs that are provided, the ordinal model is the best fit.
Based on the model results, those in poverty are more likely to report better health in their children than those who are not. It also appears that as education increases, the liklihood of reporting good health for their children decreases. We also see that Hispanics are more likely to report better health in their children than NH Whites. This doesn’t seem correct, so I may have done something incorrectly.
#round(exp(cbind(coef(fit.solr1)[-1], confint(fit.solr1))),3)
summary(fit.solr1)
## Call:
## svyolr(chhealth ~ pov + educ + raceth + curmar, des)
##
## Coefficients:
## Value Std. Error t value
## pov1 0.28132167 0.07237813 3.8868323
## educ0Prim 0.53590369 0.12053176 4.4461616
## educ1somehs 0.22041201 0.10670202 2.0656779
## educ3somecol -0.16897745 0.07951238 -2.1251714
## educ4colgrad -0.52584190 0.09122682 -5.7641149
## racethhispanic 0.44582468 0.07546508 5.9076953
## racethnh black 0.14999078 0.10157982 1.4765805
## racethnh multirace 0.09807859 0.15205969 0.6450006
## racethnh other 0.41414199 0.10870102 3.8099182
## curmarSep/Div 0.10170616 0.08889822 1.1440741
## curmarSingle 0.13857761 0.08400287 1.6496772
##
## Intercepts:
## Value Std. Error t value
## 0Exc/VGood|1Good 2.0312 0.0775 26.2004
## 1Good|2Poor 3.9374 0.0973 40.4839
round(exp(coef(fit.solr1)[-1]), 3)
## educ0Prim educ1somehs educ3somecol
## 1.709 1.247 0.845
## educ4colgrad racethhispanic racethnh black
## 0.591 1.562 1.162
## racethnh multirace racethnh other curmarSep/Div
## 1.103 1.513 1.107
## curmarSingle 0Exc/VGood|1Good 1Good|2Poor
## 1.149 7.623 51.283
round(exp(confint(fit.solr1)), 3)
## 2.5 % 97.5 %
## pov1 1.150 1.527
## educ0Prim 1.349 2.164
## educ1somehs 1.011 1.537
## educ3somecol 0.723 0.987
## educ4colgrad 0.494 0.707
## racethhispanic 1.347 1.811
## racethnh black 0.952 1.418
## racethnh multirace 0.819 1.486
## racethnh other 1.223 1.872
## curmarSep/Div 0.930 1.318
## curmarSingle 0.974 1.354
## 0Exc/VGood|1Good 6.549 8.874
## 1Good|2Poor 48.202 54.561