##I use nationally representative cohort of U.S. children enrolled in the Early Childhood Longitudinal Study, Kindergarten Class of 2010–11 (ECLS-K:2011) for this homework. My research question concerns the association between insurance coverage and race/ethnicity and income. By using logistic regression model I would like to examine from which ethnical/racial groups children are more likely to have health insurance. Therefore the dependent variable is health insurance coverage, while predictors include race/ethnicity, income and health status of children.

## Dependent variable
sub$h_insure<-Recode(sub$p2cover,recodes="2=0;1=1;else=NA") #0 uninsured, 1 insured
head(sub$h_insure)
## [1] 1 1 1 1 0 1
## Predictors
#Race(NH-White,NH-Black,Hispanic,Other)
sub$race<-ifelse(sub$x2par1rac==1 ,"NH-White",ifelse(sub$x2par1rac==2,"NH-Black",
                        ifelse(sub$x2par1rac==3 |                       sub$x2par1rac==4,"Hispanic","Other")))
sub$race<-as.factor(sub$race)
head(sub$race)
## [1] NH-White NH-White NH-White NH-White NH-White NH-White
## Levels: Hispanic NH-Black NH-White Other
#Income 
sub$income<-Recode(sub$p2hilow,recodes="1='less_eq25000';2='more25000';else=NA",as.factor=TRUE) 
head(sub$income)
## [1] more25000 more25000 more25000 more25000 more25000 more25000
## Levels: less_eq25000 more25000
#Child health 
sub$ch_health<-Recode(sub$p2hscale,recodes="1='excellent';2='very good';3='good';4='fair/poor';else=NA",as.factor=TRUE)
head(sub$ch_health)
## [1] very good excellent excellent excellent excellent very good
## Levels: excellent fair/poor good very good
##Select variables
sub<-sub%>%
  filter( complete.cases(.))
#Survey design object
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~w2p0psu, strata=~w2p0str, weights=~w2p0, data =sub , nest=T)
##Logistic regression

fit.logit<-svyglm(formula=h_insure~income+race+ch_health,design= des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit.logit)
## 
## Call:
## svyglm(formula = h_insure ~ income + race + ch_health, design = des, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~w2p0psu, strata = ~w2p0str, weights = ~w2p0, 
##     data = sub, nest = T)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.14971    0.11989  17.931  < 2e-16 ***
## incomemore25000     0.27796    0.13128   2.117   0.0356 *  
## raceNH-Black        0.80714    0.16784   4.809 3.21e-06 ***
## raceNH-White        0.98468    0.14165   6.952 6.50e-11 ***
## raceOther           0.72687    0.15235   4.771 3.79e-06 ***
## ch_healthfair/poor  0.12672    0.31804   0.398   0.6908    
## ch_healthgood      -0.26300    0.12784  -2.057   0.0411 *  
## ch_healthvery good -0.08672    0.10037  -0.864   0.3887    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9949763)
## 
## Number of Fisher Scoring iterations: 6
#From the logistic regression results we can see that Hispanics are less likely to have health insurance than other ethnic/racial categoties. Likewise, those children with household income more than 25,000 are more likely to be covered. As for health status variable children with reported good health are less likely to have health insurance than those with excellent health status.
##Odds ratios and confidence intervals
knitr::kable(data.frame(OR = exp(coef(fit.logit)), ci = exp(confint(fit.logit))))
OR ci.2.5.. ci.97.5..
(Intercept) 8.5823457 6.7850850 10.8556721
incomemore25000 1.3204278 1.0208700 1.7078860
raceNH-Black 2.2414935 1.6131245 3.1146344
raceNH-White 2.6769650 2.0280230 3.5335604
raceOther 2.0686029 1.5345987 2.7884280
ch_healthfair/poor 1.1350971 0.6085846 2.1171180
ch_healthgood 0.7687439 0.5983667 0.9876338
ch_healthvery good 0.9169332 0.7531870 1.1162786
#Race: NH-Whites are 167% more likely to have health insurance than Hispanics
#Income: Children from high-income households are 32% more likely to have insurance coverage than children from low-income households (however, the difference is not significant)
#Health status: Children with reported good heealth are 23% less likely to have health insurance than children whose health status is excellent.
##Fitted values
dat<-expand.grid(race=levels(sub$race),ch_health=levels(sub$ch_health),
                 income=levels(sub$income))
levels(eclskk5$income)
## Warning: Unknown or uninitialised column: 'income'.
## NULL
fit<-predict(fit.logit, newdat=dat,type="response")
dat$fitted.prob.lrm<-round(fit, 3)
#Print the fitted probabilities for the first 10 cases
knitr::kable(head(dat, n=10))
race ch_health income fitted.prob.lrm
Hispanic excellent less_eq25000 0.896
NH-Black excellent less_eq25000 0.951
NH-White excellent less_eq25000 0.958
Other excellent less_eq25000 0.947
Hispanic fair/poor less_eq25000 0.907
NH-Black fair/poor less_eq25000 0.956
NH-White fair/poor less_eq25000 0.963
Other fair/poor less_eq25000 0.953
Hispanic good less_eq25000 0.868
NH-Black good less_eq25000 0.937
##Comparing probabilities
dat[which(dat$race=="NH-White" & dat$ch_health=="excellent" & dat$income=="more25000"),]
##        race ch_health    income fitted.prob.lrm
## 19 NH-White excellent more25000           0.968
dat[which(dat$race=="Hispanic" & dat$ch_health=="good" & dat$income=="less_eq25000"),]
##       race ch_health       income fitted.prob.lrm
## 9 Hispanic      good less_eq25000           0.868
#Interesting cases
#We comparing NH-White high-income with excellent who reported excellent health to Hispanic with low income who reported good health. The first case has an estimated probability of having health insurance of 97%, while the second case has 87% chance to have insurance coverage.