Homework2

##I use a nationally representative cohort of U.S. children enrolled in the Early Childhood Longitudinal Study, Kindergarten Class of 2010–11 (ECLS-K:2011) for this homework. My research question concerns the association between health insurance coverage and race/ethnicity, household income, and child health status. Using a logistic regression model, I examine which racial/ethnic groups of children are more likely to have health insurance. The dependent variable is therefore health insurance coverage, and the predictors are race/ethnicity, household income, and the child's health status.

## Dependent variable
# Binary outcome built from p2cover: code 1 -> 1 (insured), code 2 -> 0
# (uninsured); every other value is set to NA.
sub[["h_insure"]] <- Recode(sub[["p2cover"]], recodes = "2=0;1=1;else=NA")
head(sub[["h_insure"]])

## [1] 1 1 1 1 0 1

## Predictors
# Race/ethnicity in four groups: NH-White, NH-Black, Hispanic, Other.
# x2par1rac codes 1 and 2 map to NH-White and NH-Black, codes 3 and 4 are
# both treated as Hispanic, and any remaining code falls into "Other".
sub$race <- with(
  sub,
  ifelse(
    x2par1rac == 1, "NH-White",
    ifelse(
      x2par1rac == 2, "NH-Black",
      ifelse(x2par1rac == 3 | x2par1rac == 4, "Hispanic", "Other")
    )
  )
)
# Store as a factor so the model treats race as categorical.
sub$race <- factor(sub$race)
head(sub$race)

## [1] NH-White NH-White NH-White NH-White NH-White NH-White
## Levels: Hispanic NH-Black NH-White Other

#Income 
# Two-level income factor from p2hilow: code 1 -> 'less_eq25000',
# code 2 -> 'more25000'; anything else is set to NA.
sub[["income"]] <- Recode(
  sub[["p2hilow"]],
  recodes = "1='less_eq25000';2='more25000';else=NA",
  as.factor = TRUE
)
head(sub[["income"]])

## [1] more25000 more25000 more25000 more25000 more25000 more25000
## Levels: less_eq25000 more25000

#Child health 
# Four-level child health factor from p2hscale (codes 1-4); any other
# value is set to NA.
sub[["ch_health"]] <- Recode(
  sub[["p2hscale"]],
  recodes = "1='excellent';2='very good';3='good';4='fair/poor';else=NA",
  as.factor = TRUE
)
head(sub[["ch_health"]])

## [1] very good excellent excellent excellent excellent very good
## Levels: excellent fair/poor good very good

##Select variables
# Keep only rows that are complete on the variables the model and the survey
# design actually use. Checking every column of `sub` would also drop
# children who are missing values only on columns unrelated to this analysis.
analysis_vars <- c(
  "h_insure", "race", "income", "ch_health",
  "w2p0psu", "w2p0str", "w2p0"
)
sub <- sub %>%
  filter(complete.cases(.[, analysis_vars]))

#Survey design object
# survey.lonely.psu = "adjust" selects how the survey package handles strata
# that contain a single PSU (see ?surveyoptions).
options(survey.lonely.psu = "adjust")
# Complex design: PSUs (w2p0psu) within strata (w2p0str), weighted by w2p0.
# nest = TRUE treats PSU ids as nested within strata; spelled out as TRUE
# because T is an ordinary variable that can be reassigned.
des <- svydesign(
  ids = ~w2p0psu,
  strata = ~w2p0str,
  weights = ~w2p0,
  data = sub,
  nest = TRUE
)

##Logistic regression

# Survey-weighted logistic regression of insurance coverage on income,
# race/ethnicity and child health.
# quasibinomial() is used instead of binomial: per the survey package
# documentation it gives the same point estimates and standard errors, but
# avoids the "non-integer #successes in a binomial glm!" warning that the
# sampling weights otherwise trigger.
fit.logit <- svyglm(
  formula = h_insure ~ income + race + ch_health,
  design = des,
  family = quasibinomial()
)

summary(fit.logit)

## 
## Call:
## svyglm(formula = h_insure ~ income + race + ch_health, design = des, 
##     family = binomial)
## 
## Survey design:
## svydesign(ids = ~w2p0psu, strata = ~w2p0str, weights = ~w2p0, 
##     data = sub, nest = T)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.14971    0.11989  17.931  < 2e-16 ***
## incomemore25000     0.27796    0.13128   2.117   0.0356 *  
## raceNH-Black        0.80714    0.16784   4.809 3.21e-06 ***
## raceNH-White        0.98468    0.14165   6.952 6.50e-11 ***
## raceOther           0.72687    0.15235   4.771 3.79e-06 ***
## ch_healthfair/poor  0.12672    0.31804   0.398   0.6908    
## ch_healthgood      -0.26300    0.12784  -2.057   0.0411 *  
## ch_healthvery good -0.08672    0.10037  -0.864   0.3887    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9949763)
## 
## Number of Fisher Scoring iterations: 6

#From the logistic regression results we can see that Hispanics are less likely to have health insurance than the other racial/ethnic categories. Likewise, children with household income of more than 25,000 are more likely to be covered. As for health status, children reported to be in good health are less likely to have health insurance than those in excellent health.

##Odds ratios and confidence intervals
# Exponentiate the logit coefficients and their confidence limits so the
# table is on the odds-ratio scale.
odds_ratios <- exp(coef(fit.logit))
odds_ratio_ci <- exp(confint(fit.logit))
knitr::kable(data.frame(OR = odds_ratios, ci = odds_ratio_ci))

	OR	ci.2.5..	ci.97.5..
(Intercept)	8.5823457	6.7850850	10.8556721
incomemore25000	1.3204278	1.0208700	1.7078860
raceNH-Black	2.2414935	1.6131245	3.1146344
raceNH-White	2.6769650	2.0280230	3.5335604
raceOther	2.0686029	1.5345987	2.7884280
ch_healthfair/poor	1.1350971	0.6085846	2.1171180
ch_healthgood	0.7687439	0.5983667	0.9876338
ch_healthvery good	0.9169332	0.7531870	1.1162786

#Race: The odds of having health insurance are about 168% higher for NH-Whites than for Hispanics (OR = 2.68).
#Income: The odds of having insurance coverage are 32% higher for children from high-income households than for children from low-income households (OR = 1.32); the difference is significant at the 5% level (p = 0.036; the 95% CI of 1.02 to 1.71 excludes 1).
#Health status: The odds of having health insurance are 23% lower for children reported to be in good health than for children whose health status is excellent (OR = 0.77).

##Fitted values
# Build one row for every combination of the predictor levels; these are the
# hypothetical cases the model is asked to predict for.
dat <- expand.grid(
  race = levels(sub$race),
  ch_health = levels(sub$ch_health),
  income = levels(sub$income)
)
# Inspect the income levels on the analysis data (`sub`), the same object the
# grid above is built from.
levels(sub$income)

## [1] "less_eq25000" "more25000"

# Predicted probability of being insured for each row of the grid.
# The argument is spelled `newdata` in full rather than relying on partial
# matching of `newdat`.
fit <- predict(fit.logit, newdata = dat, type = "response")
# Keep only the point estimates as plain numbers, rounded to 3 decimals.
dat$fitted.prob.lrm <- round(as.numeric(fit), 3)
#Print the fitted probabilities for the first 10 cases
knitr::kable(head(dat, n = 10))

race	ch_health	income	fitted.prob.lrm
Hispanic	excellent	less_eq25000	0.896
NH-Black	excellent	less_eq25000	0.951
NH-White	excellent	less_eq25000	0.958
Other	excellent	less_eq25000	0.947
Hispanic	fair/poor	less_eq25000	0.907
NH-Black	fair/poor	less_eq25000	0.956
NH-White	fair/poor	less_eq25000	0.963
Other	fair/poor	less_eq25000	0.953
Hispanic	good	less_eq25000	0.868
NH-Black	good	less_eq25000	0.937

##Comparing probabilities
# Row of the prediction grid for an NH-White child in excellent health from a
# household in the higher income category.
subset(
  dat,
  race == "NH-White" & ch_health == "excellent" & income == "more25000"
)

##        race ch_health    income fitted.prob.lrm
## 19 NH-White excellent more25000           0.968

# Row of the prediction grid for a Hispanic child in good health from a
# household in the lower income category.
subset(
  dat,
  race == "Hispanic" & ch_health == "good" & income == "less_eq25000"
)

##       race ch_health       income fitted.prob.lrm
## 9 Hispanic      good less_eq25000           0.868

#Interesting cases
#We compare a high-income NH-White child who reported excellent health to a low-income Hispanic child who reported good health. The first case has an estimated probability of having health insurance of 97%, while the second case has an 87% chance of having insurance coverage.

Homework2

Asiya

2/22/2020