Proposed topic: Advserse health conditions associated with opportunity youths
Outcome variabels: Health conditions such Health status, depression, anxiety, smoking etc. The purpose of this assignment I would focus on self rated health status of opportunity youths
Qst: What socio-demographic factors are associated with opportunity youths?. Facors that might affect the outcome variable include gender, country of birth(Us born or not), racial-ethnicity, etc.
– Racial ethnicity – Opportunity youth status
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ipumsr)
ddi <- read_ipums_ddi("nhis_00002.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
names(data) <- tolower(gsub(pattern = "_",replacement = "",x = names(data)))
#sex
data$male<-as.factor(ifelse(data$sex==1, "Male", "Female"))
#race/ethnicity
data$black<- car::Recode(data$hisprace,
recodes="03=1; 99=NA; else=0")
data$white<- car::Recode(data$hisprace,
recodes="02=1; 99=NA; else=0")
data$other<- car::Recode(data$hisprace,
recodes="4:7=1; 99=NA; else=0")
data$hispanic<- car::Recode(data$hisprace,
recodes="01=1; 99=NA; else=0")
data$hisprace<- as.factor(data$hisprace)
data$race_eth<-car::Recode(data$hisprace,
recodes="01='hispanic'; 02='nhwhite'; 03='nh black';04:06='nh other'; 07='nh multirace'; else=NA",
as.factor = T)
data$race_eth<-relevel(data$race_eth,
ref = "nhwhite")
#education level
data$educ<- as.factor(data$educ)
data$educ<- car::Recode(data$educ,
recodes="102:103='0Prim'; 116='1somehs'; 201:202='2hsgrad'; 301:303='3somecol'; 400:503='4colgrad';997:999=NA;000=NA",
as.factor=T)
data$educ<-fct_relevel(data$educ,'0Prim','1somehs','2hsgrad','3somecol','4colgrad' )
#Urban-rural classification
data$urbrrl<- as.factor(data$urbrrl)
data$urban_rural<- car::Recode(data$urbrrl,
recodes="1:3='urban'; 4='rural';000=NA",
as.factor=T)
data$urban_rural<-relevel(data$urban_rural, ref='rural')
#employment status
data$unemploy<- car::Recode(data$empstat,
recodes="200=1; 00=NA; 999=NA; else=0")
data$empstat<- as.factor(data$empstat)
data$employ_status<- car::Recode(data$empstat,
recodes="100='Employed'; 200='unemployed';else=NA",
as.factor=T)
data$employ_status<-relevel(data$employ_status, ref='Employed')
# currently in school
data$non_schooling<- car::Recode(data$schoolnow,
recodes="1=1; 0=NA; 7:9=NA; else=0")
data$schoolnow<- as.factor(data$schoolnow)
data$schoolstatus<- car::Recode(data$schoolnow,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$schoolstatus<-relevel(data$schoolstatus, ref='no')
# merging schooling and working
data$opportunity_youth <- paste( data$unemploy, data$non_schooling, sep ="")
data$opportunity_youth<- as.factor(data$opportunity_youth)
data$opportunity_youth_cat<- car::Recode(data$opportunity_youth,
recodes="11='Opportunity youth';00:11='Not opportunity youth';else=NA",
as.factor=T)
data$opportunity_youth_cat<-relevel(data$opportunity_youth_cat, ref='Opportunity youth')
#income grouping
data$familyincome <- data$incfam07on
# born in the US
data$usborn<- car::Recode(data$usborn,
recodes="20=1; 97:98=NA; else=0")
# US Citizen
data$citizen<- car::Recode(data$citizen,
recodes="2=1; 8:9=NA; else=0")
#last employed
data$emplast<- as.factor(data$emplast)
data$employedlast<- car::Recode(data$emplast,
recodes="1='Within past 12months'; 2='1-5years ago'; 3='over 5years ago'; 4='never worked';else=NA",
as.factor=T)
data$employedlast<-relevel(data$employedlast, ref='1-5years ago')
#HEALTH VARIABLES
#Poor or fair self rated health
data$badhealth<-car::Recode(data$health, recodes="4:5=1; 1:3=0; else=NA")
#place for medical care
data$usualpl<- as.factor(data$usualpl)
data$medicalplace<- car::Recode(data$usualpl,
recodes="1='no'; 2:3:='yes';else=NA",
as.factor=T)
data$medicalplace<-relevel(data$medicalplace, ref='no')
# delayed medical care due to cost
data$delaycost<- as.factor(data$delaycost)
data$medical_care_cost<- car::Recode(data$delaycost,
recodes="2='yes'; 1='no';else=NA",
as.factor=T)
data$medical_care_cost<-relevel(data$medical_care_cost, ref='yes')
# worried about paying medical bills
data$wormedbill<- as.factor(data$wormedbill)
data$medical_bill_worried<- car::Recode(data$wormedbill,
recodes="1='very worried'; 2='somewhat worried'; 3='not at all';else=NA",
as.factor=T)
data$medical_bill_worried<-relevel(data$medical_bill_worried, ref='very worried')
# unable to pay medical bills
data$hiunablepay<- as.factor(data$hiunablepay)
data$medical_bill_unabletopay<- car::Recode(data$hiunablepay,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$medical_bill_unabletopay<-relevel(data$medical_bill_unabletopay, ref='yes')
# health insurance coverage
data$hinotcove <- as.factor(data$hinotcove)
data$healthinsurace_coverage<- car::Recode(data$hinotcove,
recodes="1='no, has coverage'; 2='yes, no coverage';else=NA",
as.factor=T)
data$healthinsurace_coverage<-relevel(data$healthinsurace_coverage, ref='yes, no coverage')
# dont have health insurance cuz of cost
data$hinocostr <- as.factor(data$hinocostr)
data$nohealthinsurace_cost<- car::Recode(data$hinocostr,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$nohealthinsurace_cost<-relevel(data$nohealthinsurace_cost, ref='yes')
# used medication in the past year
data$premedyr <- as.factor(data$premedyr)
data$usedmedications<- car::Recode(data$premedyr,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$usedmedications<-relevel(data$usedmedications, ref='yes')
# if they smoked
data$smokfreqnow <- as.factor(data$smokfreqnow)
data$smoke_frequently<- car::Recode(data$smokfreqnow,
recodes="1='no'; 2:3='somedays/everyday';else=NA",
as.factor=T)
data$smoke_frequently<-relevel(data$smoke_frequently, ref='somedays/everyday')
# smoked up to 100 cigarreth in life time
data$smokev <- as.factor(data$smokev)
data$smoke_100cig<- car::Recode(data$smokev,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$smoke_100cig<-relevel(data$smoke_100cig, ref='yes')
#Mental health
# ever had anxiety disoreder
data$anxietyev <- as.factor(data$anxietyev)
data$anxiety_disoreder<- car::Recode(data$anxietyev,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$anxiety_disoreder<-relevel(data$anxiety_disoreder, ref='yes')
# medication for worrying
data$worrx <- as.factor(data$worrx)
data$medication_for_worry<- car::Recode(data$worrx,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$medication_for_worry<-relevel(data$medication_for_worry, ref='yes')
# medication for depression
data$deprx <- as.factor(data$deprx)
data$medication_for_depression<- car::Recode(data$deprx,
recodes="1='no'; 2='yes';else=NA",
as.factor=T)
data$medication_for_depression<-relevel(data$medication_for_depression, ref='yes')
# level of worry
data$worfeelevl <- as.factor(data$worfeelevl)
data$level_of_worry<- car::Recode(data$worfeelevl,
recodes="1='alot'; 2='a little'; 3='btw little and alot';else=NA",
as.factor=T)
data$level_of_worry<-relevel(data$level_of_worry, ref='alot')
# level of depression
data$depfeelevl <- as.factor(data$depfeelevl)
data$level_of_depression<- car::Recode(data$depfeelevl,
recodes="1='alot'; 2='a little'; 3='btw little and alot';else=NA",
as.factor=T)
data$level_of_depression<-relevel(data$level_of_depression, ref='alot')
data <- data%>%
filter(age >=16 & age<=24)
data<-data%>%
filter(is.na(educ)==F,
is.na(badhealth)==F)
table(data$opportunity_youth_cat)
##
## Opportunity youth Not opportunity youth
## 399 3366
table(data$opportunity_youth_cat, data$male)
##
## Female Male
## Opportunity youth 231 168
## Not opportunity youth 1669 1697
table(data$opportunity_youth_cat, data$race_eth)
##
## nhwhite hispanic nh black nh multirace nh other
## Opportunity youth 192 100 68 13 26
## Not opportunity youth 1897 758 340 93 278
table(data$opportunity_youth_cat, data$urban_rural)
##
## rural urban
## Opportunity youth 62 337
## Not opportunity youth 417 2949
table(data$opportunity_youth_cat, data$badhealth)
##
## 0 1
## Opportunity youth 349 50
## Not opportunity youth 3211 155
100*prop.table(table(data$opportunity_youth_cat, data$badhealth), margin=2)
##
## 0 1
## Opportunity youth 9.803371 24.390244
## Not opportunity youth 90.196629 75.609756
#basic chi square test of independence
chisq.test(table(data$badhealth, data$opportunity_youth_cat))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(data$badhealth, data$opportunity_youth_cat)
## X-squared = 42.006, df = 1, p-value = 9.1e-11
library(srvyr)
##
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
##
## filter
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~strata, weights=~sampweight, data = data )
des
## Stratified Independent Sampling design (with replacement)
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = data)
#compare that with the original, unweighted proportions
prop.table(table(data$badhealth,
data$opportunity_youth_cat),
margin=2)
##
## Opportunity youth Not opportunity youth
## 0 0.87468672 0.95395128
## 1 0.12531328 0.04604872
cat<-svyby(formula = ~badhealth,
by=~opportunity_youth_cat,
design = des,
FUN=svymean)
svychisq(~badhealth+sex, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~badhealth + sex, design = des)
## F = 4.4058, ndf = 1, ddf = 3809, p-value = 0.03588
knitr::kable(cat,
caption = "Survey Estimates of Poor SRH by Opportunity Youths",
align = 'c',
format = "html")
opportunity_youth_cat | badhealth | se | |
---|---|---|---|
Opportunity youth | Opportunity youth | 0.1058106 | 0.0167357 |
Not opportunity youth | Not opportunity youth | 0.0430556 | 0.0039799 |
sv.table<-svyby(formula = ~badhealth,
by = ~opportunity_youth_cat,
design = des,
FUN = svymean,
na.rm=T)
knitr::kable(sv.table,
caption = "Survey Estimates of Poor SRH by Opportunity Youths",
align = 'c',
format = "html")
opportunity_youth_cat | badhealth | se | |
---|---|---|---|
Opportunity youth | Opportunity youth | 0.1058106 | 0.0167357 |
Not opportunity youth | Not opportunity youth | 0.0430556 | 0.0039799 |
#Make a survey design that is random sampling - no survey information
nodes<-svydesign(ids = ~1, weights = ~1, data = data)
sv.table<-svyby(formula = ~factor(badhealth),
by = ~opportunity_youth_cat,
design = nodes,
FUN = svymean,
na.rm=T)
knitr::kable(sv.table,
caption = "Estimates of Poor SRH by Opportunity youth - No survey design",
align = 'c',
format = "html")
opportunity_youth_cat | factor(badhealth)0 | factor(badhealth)1 | se.factor(badhealth)0 | se.factor(badhealth)1 | |
---|---|---|---|---|---|
Opportunity youth | Opportunity youth | 0.8746867 | 0.1253133 | 0.0165766 | 0.0165766 |
Not opportunity youth | Not opportunity youth | 0.9539513 | 0.0460487 | 0.0036130 | 0.0036130 |
library(srvyr)
data%>%
as_survey_design(strata = strata,
weights = sampweight)%>%
group_by(opportunity_youth_cat)%>%
summarise(mean_bh = survey_mean(badhealth, na.rm=T))%>%
ungroup()
## # A tibble: 3 × 3
## opportunity_youth_cat mean_bh mean_bh_se
## <fct> <dbl> <dbl>
## 1 Opportunity youth 0.106 0.0167
## 2 Not opportunity youth 0.0431 0.00398
## 3 <NA> 0.0654 0.0256
library(gtsummary)
data%>%
as_survey_design( strata =strata,
weights = sampweight)%>%
select(badhealth, opportunity_youth_cat)%>%
tbl_svysummary(by = opportunity_youth_cat,
label = list(badhealth = "Fair/Poor Health"))%>%
add_p()%>%
add_n()
## 96 observations missing `opportunity_youth_cat` have been removed. To include these observations, use `forcats::fct_explicit_na()` on `opportunity_youth_cat` column before passing to `tbl_svysummary()`.
## ℹ Column(s) badhealth are class "haven_labelled". This is an intermediate datastructure not meant for analysis. Convert columns with `haven::as_factor()`, `labelled::to_factor()`, `labelled::unlabelled()`, and `unclass()`. "haven_labelled" value labels are ignored when columns are not converted. Failure to convert may have unintended consequences or result in error.
## • https://haven.tidyverse.org/articles/semantics.html
## • https://larmarange.github.io/labelled/articles/intro_labelled.html#unlabelled
Characteristic | N | Opportunity youth, N = 6,943,2031 | Not opportunity youth, N = 50,000,9401 | p-value2 |
---|---|---|---|---|
Fair/Poor Health | 56,944,143 | 734,664 (11%) | 2,152,821 (4.3%) | <0.001 |
1
n (%)
2
chi-squared test with Rao & Scott's second-order correction
|
fit1<-lm(badhealth~opportunity_youth_cat+race_eth,
data=data)
fit2<-lm(badhealth~opportunity_youth_cat+race_eth,
data=data,
weights = sampweight)
fit3<-svyglm(badhealth~opportunity_youth_cat+race_eth,
design = des,
family=gaussian)
Ans: There are no much substantive differences in the descriptive statistics
library(ggplot2)
library(dplyr)
coefs<-data.frame(coefs=c(coef(fit1)[-1], coef(fit3)[-1]),
mod=c(rep("Non Survey Model", 10),rep("Survey Model", 10)),
effect=rep(names(coef(fit1)[-1]), 2))
coefs%>%
ggplot()+
geom_point(aes( x=effect,
y=coefs,
group=effect,
color=effect,
shape=mod),
position=position_jitterdodge(jitter.width = 1),
size=2)+
ylab("Regression Coefficient")+
xlab("Beta")+
geom_abline(intercept = 0, slope=0)+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
ggtitle(label = "Comparison of Survey and Non-Survey Regression effects")