library(ipumsr)
# Read the IPUMS NHIS extract: parse the DDI codebook, then load the microdata
# it describes.
ddi <- read_ipums_ddi("nhis_00002.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.

# Strip the value labels so the variables behave as plain vectors.
data <- haven::zap_labels(data)

# Normalise the column names: remove every underscore, then lower-case.
# NOTE(review): later steps reference columns whose names contain underscores
# (e.g. medical_care_cost); those must be created after this renaming, by
# recoding code that is not visible here -- confirm.
names(data) <- tolower(gsub("_", "", names(data)))

# Keep respondents aged 16 through 24 (inclusive); rows with a missing age are
# dropped by filter().
data <- data %>%
  filter(age >= 16, age <= 24)
# Build the analysis sample:
#   1. keep rows that are complete on the weights, strata, health items and
#      covariates listed in complete.cases();
#   2. keep only the analysis columns (the column order is kept as-is because
#      downstream code may index these columns by position);
#   3. z-score the health items.
# scale() returns a one-column matrix; as.numeric() converts it back to a plain
# numeric vector so the data frame does not end up with matrix-valued columns.
# NOTE(review): race_eth is selected but is not part of the complete-case
# filter, so it may still contain NA -- confirm this is intended.
data2 <- data %>%
  filter(
    complete.cases(
      sampweight, strata, badhealth, medical_care_cost,
      healthinsurace_coverage, smoke_100cig, anxiety_disoreder,
      medication_for_worry, medication_for_depression, educ,
      opportunity_youth_cat_num, opportunity_youth_cat, urban_rural,
      familyincome, poverty2, sex
    )
  ) %>%
  select(
    sampweight, strata, badhealth, medical_care_cost, smoke_100cig,
    anxiety_disoreder, medication_for_worry, medication_for_depression,
    healthinsurace_coverage, opportunity_youth_cat_num, opportunity_youth_cat,
    urban_rural, familyincome, poverty2, sex, educ, race_eth
  ) %>%
  mutate(
    across(
      c(
        badhealth, smoke_100cig, anxiety_disoreder, medication_for_worry,
        medication_for_depression, medical_care_cost, healthinsurace_coverage
      ),
      function(x) as.numeric(scale(x))
    )
  )
# Principal components analysis of the six health items.
# The items are selected by name rather than by column position, so the
# analysis does not silently change if the column order of data2 changes.
# scale.unit = TRUE standardises each item to unit variance; graph = FALSE
# suppresses the automatic plots.
data.pc <- PCA(
  data2[, c(
    "badhealth", "medical_care_cost", "smoke_100cig",
    "anxiety_disoreder", "medication_for_worry", "medication_for_depression"
  )],
  scale.unit = TRUE,
  graph = FALSE
)
# Report the results of the PCA, including the eigenvalues and the
# corresponding vectors, and interpret the component(s) where possible.
# The first two columns of the eigenvalue table hold each component's
# eigenvalue and its percentage of variance.
eigenvalues <- data.pc[["eig"]]
head(eigenvalues[, 1:2])
## eigenvalue percentage of variance
## comp 1 2.2525429 37.542382
## comp 2 1.1684750 19.474583
## comp 3 0.9099391 15.165652
## comp 4 0.8655742 14.426236
## comp 5 0.5515383 9.192305
## comp 6 0.2519305 4.198842

# Components 1 and 2 are the only ones with an eigenvalue above 1; together
# they account for roughly 57% of the variance.
# Scree plot of the components.
fviz_screeplot(data.pc, ncp = 10)
# Describe each dimension by the items' correlations with it (and p-values).
desc <- dimdesc(data.pc)

# Dimension 1: dominated by the two medication items and the anxiety item.
desc[["Dim.1"]]
## $quanti
## correlation p.value
## medication_for_worry 0.8822963 0.000000e+00
## medication_for_depression 0.8539053 0.000000e+00
## anxiety_disoreder 0.7763685 0.000000e+00
## badhealth 0.2724535 2.876779e-64
## smoke_100cig 0.1914909 4.954563e-32
## medical_care_cost 0.1769015 1.648817e-27
##
## attr(,"class")
## [1] "condes" "list"

# Dimension 2: positive for medical care cost, smoking and bad health;
# weakly negative for the two medication items.
desc[["Dim.2"]]
## $quanti
## correlation p.value
## medical_care_cost 0.6574857 0.000000e+00
## smoke_100cig 0.6512687 0.000000e+00
## badhealth 0.4695618 3.201458e-203
## medication_for_depression -0.2046424 1.958925e-36
## medication_for_worry -0.2228455 4.752133e-43
##
## attr(,"class")
## [1] "condes" "list"
# Attach each respondent's coordinate on the first principal component to the
# analysis data; it is the outcome used in the plots and the model below.
data2$pc1 <- data.pc$ind$coord[, 1]

# The survey design object is intentionally not built here. It is constructed
# further down, after `educ` has been releveled and immediately before the
# model is fitted; a design built at this point would never be used and would
# hold data2 as it was before the releveling.
library(ggplot2)
# Distribution of the first-component score within each opportunity-youth
# category.
ggplot(
  data2,
  aes(x = opportunity_youth_cat, y = pc1, group = opportunity_youth_cat)
) +
  geom_boxplot()
# Put the education levels in their substantive order, which makes
# "1Less than HS" the first (reference) level.
# The levels are passed as unnamed arguments through `...`: fct_relevel() has
# no `.x` argument, and supplying the levels as a named vector raised the
# warning "Outer names are only allowed for unnamed scalar atomic inputs".
data2$educ <- forcats::fct_relevel(
  data2$educ,
  "1Less than HS", "2hsgrad", "3More than HS"
)
# First-component score by education level.
ggplot(data2, aes(x = educ, y = pc1)) +
  geom_boxplot()

# First-component score by urban/rural residence.
ggplot(data2, aes(x = urban_rural, y = pc1)) +
  geom_boxplot()

# First-component score by sex.
ggplot(data2, aes(x = sex, y = pc1)) +
  geom_boxplot()
# Survey design for the regression below. It is built at this point, after
# `educ` has been releveled, so the design is created from the final data2.
# data2$pc1 was already attached before the plots and nothing has modified it
# since, so it is not reassigned here.

# "adjust" lets variance estimation proceed when a stratum contains a single
# PSU instead of stopping with an error.
options(survey.lonely.psu = "adjust")

# NOTE(review): ids = ~1 declares no cluster/PSU identifier; if the extract
# carries a PSU variable, confirm whether it should be supplied here.
des <- svydesign(
  ids = ~1,
  strata = ~strata,
  weights = ~sampweight,
  data = data2
)
# If deemed appropriate, test the index/components/latent variables.
# Survey-weighted linear model of the first-component score on opportunity-youth
# status, education, sex and race/ethnicity.
fit.1 <- svyglm(
  formula = pc1 ~ factor(opportunity_youth_cat) + educ + sex + (race_eth),
  design = des,
  family = gaussian
)
summary(fit.1)
##
## Call:
## svyglm(formula = pc1 ~ factor(opportunity_youth_cat) + educ +
## sex + (race_eth), design = des, family = gaussian)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = data2)
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.66964 0.11447 5.850
## factor(opportunity_youth_cat)Not opportunity youth -0.24141 0.08739 -2.762
## educ2hsgrad -0.08300 0.08616 -0.963
## educ3More than HS -0.07134 0.08606 -0.829
## sexMale -0.47140 0.05268 -8.948
## race_ethotherminority -0.45384 0.05168 -8.782
## Pr(>|t|)
## (Intercept) 5.35e-09 ***
## factor(opportunity_youth_cat)Not opportunity youth 0.00577 **
## educ2hsgrad 0.33540
## educ3More than HS 0.40717
## sexMale < 2e-16 ***
## race_ethotherminority < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.976604)
##
## Number of Fisher Scoring iterations: 2