library(forcats)
library(stargazer, quietly = T)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey, quietly = T)
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(car, quietly = T)
library(questionr, quietly = T)
library(dplyr, quietly = T)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats, quietly = T)
library(tidyverse, quietly = T)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v readr 2.1.2
## v tibble 3.1.6 v purrr 0.3.4
## v tidyr 1.2.0 v stringr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
library(srvyr, quietly = T)
##
## Attaching package: 'srvyr'
## The following object is masked from 'package:stats':
##
## filter
library( gtsummary, quietly = T)
## #Uighur
library(caret, quietly = T)
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## The following object is masked from 'package:survival':
##
## cluster
library(tableone, quietly = T)
library(stargazer, quietly = T)
library(survey, quietly = T)
library(ggplot2, quietly = T)
library(pander, quietly = T)
## Warning: package 'pander' was built under R version 4.1.3
library(knitr, quietly = T)
library(dplyr, quietly = T)
library(factoextra, quietly = T)
## Warning: package 'factoextra' was built under R version 4.1.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(FactoMineR, quietly = T)
## Warning: package 'FactoMineR' was built under R version 4.1.3
library(ipumsr)
## Warning: package 'ipumsr' was built under R version 4.1.3
library(haven)
ddi <- read_ipums_ddi("C:/Users/spara/OneDrive/Desktop/project/nhis_00008.xml")
data <- read_ipums_micro(ddi)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
data<- haven::zap_labels(data)
names(data) <- tolower(gsub(pattern = "_",replacement = "",x = names(data)))
Recode variables
#depression level
data$depfeelevl<-car::Recode (data$depfeelevl,
recodes="1=1; 2=2; 3=0; else=NA")
# medication for depression
data$deprx<- car::Recode(data$deprx,
recodes="1=0; 2=1;else=NA")
data$deprx_cat <- as.factor(data$deprx)
data$deprx_cat<- car::Recode(data$deprx,
recodes="0='No'; 1='Yes';else=NA",
as.factor=T)
# ever had anxiety disorder
data$anxietydisorder<- car::Recode(data$anxietyev,
recodes="1=0; 2=1;else=NA")
# medication for worrying
data$worrymed<- car::Recode(data$worrx,
recodes="1=0; 2=1;else=NA")
# level of worry
data$worfeelevl<- car::Recode (data$worfeelevl,
recodes="1=1; 2=2; 3=0; else=NA")
# feel afraid past two weeks
data$gadfear<- car::Recode (data$gadfear,
recodes="1=1; 2=2; 3=0; else=NA")
#currently Pregnant
data$curpreg<-car::Recode(data$pregnantnow,
recodes="0='Yes';else=NA",
as.factor=T)
#education level
data$educ<-Recode(data$educ,
recodes="102 =1; 201=2; 301=3;
400= 4; 501= 5;else=NA")
data$educ_cat<-Recode(data$educ,
recodes="1 ='NoSchool'; 2='HS Diploma'; 3='Some college';
4= 'Undergrad'; 5= 'Masters';else=NA", as.factor = T)
#employment status
data$empstat<- Recode(data$empstat,
recodes="100=1; 200=2; else=NA")
data$empstat_cat<-Recode(data$empstat,
recodes="1 ='Employed'; 2='Unemployed';else=NA", as.factor = T)
#last time employed
data$employedlasttime<- car::Recode(data$emplast,
recodes="1=1; 2=2; 3=3; 4=4;else=NA")
data$employedlasttime_cat<- car::Recode(data$emplast,
recodes="1='Within past 12months'; 2='1-5years ago'; 3='over 5years ago'; 4='never worked';else=NA",
as.factor=T)
# total combined family income
data$familyincome <- data$incfam07on
data$familyincome<- car::Recode(data$incfam07on,
recodes="11=1;12=2;22=3;23=4;24=5;else=NA")
data$familyincome_cat<- car::Recode(data$incfam07on,
recodes="11='$0 - $34,999';12='$35,000 - $49,999';22='$50,000 - $74,999';23='$75,000 - $99,999';24='$100,000 and over';else=NA",
as.factor=T)
# health insurance coverage
data$healthinsuracecov<- car::Recode(data$hinotcove,
recodes="1=0; 2=1;else=NA")
#poverty
data$poverty<- car::Recode(data$poverty,
recodes="10:14=1; 20:25=2; 30:37=3;else=NA")
##race/ethnicity
data$race<- car::Recode(data$racea,
recodes="100 ='White'; 200 ='African American';
400:434= 'Asian'; 500:590 = 'Other'; else=NA",
as.factor=T)
## marital status
data$mars<- car::Recode(data$marstat,
recodes ="10:13='Married'; 20='Widowed'; 30='Divorced';
40='Separated'; 50='Never Married'; else=NA",
as.factor=T)
data$mars<- car::Recode(data$marstat,
recodes ="10:13=1; 20=2; 30=3;
40=4; 50=5; else=NA")
data<-data%>%
filter(is.na(curpreg)==F)
data2 <- data%>%
filter(complete.cases(sampweight, strata, educ,deprx,depfeelevl,anxietyev,worrx,worfeelevl,gadfear, empstat,healthinsuracecov ,race, mars, poverty, incfam07on, familyincome_cat))%>%
select(sampweight, strata, educ, deprx, depfeelevl,anxietyev,worrx,worfeelevl,gadfear,healthinsuracecov, employedlasttime, poverty, race, mars, deprx_cat, educ_cat, familyincome_cat, empstat) %>%
mutate_at(vars(deprx, depfeelevl,anxietyev,worrx,worfeelevl,), scale)
data.pc <- PCA(data2 [,4:8],
scale.unit = T,
graph= F
)
eigenvalues <- data.pc$eig
head(eigenvalues[, 1:2])
## eigenvalue percentage of variance
## comp 1 1.9405196 38.810393
## comp 2 1.2092131 24.184261
## comp 3 0.7767927 15.535854
## comp 4 0.6425498 12.850996
## comp 5 0.4309248 8.618496
The first two components account for 63% of the variation in the input variable. In addition, the first two eigenvalues are greater than 1 (1.94 and 1.20 respectively)indicating that only these two values satisfied the condition.
fviz_screeplot(data.pc, ncp=10)
data.pc$var
## $coord
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## deprx 0.84130712 -0.048444839 0.08422886 -0.2045955 -0.49081726
## depfeelevl 0.21857790 0.754079373 -0.59059041 -0.1795183 0.05063735
## anxietyev 0.73179985 0.006441197 -0.11862182 0.6662048 0.08079282
## worrx 0.80396980 -0.105814519 0.22146826 -0.3360751 0.42478373
## worfeelevl -0.05520147 0.791828422 0.59814868 0.1081127 -0.02214401
##
## $cor
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## deprx 0.84130712 -0.048444839 0.08422886 -0.2045955 -0.49081726
## depfeelevl 0.21857790 0.754079373 -0.59059041 -0.1795183 0.05063735
## anxietyev 0.73179985 0.006441197 -0.11862182 0.6662048 0.08079282
## worrx 0.80396980 -0.105814519 0.22146826 -0.3360751 0.42478373
## worfeelevl -0.05520147 0.791828422 0.59814868 0.1081127 -0.02214401
##
## $cos2
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## deprx 0.707797677 2.346902e-03 0.007094501 0.04185934 0.2409015818
## depfeelevl 0.047776300 5.686357e-01 0.348797036 0.03222682 0.0025641410
## anxietyev 0.535531026 4.148902e-05 0.014071136 0.44382887 0.0065274802
## worrx 0.646367439 1.119671e-02 0.049048189 0.11294644 0.1804412210
## worfeelevl 0.003047202 6.269923e-01 0.357781844 0.01168835 0.0004903573
##
## $contrib
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## deprx 36.4746463 0.194085104 0.9133068 6.514567 55.9033948
## depfeelevl 2.4620364 47.025269770 44.9022028 5.015459 0.5950321
## anxietyev 27.5972999 0.003431076 1.8114403 69.073068 1.5147609
## worrx 33.3089871 0.925950335 6.3141928 17.577850 41.8730203
## worfeelevl 0.1570302 51.851263715 46.0588573 1.819057 0.1137919
Interestingly, level of worry, nervous, or anxious feelings (worfeelevl) is not very correlated with the principal component variables (-0.05.
fviz_pca_var(data.pc,
col.var= "contrib")+
theme_minimal()
fviz_pca_ind(data.pc,
label= "none",
col.ind= "cos2") +
scale_color_gradient2(low= "blue",
mid= "white",
high= "red",
midpoint = .5) +
theme_minimal()
desc<- dimdesc(data.pc)
desc$Dim.1
## $quanti
## correlation p.value
## deprx 0.84130712 0.000000e+00
## worrx 0.80396980 1.808576e-293
## anxietyev 0.73179985 4.666106e-217
## depfeelevl 0.21857790 1.937054e-15
## worfeelevl -0.05520147 4.728083e-02
##
## attr(,"class")
## [1] "condes" "list"
desc$Dim.2
## $quanti
## correlation p.value
## worfeelevl 0.7918284 1.606004e-278
## depfeelevl 0.7540794 8.778035e-238
## worrx -0.1058145 1.387085e-04
##
## attr(,"class")
## [1] "condes" "list"
data2$pc1 <- data.pc$ind$coord[, 1]
options(survey.lonely.psu = "adjust")
des<- svydesign( ids= ~1,
strata = ~strata,
weights= ~sampweight,
data= data2)
Now, the analysis will look at variation in mental health index across education level, race, and family income.
ggplot(aes(x=educ_cat, y=pc1),
data=data2) +
geom_boxplot()
ggplot(aes(x=race, y=pc1),
data=data2) +
geom_boxplot()
ggplot(aes(x=familyincome_cat, y=pc1),
data=data2) +
geom_boxplot()
fit.1 <- svyglm(pc1~educ_cat +familyincome_cat + (race),
des,
family= gaussian)
summary(fit.1)
##
## Call:
## svyglm(formula = pc1 ~ educ_cat + familyincome_cat + (race),
## design = des, family = gaussian)
##
## Survey design:
## svydesign(ids = ~1, strata = ~strata, weights = ~sampweight,
## data = data2)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.09009 0.15076 -0.598 0.55022
## educ_catMasters -0.07536 0.14906 -0.506 0.61327
## educ_catNoSchool 0.16885 0.67110 0.252 0.80139
## educ_catSome college 0.15290 0.12403 1.233 0.21789
## educ_catUndergrad -0.07237 0.11691 -0.619 0.53600
## familyincome_cat$100,000 and over -0.40961 0.12980 -3.156 0.00164 **
## familyincome_cat$35,000 - $49,999 -0.22344 0.13756 -1.624 0.10456
## familyincome_cat$50,000 - $74,999 0.03570 0.15171 0.235 0.81402
## familyincome_cat$75,000 - $99,999 -0.48484 0.15787 -3.071 0.00218 **
## raceAsian -0.59131 0.21155 -2.795 0.00527 **
## raceOther -0.62833 0.29262 -2.147 0.03197 *
## raceWhite 0.27073 0.16347 1.656 0.09795 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.813777)
##
## Number of Fisher Scoring iterations: 2
Compared to women with HS Diploma, mental health index is lower among undergraduate and Masters degree whereas it is higher among women with no school and some college degree. In regard to family income, the mental health index is lower among women who have higher family income but relatively higher with income starting from 35-74k. Finally, compared to Black women, mental health index is higher among White women whereas lower among Asian and Other women.