library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(pander)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(haven)
NSDUH_2019 <- read_sav("NSDUH_2019.SAV")
View(NSDUH_2019)
nams<-names(NSDUH_2019)
head(nams, n=10)
## [1] "QUESTID2" "FILEDATE" "CIGEVER" "CIGOFRSM" "CIGWILYR" "CIGTRY"
## [7] "CIGYFU" "CIGMFU" "CIGREC" "CIG30USE"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(NSDUH_2019)<-newnames
NSDUH_2019$attempt_suicide<-Recode(NSDUH_2019$ADWRSATP, recodes="1=1; 2=0;else=NA")
## Warning: Unknown or uninitialised column: `ADWRSATP`.
## Main variable hopeless last thirty days
NSDUH_2019$hopelesslst30days<-Recode(NSDUH_2019$dsthop30,
recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltnervouslst30days<-Recode(NSDUH_2019$dstnrv30,
recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltrestlesslst30days<-Recode(NSDUH_2019$dstrst30,
recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltsadlst30days<-Recode(NSDUH_2019$dstchr30,
recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$effortlst30days<-Recode(NSDUH_2019$dsteff30,
recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltdwnlst30days<-Recode(NSDUH_2019$dstngd30,
recodes="1:2=3;3:4=2;5=1; else=NA")
## marital status
NSDUH_2019$marst<-Recode(NSDUH_2019$irmarit, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; else=NA", as.factor=T)
NSDUH_2019$marst<-relevel(NSDUH_2019$marst, ref='married')
## education recodes
NSDUH_2019$educ<-Recode(NSDUH_2019$ireduhighst2, recodes="1:7='LssThnHgh'; 8='highschool'; 9='someCollege'; 10='associates'; 11='colgrad';else=NA", as.factor=T)
NSDUH_2019$educ<-relevel(NSDUH_2019$educ, ref='colgrad')
## sexuality recodes
NSDUH_2019$sexuality<-Recode(NSDUH_2019$sexident, recodes="1='Heterosexual'; 2='Les/Gay'; 3='Bisexual';else=NA", as.factor=T)
NSDUH_2019$sexuality<-relevel(NSDUH_2019$sexuality, ref='Heterosexual')
## gender recodes
NSDUH_2019$male<-as.factor(ifelse(NSDUH_2019$irsex==1, "Male", "Female"))
## Race recoded items
NSDUH_2019$black<-Recode(NSDUH_2019$newrace2, recodes="2=1; 9=NA; else=0")
NSDUH_2019$white<-Recode(NSDUH_2019$newrace2, recodes="1=1; 9=NA; else=0")
NSDUH_2019$other<-Recode(NSDUH_2019$newrace2, recodes="3:4=1; 9=NA; else=0")
NSDUH_2019$mult_race<-Recode(NSDUH_2019$newrace2, recodes="6=1; 9=NA; else=0")
NSDUH_2019$asian<-Recode(NSDUH_2019$newrace2, recodes="5=1; 9=NA; else=0")
NSDUH_2019$hispanic<-Recode(NSDUH_2019$newrace2, recodes="7=1; 9=NA; else=0")
NSDUH_2019$race_eth<-Recode(NSDUH_2019$newrace2,
recodes="1='white'; 2='black'; 3='other'; 4='asian'; 5='mult_race'; 6='hispanic'; else=NA",
as.factor = T)
NSDUH_2019$race_eth<-relevel(NSDUH_2019$race_eth, ref='white')
NSDUH_2019$lst_alc_use2<-Recode(NSDUH_2019$iralcrc, recodes="1='last 30days'; 2='12>1month'; 3='>12months'; else=NA", as.factor=T)
NSDUH_2019$dep_year2<-Recode(NSDUH_2019$amdeyr, recodes="1=1; 2=0;else=NA", as.factor=T)
NSDUH_2019$age_cat<-Recode(NSDUH_2019$age2, recodes="7:8='18-19'; 9:10='20-21'; 11='22-23'; 12='24-25'; 13='26-29'; 14='30-34'; 15='35-49'; 16='50-64'; 17='65+'; else=NA", as.factor=T)
NSDUH_2019b<-NSDUH_2019%>%
filter(complete.cases(analwtc, vestr, age_cat, race_eth,educ, marst, sexuality, income, lst_alc_use2, dep_year2, male, hopelesslst30days, fltnervouslst30days, fltrestlesslst30days, fltsadlst30days, effortlst30days, fltdwnlst30days))%>%
select(analwtc, vestr, age_cat, race_eth,educ, marst, sexuality, income, lst_alc_use2, dep_year2, male, hopelesslst30days, fltnervouslst30days, fltrestlesslst30days, fltsadlst30days, effortlst30days, fltdwnlst30days)%>%
mutate_at(vars(hopelesslst30days, fltnervouslst30days, fltrestlesslst30days, fltsadlst30days, effortlst30days, fltdwnlst30days), scale)
NSDUH.pc<-princomp(~hopelesslst30days+fltnervouslst30days+fltrestlesslst30days+fltsadlst30days+effortlst30days+ fltdwnlst30days,
data=NSDUH_2019b,
scores=T)
#Screeplot
screeplot(NSDUH.pc,
type = "l",
main = "Scree Plot")
abline(h=1)
## Principal Components
summary(NSDUH.pc)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.945812 0.8207626 0.69750355 0.65072422 0.56273335
## Proportion of Variance 0.631052 0.1122790 0.08108794 0.07057606 0.05277992
## Cumulative Proportion 0.631052 0.7433310 0.82441895 0.89499500 0.94777493
## Comp.6
## Standard deviation 0.55976767
## Proportion of Variance 0.05222507
## Cumulative Proportion 1.00000000
loadings(NSDUH.pc)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## hopelesslst30days 0.429 0.272 0.345 0.625 0.481
## fltnervouslst30days 0.376 -0.626 0.256 0.622
## fltrestlesslst30days 0.386 -0.546 -0.736
## fltsadlst30days 0.430 0.325 0.173 -0.763 0.304
## effortlst30days 0.396 0.107 -0.868 0.250
## fltdwnlst30days 0.429 0.344 0.146 0.103 -0.813
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.167 0.167 0.167 0.167 0.167 0.167
## Cumulative Var 0.167 0.333 0.500 0.667 0.833 1.000
#Report the results of the PCA, being sure to include the eigenvalues and #corresponding vectors. Interpret your component(s) if possible # According to the summary statistics and correlation matrix, only one of the six componentns is useful with an eigenvalue of 1.94, in comp1 this model also consists of about 63% of all the variation across the components, and variance amongs all of the variables. This suggests there is only 1 real component. All other components are too far below 1 to be considered for further analysis. Component 1 has all variables being postiviely correlated with its component.Again, noting that greater numbers equal worse off psychological well-being. In this case the magnitude of the variables do not seem to be tha far off from each other, and in fact half of the variables appear to be pretty consistent magnitude wise. No other component will be considered as, the aigenvalues were below 1. The closest would be component 2 but the cutoff from yesterday was at least near 1, in this case being .82. Correlations ran on components 1 and 2 indicate they are orthogonal or are not correlated. Comp 1 appears to be more right skewed, with more people having better mental health.
scores<-data.frame(NSDUH.pc$scores)
hist(scores$Comp.1)
hist(scores$Comp.2)
cor(scores[,1:2])
## Comp.1 Comp.2
## Comp.1 1.000000e+00 6.231176e-14
## Comp.2 6.231176e-14 1.000000e+00
NSDUHc<-cbind(NSDUH_2019b, scores)
names(NSDUHc)
## [1] "analwtc" "vestr" "age_cat"
## [4] "race_eth" "educ" "marst"
## [7] "sexuality" "income" "lst_alc_use2"
## [10] "dep_year2" "male" "hopelesslst30days"
## [13] "fltnervouslst30days" "fltrestlesslst30days" "fltsadlst30days"
## [16] "effortlst30days" "fltdwnlst30days" "Comp.1"
## [19] "Comp.2" "Comp.3" "Comp.4"
## [22] "Comp.5" "Comp.6"
names(NSDUHc)
## [1] "analwtc" "vestr" "age_cat"
## [4] "race_eth" "educ" "marst"
## [7] "sexuality" "income" "lst_alc_use2"
## [10] "dep_year2" "male" "hopelesslst30days"
## [13] "fltnervouslst30days" "fltrestlesslst30days" "fltsadlst30days"
## [16] "effortlst30days" "fltdwnlst30days" "Comp.1"
## [19] "Comp.2" "Comp.3" "Comp.4"
## [22] "Comp.5" "Comp.6"
data.frame(colnames(NSDUHc))
## colnames.NSDUHc.
## 1 analwtc
## 2 vestr
## 3 age_cat
## 4 race_eth
## 5 educ
## 6 marst
## 7 sexuality
## 8 income
## 9 lst_alc_use2
## 10 dep_year2
## 11 male
## 12 hopelesslst30days
## 13 fltnervouslst30days
## 14 fltrestlesslst30days
## 15 fltsadlst30days
## 16 effortlst30days
## 17 fltdwnlst30days
## 18 Comp.1
## 19 Comp.2
## 20 Comp.3
## 21 Comp.4
## 22 Comp.5
## 23 Comp.6
round(cor(NSDUHc[,c(-1:-11, -18:-24)]), 3)
## hopelesslst30days fltnervouslst30days fltrestlesslst30days
## hopelesslst30days 1.000 0.515 0.514
## fltnervouslst30days 0.515 1.000 0.573
## fltrestlesslst30days 0.514 0.573 1.000
## fltsadlst30days 0.680 0.491 0.515
## effortlst30days 0.549 0.472 0.505
## fltdwnlst30days 0.681 0.485 0.511
## fltsadlst30days effortlst30days fltdwnlst30days
## hopelesslst30days 0.680 0.549 0.681
## fltnervouslst30days 0.491 0.472 0.485
## fltrestlesslst30days 0.515 0.505 0.511
## fltsadlst30days 1.000 0.575 0.685
## effortlst30days 0.575 1.000 0.580
## fltdwnlst30days 0.685 0.580 1.000
round(cor(NSDUHc[,c(-1:-11, -21:-27)]),3)
## hopelesslst30days fltnervouslst30days fltrestlesslst30days
## hopelesslst30days 1.000 0.515 0.514
## fltnervouslst30days 0.515 1.000 0.573
## fltrestlesslst30days 0.514 0.573 1.000
## fltsadlst30days 0.680 0.491 0.515
## effortlst30days 0.549 0.472 0.505
## fltdwnlst30days 0.681 0.485 0.511
## Comp.1 0.834 0.732 0.751
## Comp.2 0.223 -0.514 -0.448
## Comp.3 0.241 0.179 -0.068
## fltsadlst30days effortlst30days fltdwnlst30days Comp.1
## hopelesslst30days 0.680 0.549 0.681 0.834
## fltnervouslst30days 0.491 0.472 0.485 0.732
## fltrestlesslst30days 0.515 0.505 0.511 0.751
## fltsadlst30days 1.000 0.575 0.685 0.836
## effortlst30days 0.575 1.000 0.580 0.771
## fltdwnlst30days 0.685 0.580 1.000 0.835
## Comp.1 0.836 0.771 0.835 1.000
## Comp.2 0.267 0.088 0.283 0.000
## Comp.3 0.121 -0.606 0.102 0.000
## Comp.2 Comp.3
## hopelesslst30days 0.223 0.241
## fltnervouslst30days -0.514 0.179
## fltrestlesslst30days -0.448 -0.068
## fltsadlst30days 0.267 0.121
## effortlst30days 0.088 -0.606
## fltdwnlst30days 0.283 0.102
## Comp.1 0.000 0.000
## Comp.2 1.000 0.000
## Comp.3 0.000 1.000