library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(ggplot2)
library(pander)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(haven)
NSDUH_2019 <- read_sav("NSDUH_2019.SAV")
View(NSDUH_2019)
nams<-names(NSDUH_2019)
head(nams, n=10)
##  [1] "QUESTID2" "FILEDATE" "CIGEVER"  "CIGOFRSM" "CIGWILYR" "CIGTRY"  
##  [7] "CIGYFU"   "CIGMFU"   "CIGREC"   "CIG30USE"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(NSDUH_2019)<-newnames
NSDUH_2019$attempt_suicide<-Recode(NSDUH_2019$ADWRSATP, recodes="1=1; 2=0;else=NA")
## Warning: Unknown or uninitialised column: `ADWRSATP`.
## Main variable hopeless last thirty days
NSDUH_2019$hopelesslst30days<-Recode(NSDUH_2019$dsthop30,
                               recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltnervouslst30days<-Recode(NSDUH_2019$dstnrv30,
                               recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltrestlesslst30days<-Recode(NSDUH_2019$dstrst30,
                               recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltsadlst30days<-Recode(NSDUH_2019$dstchr30,
                               recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$effortlst30days<-Recode(NSDUH_2019$dsteff30,
                               recodes="1:2=3;3:4=2;5=1; else=NA")
NSDUH_2019$fltdwnlst30days<-Recode(NSDUH_2019$dstngd30,
                               recodes="1:2=3;3:4=2;5=1; else=NA")

## marital status
NSDUH_2019$marst<-Recode(NSDUH_2019$irmarit, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; else=NA", as.factor=T)
NSDUH_2019$marst<-relevel(NSDUH_2019$marst, ref='married')

## education recodes
NSDUH_2019$educ<-Recode(NSDUH_2019$ireduhighst2, recodes="1:7='LssThnHgh'; 8='highschool'; 9='someCollege'; 10='associates'; 11='colgrad';else=NA", as.factor=T)
NSDUH_2019$educ<-relevel(NSDUH_2019$educ, ref='colgrad')

## sexuality recodes
NSDUH_2019$sexuality<-Recode(NSDUH_2019$sexident, recodes="1='Heterosexual'; 2='Les/Gay'; 3='Bisexual';else=NA", as.factor=T)
NSDUH_2019$sexuality<-relevel(NSDUH_2019$sexuality, ref='Heterosexual')

## gender recodes
NSDUH_2019$male<-as.factor(ifelse(NSDUH_2019$irsex==1, "Male", "Female"))

## Race recoded items
NSDUH_2019$black<-Recode(NSDUH_2019$newrace2, recodes="2=1; 9=NA; else=0")
NSDUH_2019$white<-Recode(NSDUH_2019$newrace2, recodes="1=1; 9=NA; else=0")
NSDUH_2019$other<-Recode(NSDUH_2019$newrace2, recodes="3:4=1; 9=NA; else=0")
NSDUH_2019$mult_race<-Recode(NSDUH_2019$newrace2, recodes="6=1; 9=NA; else=0")
NSDUH_2019$asian<-Recode(NSDUH_2019$newrace2, recodes="5=1; 9=NA; else=0")
NSDUH_2019$hispanic<-Recode(NSDUH_2019$newrace2, recodes="7=1; 9=NA; else=0")
NSDUH_2019$race_eth<-Recode(NSDUH_2019$newrace2,
                          recodes="1='white'; 2='black'; 3='other'; 4='asian'; 5='mult_race'; 6='hispanic'; else=NA",
                          as.factor = T)
NSDUH_2019$race_eth<-relevel(NSDUH_2019$race_eth, ref='white')
NSDUH_2019$lst_alc_use2<-Recode(NSDUH_2019$iralcrc, recodes="1='last 30days'; 2='12>1month'; 3='>12months'; else=NA", as.factor=T)
NSDUH_2019$dep_year2<-Recode(NSDUH_2019$amdeyr, recodes="1=1; 2=0;else=NA", as.factor=T)
NSDUH_2019$age_cat<-Recode(NSDUH_2019$age2, recodes="7:8='18-19'; 9:10='20-21'; 11='22-23'; 12='24-25'; 13='26-29'; 14='30-34'; 15='35-49'; 16='50-64'; 17='65+'; else=NA", as.factor=T)
NSDUH_2019b<-NSDUH_2019%>%
  filter(complete.cases(analwtc, vestr, age_cat, race_eth,educ, marst, sexuality, income, lst_alc_use2, dep_year2, male, hopelesslst30days, fltnervouslst30days, fltrestlesslst30days, fltsadlst30days, effortlst30days, fltdwnlst30days))%>%
  select(analwtc, vestr, age_cat, race_eth,educ, marst, sexuality, income, lst_alc_use2, dep_year2, male, hopelesslst30days, fltnervouslst30days, fltrestlesslst30days, fltsadlst30days, effortlst30days, fltdwnlst30days)%>%
  mutate_at(vars(hopelesslst30days, fltnervouslst30days, fltrestlesslst30days, fltsadlst30days, effortlst30days, fltdwnlst30days), scale)
NSDUH.pc<-princomp(~hopelesslst30days+fltnervouslst30days+fltrestlesslst30days+fltsadlst30days+effortlst30days+ fltdwnlst30days,
                   data=NSDUH_2019b,
                   scores=T)

#Screeplot
screeplot(NSDUH.pc,
          type = "l",
          main = "Scree Plot")
abline(h=1)

## Principal Components

If you have an idea for latent construct, state what you believe this is.

The variables choosen for this homework assignment include poor indicators of psychological well-being including feeling hopeless, restless, that things required effort, not being able to be cheered up, feeling down, and nervous within the last 30 days. As such, they have been recoded with lower numbers indicating better health, while higher numbers meant worse off health. This index could be used to indicate poor psychological well-being, with higher levels meaning worse levels of psychological states.

summary(NSDUH.pc)
## Importance of components:
##                          Comp.1    Comp.2     Comp.3     Comp.4     Comp.5
## Standard deviation     1.945812 0.8207626 0.69750355 0.65072422 0.56273335
## Proportion of Variance 0.631052 0.1122790 0.08108794 0.07057606 0.05277992
## Cumulative Proportion  0.631052 0.7433310 0.82441895 0.89499500 0.94777493
##                            Comp.6
## Standard deviation     0.55976767
## Proportion of Variance 0.05222507
## Cumulative Proportion  1.00000000
loadings(NSDUH.pc)
## 
## Loadings:
##                      Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## hopelesslst30days     0.429  0.272  0.345         0.625  0.481
## fltnervouslst30days   0.376 -0.626  0.256  0.622              
## fltrestlesslst30days  0.386 -0.546        -0.736              
## fltsadlst30days       0.430  0.325  0.173        -0.763  0.304
## effortlst30days       0.396  0.107 -0.868  0.250              
## fltdwnlst30days       0.429  0.344  0.146         0.103 -0.813
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.167  0.167  0.167  0.167  0.167  0.167
## Cumulative Var  0.167  0.333  0.500  0.667  0.833  1.000

Report the summary statistics and correlation matrix for your data

#Report the results of the PCA, being sure to include the eigenvalues and #corresponding vectors. Interpret your component(s) if possible # According to the summary statistics and correlation matrix, only one of the six componentns is useful with an eigenvalue of 1.94, in comp1 this model also consists of about 63% of all the variation across the components, and variance amongs all of the variables. This suggests there is only 1 real component. All other components are too far below 1 to be considered for further analysis. Component 1 has all variables being postiviely correlated with its component.Again, noting that greater numbers equal worse off psychological well-being. In this case the magnitude of the variables do not seem to be tha far off from each other, and in fact half of the variables appear to be pretty consistent magnitude wise. No other component will be considered as, the aigenvalues were below 1. The closest would be component 2 but the cutoff from yesterday was at least near 1, in this case being .82. Correlations ran on components 1 and 2 indicate they are orthogonal or are not correlated. Comp 1 appears to be more right skewed, with more people having better mental health.

scores<-data.frame(NSDUH.pc$scores)
hist(scores$Comp.1)

hist(scores$Comp.2)

cor(scores[,1:2])
##              Comp.1       Comp.2
## Comp.1 1.000000e+00 6.231176e-14
## Comp.2 6.231176e-14 1.000000e+00
NSDUHc<-cbind(NSDUH_2019b, scores)
names(NSDUHc)
##  [1] "analwtc"              "vestr"                "age_cat"             
##  [4] "race_eth"             "educ"                 "marst"               
##  [7] "sexuality"            "income"               "lst_alc_use2"        
## [10] "dep_year2"            "male"                 "hopelesslst30days"   
## [13] "fltnervouslst30days"  "fltrestlesslst30days" "fltsadlst30days"     
## [16] "effortlst30days"      "fltdwnlst30days"      "Comp.1"              
## [19] "Comp.2"               "Comp.3"               "Comp.4"              
## [22] "Comp.5"               "Comp.6"
names(NSDUHc)
##  [1] "analwtc"              "vestr"                "age_cat"             
##  [4] "race_eth"             "educ"                 "marst"               
##  [7] "sexuality"            "income"               "lst_alc_use2"        
## [10] "dep_year2"            "male"                 "hopelesslst30days"   
## [13] "fltnervouslst30days"  "fltrestlesslst30days" "fltsadlst30days"     
## [16] "effortlst30days"      "fltdwnlst30days"      "Comp.1"              
## [19] "Comp.2"               "Comp.3"               "Comp.4"              
## [22] "Comp.5"               "Comp.6"
data.frame(colnames(NSDUHc))
##        colnames.NSDUHc.
## 1               analwtc
## 2                 vestr
## 3               age_cat
## 4              race_eth
## 5                  educ
## 6                 marst
## 7             sexuality
## 8                income
## 9          lst_alc_use2
## 10            dep_year2
## 11                 male
## 12    hopelesslst30days
## 13  fltnervouslst30days
## 14 fltrestlesslst30days
## 15      fltsadlst30days
## 16      effortlst30days
## 17      fltdwnlst30days
## 18               Comp.1
## 19               Comp.2
## 20               Comp.3
## 21               Comp.4
## 22               Comp.5
## 23               Comp.6
round(cor(NSDUHc[,c(-1:-11, -18:-24)]), 3)
##                      hopelesslst30days fltnervouslst30days fltrestlesslst30days
## hopelesslst30days                1.000               0.515                0.514
## fltnervouslst30days              0.515               1.000                0.573
## fltrestlesslst30days             0.514               0.573                1.000
## fltsadlst30days                  0.680               0.491                0.515
## effortlst30days                  0.549               0.472                0.505
## fltdwnlst30days                  0.681               0.485                0.511
##                      fltsadlst30days effortlst30days fltdwnlst30days
## hopelesslst30days              0.680           0.549           0.681
## fltnervouslst30days            0.491           0.472           0.485
## fltrestlesslst30days           0.515           0.505           0.511
## fltsadlst30days                1.000           0.575           0.685
## effortlst30days                0.575           1.000           0.580
## fltdwnlst30days                0.685           0.580           1.000
round(cor(NSDUHc[,c(-1:-11, -21:-27)]),3)
##                      hopelesslst30days fltnervouslst30days fltrestlesslst30days
## hopelesslst30days                1.000               0.515                0.514
## fltnervouslst30days              0.515               1.000                0.573
## fltrestlesslst30days             0.514               0.573                1.000
## fltsadlst30days                  0.680               0.491                0.515
## effortlst30days                  0.549               0.472                0.505
## fltdwnlst30days                  0.681               0.485                0.511
## Comp.1                           0.834               0.732                0.751
## Comp.2                           0.223              -0.514               -0.448
## Comp.3                           0.241               0.179               -0.068
##                      fltsadlst30days effortlst30days fltdwnlst30days Comp.1
## hopelesslst30days              0.680           0.549           0.681  0.834
## fltnervouslst30days            0.491           0.472           0.485  0.732
## fltrestlesslst30days           0.515           0.505           0.511  0.751
## fltsadlst30days                1.000           0.575           0.685  0.836
## effortlst30days                0.575           1.000           0.580  0.771
## fltdwnlst30days                0.685           0.580           1.000  0.835
## Comp.1                         0.836           0.771           0.835  1.000
## Comp.2                         0.267           0.088           0.283  0.000
## Comp.3                         0.121          -0.606           0.102  0.000
##                      Comp.2 Comp.3
## hopelesslst30days     0.223  0.241
## fltnervouslst30days  -0.514  0.179
## fltrestlesslst30days -0.448 -0.068
## fltsadlst30days       0.267  0.121
## effortlst30days       0.088 -0.606
## fltdwnlst30days       0.283  0.102
## Comp.1                0.000  0.000
## Comp.2                1.000  0.000
## Comp.3                0.000  1.000

According to the correlation anaylsis, the vairables used are all moderately correlated with each other. On the higher end of that spectrum feeling hopeless, sad, and down showed consistently higher scores. Perhaps because feeling sad, hopeless, or down are more indicative of a depressive epsiode. These correlations were at least at the upper .60 levels, so they correlated quite well positively with each other. It may be wiser to use just those three variables as opposed to all six. While feeling restless, nervous, or the need to exert effort had lower moderate scores in the lower to upper .50s and upper .40s are possibly more indicative of anxiety as opposed to depression. However, the correlation results for component 1 proved rather fruitful, as each of those variables were again postively correlated with one other, and additionally half of them were at .80 or higher. Again, as with what was shown in the initial correlation analysis the post .80 groups included feelings of hopelessness, down, and being sad within the ladt 30 days. However, this does not discount the other variables which still had a strong positive correlation at above .70. Overall, more of these feelings will result in worse off mental well-being. That being said additional analyses will be run.

options(survey.lonely.psu = "adjust")
NSDUHc$pc1q<-cut(NSDUHc$Comp.1,
                    breaks = quantile(NSDUHc$Comp.1,
                                      probs = c(0,.25,.5,.75,1) ,
                                      na.rm=T), 
                    include.lowest=T)
des<-svydesign(ids=~1,
               strata=~vestr,
               weights=~analwtc,
               data=NSDUHc)
library(ggplot2)
ggplot(aes(x=age_cat, y=Comp.1),
       data=NSDUHc)+
  geom_boxplot()

NSDUHc$marst<-factor(NSDUHc$marst, levels(NSDUHc$marst)[c(1,3,2,4,5)])

ggplot(aes(x=marst, y=Comp.1),
       data=NSDUHc)+
  geom_boxplot()

NSDUHc$sexuality<-factor(NSDUHc$sexuality, levels(NSDUHc$sexuality)[c(1,3,2,4,5)])

ggplot(aes(x=sexuality, y=Comp.1),
       data=NSDUHc)+
  geom_boxplot()

NSDUHc$male<-factor(NSDUHc$male, levels(NSDUHc$male)[c(1,3,2,4,5)])

ggplot(aes(x=male, y=Comp.1),
       data=NSDUHc)+
  geom_boxplot()

NSDUHc$race_eth<-factor(NSDUHc$race_eth, levels(NSDUHc$race_eth)[c(1,3,2,4,5)])

ggplot(aes(x=race_eth, y=Comp.1),
       data=NSDUHc)+
  geom_boxplot()

NSDUHc$lst_alc_use2<-factor(NSDUHc$lst_alc_use2, levels(NSDUHc$lst_alc_use2)[c(1,3,2,4,5)])

ggplot(aes(x=lst_alc_use2, y=Comp.1),
       data=NSDUHc)+
  geom_boxplot()

If deemed appropriate, conduct some testing of your index/components/latent variables.

According to the testing of my index, and again only comp 1 was used, the most variance is seen in my, marital status, and age category variables. On average it appears as age increases psychological well-being gets better. After the age variable several of the sub categories within each variable appear to be identical. For instance married and divorced people have similar levels of psychological well-being, while separated and widowed people are worse off. On average bisexuals and lesbians respectively are worse off psychologically than heterosexuals.Males have marginally worse levels of psychological well-being than females. Again, multuple races have similar levels, while hispanics appear to be the most worse off. Recency of alcohol usage does not appear to alter psychological wel-being.