library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.1     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
library(broom)
library(emmeans)
library(haven)
NSDUH_2019 <- read_sav("NSDUH_2019.SAV")
View(NSDUH_2019)
## attempted suicide
NSDUH_2019$attempt_suicide<-Recode(NSDUH_2019$ADWRSATP, recodes="1=1; 2=0;else=NA")
summary(NSDUH_2019$attempt_suicide, na.rm = TRUE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    0.25    1.00    1.00   52599
## marital status
NSDUH_2019$marst<-Recode(NSDUH_2019$IRMARIT, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; else=NA", as.factor=T)
NSDUH_2019$marst<-relevel(NSDUH_2019$marst, ref='married')

## education recodes
NSDUH_2019$educ<-Recode(NSDUH_2019$IREDUHIGHST2, recodes="1:7='LssThnHgh'; 8='Hs'; 9='SomeCollege'; 10='Associates'; 11='Colgrad';else=NA", as.factor=T)
NSDUH_2019$educ<-relevel(NSDUH_2019$educ, ref='Colgrad')

## sexuality recodes
NSDUH_2019$sexuality<-Recode(NSDUH_2019$SEXIDENT, recodes="1='Heterosexual'; 2='Les/Gay'; 3='Bisexual';else=NA", as.factor=T)
NSDUH_2019$sexuality<-relevel(NSDUH_2019$sexuality, ref='Heterosexual')

## gender recodes
NSDUH_2019$male<-as.factor(ifelse(NSDUH_2019$IRSEX==1, "Male", "Female"))

## Race recoded items
NSDUH_2019$black<-Recode(NSDUH_2019$NEWRACE2, recodes="2=1; 9=NA; else=0")
NSDUH_2019$white<-Recode(NSDUH_2019$NEWRACE2, recodes="1=1; 9=NA; else=0")
NSDUH_2019$NatamAlknat<-Recode(NSDUH_2019$NEWRACE2, recodes="3=1; 9=NA; else=0")
NSDUH_2019$NathaOthnat<-Recode(NSDUH_2019$NEWRACE2, recodes="4=1; 9=NA; else=0")
NSDUH_2019$mult_race<-Recode(NSDUH_2019$NEWRACE2, recodes="6=1; 9=NA; else=0")
NSDUH_2019$asian<-Recode(NSDUH_2019$NEWRACE2, recodes="5=1; 9=NA; else=0")
NSDUH_2019$hispanic<-Recode(NSDUH_2019$NEWRACE2, recodes="7=1; 9=NA; else=0")


NSDUH_2019$race_eth<-Recode(NSDUH_2019$NEWRACE2,
                          recodes="1='white'; 2='black'; 3='NatamAlknat';4='NathaOthnat'; 5='asian'; 6='mult_race'; 7='hispanic'; else=NA",
                          as.factor = T)

1. For this homework, you will perform a logistic regression (or probit regression) of a binary outcome, using the dataset of your choice. I ask you specify a research question for your analysis and generate appropriate predictors in order to examine your question.

## what social characteristics increase the odds of attempting suicide?

##2.Present results from a model with sample weights and design effects, if your data allow for this. Present the results in tabular form, with Parameter estimates, odds ratios (if using the logit model) and confidence intervals for the odds ratios.

## TABULAR DESIGN WITH DESIGN
sub<-NSDUH_2019 %>%
  select(attempt_suicide, marst, educ, sexuality, male, white, black, hispanic, NatamAlknat, NathaOthnat, mult_race, asian, race_eth, ANALWT_C, VESTR) %>%
  filter( complete.cases( . ))

options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~1,
               strata= ~VESTR,
               weights= ~ANALWT_C
               , data = sub )

Survey design cross tabs gender on attempting suicide

turtle<-svyby(formula = ~attempt_suicide,
           by = ~male,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~attempt_suicide+male,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~attempt_suicide + male, design = des)
## F = 10.337, ndf = 1, ddf = 3454, p-value = 0.001316
turtle%>%
  ggplot()+
  geom_point(aes(x=male,y=attempt_suicide))+
  geom_errorbar(aes(x=male, ymin = attempt_suicide-1.96*se, 
                    ymax= attempt_suicide+1.96*se),
                width=.25)+
   labs(title = "Percent % of Suicide Attempts by Gender", 
        caption = "Source: NSDUH_2019, 2019 \n Calculations by Joshua A. Reyna, MS.",
       x = "Gender",
       y = "%  Suicide Attempts")+
  theme_minimal()

Survey design cross tabs race/ethnicity on attempting suicide

snake<-svyby(formula = ~attempt_suicide,
           by = ~race_eth,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~attempt_suicide+race_eth,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~attempt_suicide + race_eth, design = des)
## F = 3.2632, ndf = 5.4789, ddf = 18924.1297, p-value = 0.004507
snake%>%
  ggplot()+
  geom_point(aes(x=race_eth,y=attempt_suicide))+
  geom_errorbar(aes(x=race_eth, ymin = attempt_suicide-1.96*se, 
                    ymax= attempt_suicide+1.96*se),
                width=.25)+
   labs(title = "Percent % of Suicide Attempts by Race/Ethnicity", 
        caption = "Source: NSDUH_2019, 2019 \n Calculations by Joshua A. Reyna, MS.",
       x = "Race/Ethnicity",
       y = "%  Suicide Attempts")+
  theme_minimal()

Survey design cross tabs marital status on attempting suicide

dog<-svyby(formula = ~attempt_suicide,
           by = ~marst,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~attempt_suicide+marst,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~attempt_suicide + marst, design = des)
## F = 2.2559, ndf = 2.8871, ddf = 9971.9500, p-value = 0.08227
dog%>%
  ggplot()+
  geom_point(aes(x=marst,y=attempt_suicide))+
  geom_errorbar(aes(x=marst, ymin = attempt_suicide-1.96*se, 
                    ymax= attempt_suicide+1.96*se),
                width=.25)+
   labs(title = "Percent % of Suicide Attempts by Marital Status", 
        caption = "Source: NSDUH_2019, 2019 \n Calculations by Joshua A. Reyna, MS.",
       x = "Marital Status",
       y = "%  Suicide Attempts")+
  theme_minimal()

Survey design cross tabs sexuality on attempting suicide

bird<-svyby(formula = ~attempt_suicide,
           by = ~sexuality,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~attempt_suicide+sexuality,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~attempt_suicide + sexuality, design = des)
## F = 4.9784, ndf = 1.9421, ddf = 6708.0272, p-value = 0.007455
bird%>%
  ggplot()+
  geom_point(aes(x=sexuality,y=attempt_suicide))+
  geom_errorbar(aes(x=sexuality, ymin = attempt_suicide-1.96*se, 
                    ymax= attempt_suicide+1.96*se),
                width=.25)+
   labs(title = "Percent % of Suicide Attempts by Sexuality", 
        caption = "Source: NSDUH_2019, 2019 \n Calculations by Joshua A. Reyna, MS.",
       x = "Sexuality",
       y = "%  Suicide Attempts")+
  theme_minimal()

Survey design cross tabs education on attempting suicide

cat<-svyby(formula = ~attempt_suicide,
           by = ~educ,
           design = des,
           FUN = svymean,
           na.rm=T)

svychisq(~attempt_suicide+educ,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~attempt_suicide + educ, design = des)
## F = 9.8711, ndf = 3.948, ddf = 13636.411, p-value = 6.768e-08
cat%>%
  ggplot()+
  geom_point(aes(x=educ,y=attempt_suicide))+
  geom_errorbar(aes(x=educ, ymin = attempt_suicide-1.96*se, 
                    ymax= attempt_suicide+1.96*se),
                width=.25)+
   labs(title = "Percent % of Suicide Attempts by Educational Attainment", 
        caption = "Source: NSDUH_2019, 2019 \n Calculations by Joshua A. Reyna, MS.",
       x = "Educational Attainment",
       y = "%  Suicide Attempts")+
  theme_minimal()

## Discussion Tabulations and Chi squares

## Cross tabulations with chi square analysis were run on each of the independent variables on made a suicide attempt in adults. It should be noted that each variable was chosen based on viewings of both the US center for disease control, and various literature on the subject of suicide attempts. However, the chi square analyses indicated that marital status, sexuality, gender, and race do not have a statistically significant relationship with attempting suicide. That is, these variables do not have an effect on suicide attempts, education however does.

## The tabulations reveal a much more robust picture of percentages of suicide attempts. The most surprising, was the larger percentages of females that have attempted suicide vs males, which runs counter to the nationally representative data mentioned in the previous item. Interestingly, after gender, and of course second to education more disadvantaged groups has higher percentages of suicide attempts. These groups included native Hawaiians, Alaskan natives, American Indians, lesbian/gays, bisexuals, divorced and widowed people all of these groups each hide relatively high margins. The more well off people including married people, Whites, and heterosexuals had lower confidence intervals than the rest in their specific demographic categories. Again, Whites tend to have the highest suicide numbers according to the Us Center for Disease Control. Hispanics and Blacks tended to be relatively similar. Perhaps there is a greater discrepancy between divorced and separated people due to the stigma and relative cost that comes with divorce leading to differences in percentages. More education also led to lower percentages of suicide attempts. The results are presented with 95% confidence the true percentages are within those ranges listed.
## LOGIT MODELS WITH ODDS RATIOS AND CONFIDENCE INTERVALS

## With survey weights
sub<-NSDUH_2019 %>%
  select(attempt_suicide, marst, educ, sexuality, male, white, black, hispanic, NatamAlknat, NathaOthnat, mult_race, asian,  race_eth, ANALWT_C, VESTR) %>%
  filter( complete.cases( . ))

options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~1,
               strata= ~VESTR,
               weights= ~ANALWT_C
               , data = sub )

fit.logit2<-svyglm(attempt_suicide ~ race_eth + marst + educ + sexuality + male,
                  design = des,
                  family = binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
fit.logit2%>%
  tidy()%>%
  mutate(OR = exp(estimate),
         LowerOR_Ci = exp(estimate - 1.96*std.error),
         UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value OR LowerOR_Ci UpperOR_Ci
(Intercept) -1.483 0.292 -5.075 0.000 0.227 0.128 0.403
race_ethblack 0.163 0.341 0.479 0.632 1.177 0.604 2.297
race_ethhispanic 0.040 0.330 0.122 0.903 1.041 0.545 1.988
race_ethmult_race 0.815 0.376 2.168 0.030 2.258 1.081 4.717
race_ethNatamAlknat 0.913 0.615 1.484 0.138 2.491 0.746 8.315
race_ethNathaOthnat 1.422 1.160 1.226 0.220 4.144 0.427 40.245
race_ethwhite -0.041 0.278 -0.148 0.882 0.960 0.557 1.654
marstdivorced 0.027 0.411 0.065 0.948 1.027 0.459 2.300
marstseparated -0.184 0.146 -1.263 0.207 0.832 0.625 1.107
marstwidowed 0.321 0.204 1.573 0.116 1.378 0.924 2.056
educAssociates 0.444 0.243 1.828 0.068 1.560 0.969 2.511
educHs 0.581 0.185 3.132 0.002 1.788 1.243 2.571
educLssThnHgh 1.301 0.240 5.429 0.000 3.672 2.296 5.873
educSomeCollege 0.511 0.171 2.986 0.003 1.667 1.192 2.332
sexualityBisexual 0.380 0.133 2.850 0.004 1.463 1.126 1.900
sexualityLes/Gay 0.383 0.294 1.303 0.193 1.467 0.824 2.609
maleMale -0.368 0.135 -2.724 0.006 0.692 0.531 0.902
table(sub$attempt_suicide)
## 
##    0    1 
## 2625  879
table(sub$race_eth)
## 
##       asian       black    hispanic   mult_race NatamAlknat NathaOthnat 
##         113         237         495         195          50           9 
##       white 
##        2405
table(sub$marst)
## 
##   married  divorced separated   widowed 
##       892        43      2195       374
table(sub$educ)
## 
##     Colgrad  Associates          Hs   LssThnHgh SomeCollege 
##         871         337         859         288        1149
table(sub$sexuality)
## 
## Heterosexual     Bisexual      Les/Gay 
##         2616          725          163
table(sub$male)
## 
## Female   Male 
##   2162   1342

Discussion 2 Odds Ratios

##In terms of significant results being multi racial, lesbian/gay, bisexual, and having low amounts of education increasing the odds of attempting suicide noting education used college graduates, and sexuality used heterosexuals as reference categories. Interestingly, being male actually lowered the odds of attempting suicide despite nationally, males actually committing suicide at higher rates. The results are consistent with the cross tabulations. In the case of people having less than a high school education, the odds of those with no hs degree attempting suicide is 1.3 times greater. Bisexuals and lesbians/gays have 38% higher odds of attempting suicide, mutli racial people being 77% more likely, and males being 36% less likely than females to attempt suicide. It would appear that the more marginalized groups commit suicide higher, with racial and marital groups that are disadvantaged having higher odds of attempting suicide.

3.Generate predicted probabilities for some “interesting” cases from your analysis, to highlight the effects from the model and your stated research question.

attach(sub)
rg<-ref_grid(fit.logit2)
marg_logit<-emmeans(object = rg,
              specs =  c("race_eth", "educ") ,
              type="response" ,
              data=sub)
knitr::kable(marg_logit,  digits = 4)
race_eth educ prob SE df asymp.LCL asymp.UCL
asian Colgrad 0.2024 0.0514 Inf 0.1197 0.3213
black Colgrad 0.2300 0.0497 Inf 0.1470 0.3411
hispanic Colgrad 0.2089 0.0465 Inf 0.1321 0.3144
mult_race Colgrad 0.3643 0.0756 Inf 0.2320 0.5208
NatamAlknat Colgrad 0.3872 0.1381 Inf 0.1681 0.6641
NathaOthnat Colgrad 0.5125 0.2859 Inf 0.1004 0.9083
white Colgrad 0.1958 0.0292 Inf 0.1448 0.2594
asian Associates 0.2835 0.0755 Inf 0.1603 0.4505
black Associates 0.3178 0.0700 Inf 0.1984 0.4672
hispanic Associates 0.2918 0.0630 Inf 0.1847 0.4283
mult_race Associates 0.4719 0.0895 Inf 0.3066 0.6436
NatamAlknat Associates 0.4964 0.1500 Inf 0.2332 0.7616
NathaOthnat Associates 0.6212 0.2697 Inf 0.1478 0.9394
white Associates 0.2752 0.0513 Inf 0.1866 0.3859
asian Hs 0.3120 0.0712 Inf 0.1914 0.4649
black Hs 0.3481 0.0604 Inf 0.2406 0.4737
hispanic Hs 0.3207 0.0534 Inf 0.2260 0.4330
mult_race Hs 0.5060 0.0783 Inf 0.3566 0.6543
NatamAlknat Hs 0.5304 0.1424 Inf 0.2692 0.7760
NathaOthnat Hs 0.6527 0.2597 Inf 0.1660 0.9467
white Hs 0.3033 0.0382 Inf 0.2340 0.3828
asian LssThnHgh 0.4823 0.0935 Inf 0.3091 0.6599
black LssThnHgh 0.5231 0.0757 Inf 0.3770 0.6654
hispanic LssThnHgh 0.4924 0.0775 Inf 0.3456 0.6404
mult_race LssThnHgh 0.6778 0.0761 Inf 0.5152 0.8064
NatamAlknat LssThnHgh 0.6989 0.1255 Inf 0.4190 0.8819
NathaOthnat LssThnHgh 0.7943 0.1892 Inf 0.2854 0.9739
white LssThnHgh 0.4720 0.0604 Inf 0.3574 0.5898
asian SomeCollege 0.2973 0.0667 Inf 0.1845 0.4417
black SomeCollege 0.3325 0.0585 Inf 0.2290 0.4551
hispanic SomeCollege 0.3057 0.0524 Inf 0.2135 0.4167
mult_race SomeCollege 0.4886 0.0774 Inf 0.3424 0.6367
NatamAlknat SomeCollege 0.5130 0.1411 Inf 0.2583 0.7612
NathaOthnat SomeCollege 0.6368 0.2648 Inf 0.1568 0.9429
white SomeCollege 0.2887 0.0353 Inf 0.2247 0.3624
## With survey design "interesting cases" Marital Status and education
rg<-ref_grid(fit.logit2)
marg_logit<-emmeans(object = rg,
              specs = c( "marst",  "educ"),
             type="response" ,
              data=sub)
knitr::kable(marg_logit,  digits = 4)
marst educ prob SE df asymp.LCL asymp.UCL
married Colgrad 0.2810 0.0532 Inf 0.1892 0.3956
divorced Colgrad 0.2865 0.0948 Inf 0.1393 0.4990
separated Colgrad 0.2454 0.0469 Inf 0.1653 0.3481
widowed Colgrad 0.3501 0.0692 Inf 0.2289 0.4944
married Associates 0.3787 0.0717 Inf 0.2511 0.5256
divorced Associates 0.3850 0.1172 Inf 0.1918 0.6229
separated Associates 0.3365 0.0647 Inf 0.2232 0.4723
widowed Associates 0.4566 0.0815 Inf 0.3062 0.6153
married Hs 0.4113 0.0651 Inf 0.2920 0.5420
divorced Hs 0.4178 0.1107 Inf 0.2272 0.6365
separated Hs 0.3676 0.0548 Inf 0.2681 0.4798
widowed Hs 0.4906 0.0726 Inf 0.3527 0.6299
married LssThnHgh 0.5894 0.0741 Inf 0.4405 0.7235
divorced LssThnHgh 0.5958 0.1188 Inf 0.3591 0.7950
separated LssThnHgh 0.5442 0.0683 Inf 0.4103 0.6720
widowed LssThnHgh 0.6643 0.0761 Inf 0.5033 0.7944
married SomeCollege 0.3945 0.0620 Inf 0.2815 0.5201
divorced SomeCollege 0.4009 0.1091 Inf 0.2156 0.6198
separated SomeCollege 0.3516 0.0516 Inf 0.2582 0.4579
widowed SomeCollege 0.4732 0.0710 Inf 0.3395 0.6108
## With survey design "interesting cases" Gender and education
rg<-ref_grid(fit.logit2)
marg_logit<-emmeans(object = rg,
              specs = c( "male",  "educ"),
             type="response" ,
              data=sub)
knitr::kable(marg_logit,  digits = 4)
male educ prob SE df asymp.LCL asymp.UCL
Female Colgrad 0.3287 0.0584 Inf 0.2256 0.4514
Male Colgrad 0.2530 0.0543 Inf 0.1617 0.3730
Female Associates 0.4330 0.0742 Inf 0.2969 0.5799
Male Associates 0.3456 0.0736 Inf 0.2182 0.5000
Female Hs 0.4667 0.0661 Inf 0.3422 0.5955
Male Hs 0.3771 0.0629 Inf 0.2638 0.5057
Female LssThnHgh 0.6426 0.0706 Inf 0.4959 0.7666
Male LssThnHgh 0.5543 0.0792 Inf 0.3988 0.6998
Female SomeCollege 0.4494 0.0630 Inf 0.3314 0.5734
Male SomeCollege 0.3609 0.0613 Inf 0.2512 0.4873
## With survey design "interesting cases" Sexuality and education
rg<-ref_grid(fit.logit2)
marg_logit<-emmeans(object = rg,
              specs = c( "sexuality",  "educ"),
              type="response" ,
              data=sub)
knitr::kable(marg_logit,  digits = 4)
sexuality educ prob SE df asymp.LCL asymp.UCL
Heterosexual Colgrad 0.2400 0.0456 Inf 0.1621 0.3400
Bisexual Colgrad 0.3159 0.0598 Inf 0.2117 0.4427
Les/Gay Colgrad 0.3165 0.0813 Inf 0.1814 0.4918
Heterosexual Associates 0.3299 0.0632 Inf 0.2194 0.4631
Bisexual Associates 0.4187 0.0757 Inf 0.2814 0.5698
Les/Gay Associates 0.4193 0.1000 Inf 0.2440 0.6177
Heterosexual Hs 0.3608 0.0550 Inf 0.2612 0.4739
Bisexual Hs 0.4522 0.0655 Inf 0.3296 0.5809
Les/Gay Hs 0.4529 0.0919 Inf 0.2857 0.6313
Heterosexual LssThnHgh 0.5369 0.0728 Inf 0.3950 0.6730
Bisexual LssThnHgh 0.6291 0.0742 Inf 0.4762 0.7598
Les/Gay LssThnHgh 0.6297 0.0934 Inf 0.4368 0.7884
Heterosexual SomeCollege 0.3448 0.0524 Inf 0.2504 0.4534
Bisexual SomeCollege 0.4350 0.0634 Inf 0.3171 0.5608
Les/Gay SomeCollege 0.4356 0.0899 Inf 0.2737 0.6126

Discussion of the “Interesting” results

##To better expand upon the results thus far, it was then decided to do fitted values on the core demographic characteristics (gender, sexuality, race, and marital status) by educational attainment. Much in line with what the results from the logit regression confirmed, as educational attainment increases by marital status, the odds of attempting suicide also decreases. In each educational x marital status category, widowed individuals had the highest odds of attempting suicide, followed by divorced, married, then separated people. The results were largely consistent across the other demographic categories with more of the individuals in marginalized groups having higher levels of suicide as education decreases. However, it is also true for males, females still had higher odds of suicide across each educational output. Again, within racial contexts multi racial individuals, Native Hawaiians, and Alaskan Natives having the highest odds of a suicide attempt, with these percentages increasing the lower the amounts of education they had, in turn more education decreased the likelihood of attempting suicide. Perhaps, the human capital, and financial resources accumulated by education helps in determining suicide attempts. However, as the data collected is largely about mental health, and drug usage more tests need to be run on this population. In summary, the fitted values reflects the results of the logistic regression, and tabulations performed. Perhaps education offers a myriad of benefits that needs to be considered among other mortality items such as attempting suicide.