library(haven)
NSDUH_2019 <- read_sav("NSDUH_2019.SAV")
View(NSDUH_2019)
nams<-names(NSDUH_2019)
head(nams, n=10)
## [1] "QUESTID2" "FILEDATE" "CIGEVER" "CIGOFRSM" "CIGWILYR" "CIGTRY"
## [7] "CIGYFU" "CIGMFU" "CIGREC" "CIG30USE"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(NSDUH_2019)<-newnames
NSDUH_2019$attempt_suicide<-Recode(NSDUH_2019$adwrsatp, recodes="1=1; 2=0;else=NA")
## marital status
NSDUH_2019$marst<-Recode(NSDUH_2019$irmarit, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; else=NA", as.factor=T)
NSDUH_2019$marst<-relevel(NSDUH_2019$marst, ref='married')
## education recodes
NSDUH_2019$educ<-Recode(NSDUH_2019$ireduhighst2, recodes="1:7='LssThnHgh'; 8='highschool'; 9='someCollege'; 10='associates'; 11='colgrad';else=NA", as.factor=T)
NSDUH_2019$educ<-relevel(NSDUH_2019$educ, ref='colgrad')
## sexuality recodes
NSDUH_2019$sexuality<-Recode(NSDUH_2019$sexident, recodes="1='Heterosexual'; 2='Les/Gay'; 3='Bisexual';else=NA", as.factor=T)
NSDUH_2019$sexuality<-relevel(NSDUH_2019$sexuality, ref='Heterosexual')
## gender recodes
NSDUH_2019$male<-as.factor(ifelse(NSDUH_2019$irsex==1, "Male", "Female"))
## Race recoded items
NSDUH_2019$black<-Recode(NSDUH_2019$newrace2, recodes="2=1; 9=NA; else=0")
NSDUH_2019$white<-Recode(NSDUH_2019$newrace2, recodes="1=1; 9=NA; else=0")
NSDUH_2019$NatAm_AlNAt<-Recode(NSDUH_2019$newrace2, recodes="3=1; 9=NA; else=0")
NSDUH_2019$NAtHa_PaIsl<-Recode(NSDUH_2019$newrace2, recodes="4=1; 9=NA; else=0")
NSDUH_2019$mult_race<-Recode(NSDUH_2019$newrace2, recodes="6=1; 9=NA; else=0")
NSDUH_2019$asian<-Recode(NSDUH_2019$newrace2, recodes="5=1; 9=NA; else=0")
NSDUH_2019$hispanic<-Recode(NSDUH_2019$newrace2, recodes="7=1; 9=NA; else=0")
NSDUH_2019$race_eth<-Recode(NSDUH_2019$newrace2, recodes="1='white'; 2='black'; 3='NatAm_AlNAt'; 4='NAtHa_PaIsl'; 5='asian'; 6='mult_race'; 7='hispanic'; else=NA", as.factor=T)
NSDUH_2019$daysalc<-Recode(NSDUH_2019$alcdays, recodes = "85=NA; 91=NA; 93=NA; 94=NA; 97=NA; 98=NA ")
NSDUH_2019$weekswrkd<-Recode(NSDUH_2019$wrknjbwks, recodes = "85=NA; 94=NA; 97=NA; 98=NA; 99=NA")
summary(NSDUH_2019$race_eth)
## asian black hispanic mult_race NatAm_AlNAt NAtHa_PaIsl
## 2697 7256 10848 2202 752 292
## white
## 32089
sub<-NSDUH_2019%>%
select(attempt_suicide, marst, race_eth,
educ, daysalc, weekswrkd, white, black, hispanic,NatAm_AlNAt,
NAtHa_PaIsl, asian, hispanic, male, sexuality, analwtc, vestr) %>%
filter( complete.cases(.))
##Using a data set of your choice, do the following: ## 1) define an outcome, this can be of any form, but use an appropriate distribution for your outcome. Also define at least 1 continuous predictor. Outcome Variable defined as if the person atempted suicide, 1=yes, versus 0=no. Two continuous variables were used being worked weekds during the year, and alcohol use in days during the year.
nsduhgam_l<-gam(attempt_suicide~daysalc+weekswrkd+marst+male+educ+sexuality+black+hispanic+NatAm_AlNAt+NAtHa_PaIsl+asian,
data=sub,
weights = analwtc/mean(analwtc, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(nsduhgam_l)
##
## Family: binomial
## Link function: logit
##
## Formula:
## attempt_suicide ~ daysalc + weekswrkd + marst + male + educ +
## sexuality + black + hispanic + NatAm_AlNAt + NAtHa_PaIsl +
## asian
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.948e+00 4.348e-01 -4.481 7.44e-06 ***
## daysalc 4.580e-02 1.581e-02 2.897 0.003773 **
## weekswrkd 5.890e-03 9.920e-03 0.594 0.552691
## marstdivorced -1.282e+01 4.095e+02 -0.031 0.975032
## marstseparated -2.874e-01 3.301e-01 -0.871 0.384018
## marstwidowed -1.579e-01 5.659e-01 -0.279 0.780159
## maleMale 1.546e-01 2.733e-01 0.566 0.571712
## educassociates -8.235e-01 6.161e-01 -1.337 0.181325
## educhighschool 9.022e-01 3.733e-01 2.417 0.015655 *
## educLssThnHgh 1.909e+00 5.708e-01 3.345 0.000822 ***
## educsomeCollege 2.776e-04 3.253e-01 0.001 0.999319
## sexualityBisexual 8.219e-01 2.822e-01 2.913 0.003581 **
## sexualityLes/Gay -2.977e-01 7.401e-01 -0.402 0.687481
## black 3.787e-01 4.420e-01 0.857 0.391654
## hispanic 3.151e-01 3.381e-01 0.932 0.351361
## NatAm_AlNAt 0.000e+00 0.000e+00 NA NA
## NAtHa_PaIsl 0.000e+00 0.000e+00 NA NA
## asian -3.949e-01 8.183e-01 -0.483 0.629375
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Rank: 16/18
## R-sq.(adj) = 0.0922 Deviance explained = 10.5%
## UBRE = 0.11301 Scale est. = 1 n = 380
##3) Repeat Step 2, but include a smooth term for at least one continuous variable. As days of alcohol consumed and weeks worked during the year are likely to not be uniform accross these items, a smoothed term was added to the study.
nsduhgam<-gam(attempt_suicide~s(daysalc)+s(weekswrkd)+marst+male+educ+sexuality+race_eth,
data=sub,
weights = analwtc/mean(analwtc, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(nsduhgam)
##
## Family: binomial
## Link function: logit
##
## Formula:
## attempt_suicide ~ s(daysalc) + s(weekswrkd) + marst + male +
## educ + sexuality + race_eth
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.077e+00 8.700e-01 -2.387 0.01697 *
## marstdivorced -2.846e+01 1.221e+06 0.000 0.99998
## marstseparated -2.580e-01 3.427e-01 -0.753 0.45161
## marstwidowed -1.380e-01 5.811e-01 -0.238 0.81223
## maleMale 1.566e-01 2.807e-01 0.558 0.57676
## educassociates -8.767e-01 6.398e-01 -1.370 0.17063
## educhighschool 9.538e-01 3.833e-01 2.488 0.01283 *
## educLssThnHgh 1.862e+00 5.841e-01 3.188 0.00143 **
## educsomeCollege 1.162e-01 3.393e-01 0.343 0.73194
## sexualityBisexual 7.934e-01 2.868e-01 2.767 0.00566 **
## sexualityLes/Gay -3.054e-01 7.560e-01 -0.404 0.68624
## race_ethblack 8.062e-01 9.068e-01 0.889 0.37395
## race_ethhispanic 7.640e-01 8.521e-01 0.897 0.36995
## race_ethmult_race 7.965e-01 1.051e+00 0.758 0.44858
## race_ethwhite 4.153e-01 8.250e-01 0.503 0.61469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(daysalc) 3.235 4.005 18.512 0.000986 ***
## s(weekswrkd) 1.758 2.198 1.599 0.451614
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.137 Deviance explained = 14.5%
## UBRE = 0.088314 Scale est. = 1 n = 380
##4) Test if the model in step 3 is fitting the data better than the purely linear model.
anova( nsduhgam_l, nsduhgam, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: attempt_suicide ~ daysalc + weekswrkd + marst + male + educ +
## sexuality + black + hispanic + NatAm_AlNAt + NAtHa_PaIsl +
## asian
## Model 2: attempt_suicide ~ s(daysalc) + s(weekswrkd) + marst + male +
## educ + sexuality + race_eth
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 364.0 390.94
## 2 358.8 373.57 5.2034 17.37 0.004508 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##5) Produce a plot of the smooth effect from the model in step 3
plot(nsduhgam)