library(mgcv) #one library for GAMs
## Loading required package: nlme
## This is mgcv 1.8-38. For overview type 'help("mgcv-package")'.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(emmeans)
brfss20sm <- readRDS("C:/Users/shahi/Dropbox/PC/Downloads/brfss20sm.rds")
names(brfss20sm) <- tolower(gsub(pattern = "_",replacement = "",x = names(brfss20sm)))
brfss20sm$depression<-Recode(brfss20sm$addepev3, recodes="1=1; 2=0; 7:9=NA")
#employment
brfss20sm$employ<-Recode(brfss20sm$employ1,
recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss20sm$employ<-relevel(brfss20sm$employ, ref='Employed')
#marital status
brfss20sm$marst<-Recode(brfss20sm$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss20sm$marst<-relevel(brfss20sm$marst, ref='married')
#Age cut into intervals
brfss20sm$agec<-cut(brfss20sm$age80, breaks=c(0,24,39,59,79,99))
#Healthy mental health days
brfss20sm$healthmdays<-Recode(brfss20sm$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss20sm<-brfss20sm%>%
select( ststr, agec,healthmdays,depression, age80, employ, marst,mmsawt)%>%
filter(complete.cases(.))
#Using the gam() function, estimate a model with only linear terms in your model:
Here is a GAM fit to the BRFSS, using linear terms.
brfgam1<-gam(depression~age80+ healthmdays+ employ+ marst,
data=brfss20sm,
weights = mmsawt/mean(mmsawt, na.rm=T),
family=binomial)
summary(brfgam1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## depression ~ age80 + healthmdays + employ + marst
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.0757343 0.0299512 -69.304 < 2e-16 ***
## age80 -0.0081483 0.0005880 -13.858 < 2e-16 ***
## healthmdays 0.0944230 0.0006865 137.537 < 2e-16 ***
## employnilf 0.2532349 0.0174049 14.550 < 2e-16 ***
## employretired 0.3466200 0.0246916 14.038 < 2e-16 ***
## employunable 1.1835855 0.0254995 46.416 < 2e-16 ***
## marstcohab 0.4026891 0.0295491 13.628 < 2e-16 ***
## marstdivorced 0.5416843 0.0218294 24.814 < 2e-16 ***
## marstnm 0.1435017 0.0188280 7.622 2.50e-14 ***
## marstseparated 0.4968277 0.0399409 12.439 < 2e-16 ***
## marstwidowed 0.1546884 0.0310934 4.975 6.53e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.172 Deviance explained = 16%
## UBRE = -0.21527 Scale est. = 1 n = 185520
#Repeat Step 2, but include a smooth term for at least one continuous variable:
Here is a GAM fit to the BRFSS, using smooth terms for age, and number of poor mental health days
brfgam2<-gam(depression ~ s(age80)+ s(healthmdays)+employ +marst ,
data=brfss20sm,
weights = mmsawt/mean(mmsawt, na.rm=T),
family=binomial)
summary(brfgam2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## depression ~ s(age80) + s(healthmdays) + employ + marst
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.35189 0.01524 -154.365 <2e-16 ***
## employnilf 0.28364 0.01805 15.714 <2e-16 ***
## employretired 0.55700 0.02921 19.070 <2e-16 ***
## employunable 1.18800 0.02634 45.104 <2e-16 ***
## marstcohab 0.41439 0.03019 13.728 <2e-16 ***
## marstdivorced 0.52428 0.02234 23.470 <2e-16 ***
## marstnm 0.19792 0.01988 9.956 <2e-16 ***
## marstseparated 0.54688 0.04084 13.390 <2e-16 ***
## marstwidowed 0.29233 0.03296 8.870 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(age80) 8.735 8.979 439.2 <2e-16 ***
## s(healthmdays) 8.887 8.994 23421.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.209 Deviance explained = 19.9%
## UBRE = -0.25134 Scale est. = 1 n = 185520
#Test if the model in step 3 is fitting the data better than the purely linear model:
anova( brfgam1, brfgam2, test="Chisq")
Comparing the two models, the model with a smooth term for the age variable fits the model better. This is evident because of the very small p-value.
#Produce a plot of the smooth effect from the model in step 3:
plot(brfgam2)