Using a data set of your choice, do the following:

  1. define an outcome, this can be of any form, but use an appropriate distribution for your outcome. Also define at least 1 continuous predictor.
library(mgcv) #one library for GAMs
## Loading required package: nlme
## This is mgcv 1.8-38. For overview type 'help("mgcv-package")'.
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(emmeans)
brfss20sm <- readRDS("C:/Users/shahi/Dropbox/PC/Downloads/brfss20sm.rds")

names(brfss20sm) <- tolower(gsub(pattern = "_",replacement =  "",x =  names(brfss20sm)))
brfss20sm$depression<-Recode(brfss20sm$addepev3, recodes="1=1; 2=0; 7:9=NA")
#employment
brfss20sm$employ<-Recode(brfss20sm$employ1,
                       recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
                       as.factor=T)
brfss20sm$employ<-relevel(brfss20sm$employ, ref='Employed')

#marital status
brfss20sm$marst<-Recode(brfss20sm$marital,
                      recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
                      as.factor=T)
brfss20sm$marst<-relevel(brfss20sm$marst, ref='married')


#Age cut into intervals
brfss20sm$agec<-cut(brfss20sm$age80, breaks=c(0,24,39,59,79,99))

#Healthy mental health days
brfss20sm$healthmdays<-Recode(brfss20sm$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss20sm<-brfss20sm%>%
  select(  ststr, agec,healthmdays,depression, age80, employ, marst,mmsawt)%>%
  filter(complete.cases(.))

#Using the gam() function, estimate a model with only linear terms in your model:

Here is a GAM fit to the BRFSS, using linear terms.

brfgam1<-gam(depression~age80+ healthmdays+ employ+ marst,
              data=brfss20sm,
              weights = mmsawt/mean(mmsawt, na.rm=T),
              family=binomial)

summary(brfgam1)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## depression ~ age80 + healthmdays + employ + marst
## 
## Parametric coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -2.0757343  0.0299512 -69.304  < 2e-16 ***
## age80          -0.0081483  0.0005880 -13.858  < 2e-16 ***
## healthmdays     0.0944230  0.0006865 137.537  < 2e-16 ***
## employnilf      0.2532349  0.0174049  14.550  < 2e-16 ***
## employretired   0.3466200  0.0246916  14.038  < 2e-16 ***
## employunable    1.1835855  0.0254995  46.416  < 2e-16 ***
## marstcohab      0.4026891  0.0295491  13.628  < 2e-16 ***
## marstdivorced   0.5416843  0.0218294  24.814  < 2e-16 ***
## marstnm         0.1435017  0.0188280   7.622 2.50e-14 ***
## marstseparated  0.4968277  0.0399409  12.439  < 2e-16 ***
## marstwidowed    0.1546884  0.0310934   4.975 6.53e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.172   Deviance explained =   16%
## UBRE = -0.21527  Scale est. = 1         n = 185520

#Repeat Step 2, but include a smooth term for at least one continuous variable:

Here is a GAM fit to the BRFSS, using smooth terms for age, and number of poor mental health days

brfgam2<-gam(depression ~ s(age80)+ s(healthmdays)+employ +marst ,
            data=brfss20sm,
            weights = mmsawt/mean(mmsawt, na.rm=T),
            family=binomial)

summary(brfgam2)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## depression ~ s(age80) + s(healthmdays) + employ + marst
## 
## Parametric coefficients:
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)    -2.35189    0.01524 -154.365   <2e-16 ***
## employnilf      0.28364    0.01805   15.714   <2e-16 ***
## employretired   0.55700    0.02921   19.070   <2e-16 ***
## employunable    1.18800    0.02634   45.104   <2e-16 ***
## marstcohab      0.41439    0.03019   13.728   <2e-16 ***
## marstdivorced   0.52428    0.02234   23.470   <2e-16 ***
## marstnm         0.19792    0.01988    9.956   <2e-16 ***
## marstseparated  0.54688    0.04084   13.390   <2e-16 ***
## marstwidowed    0.29233    0.03296    8.870   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                  edf Ref.df  Chi.sq p-value    
## s(age80)       8.735  8.979   439.2  <2e-16 ***
## s(healthmdays) 8.887  8.994 23421.7  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.209   Deviance explained = 19.9%
## UBRE = -0.25134  Scale est. = 1         n = 185520

#Test if the model in step 3 is fitting the data better than the purely linear model:

anova( brfgam1, brfgam2, test="Chisq")

Comparing the two models, the model with a smooth term for the age variable fits the model better. This is evident because of the very small p-value.

#Produce a plot of the smooth effect from the model in step 3:

plot(brfgam2)

LS0tDQp0aXRsZTogIlVudGl0bGVkIg0KYXV0aG9yOiAiTWFobXVkYSBTdWx0YW5hIg0KZGF0ZTogIjQvMTIvMjAyMiINCm91dHB1dDoNCiAgIGh0bWxfZG9jdW1lbnQ6DQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgZmlnX2hlaWdodDogNw0KICAgIGZpZ193aWR0aDogNw0KICAgIHRvYzogeWVzDQogICAgdG9jX2Zsb2F0OiB5ZXMNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NCg0KIyBVc2luZyBhIGRhdGEgc2V0IG9mIHlvdXIgY2hvaWNlLCBkbyB0aGUgZm9sbG93aW5nOg0KMSkgZGVmaW5lIGFuIG91dGNvbWUsIHRoaXMgY2FuIGJlIG9mIGFueSBmb3JtLCBidXQgdXNlIGFuIGFwcHJvcHJpYXRlIGRpc3RyaWJ1dGlvbiBmb3IgeW91ciBvdXRjb21lLiBBbHNvIGRlZmluZSBhdCBsZWFzdCAxIGNvbnRpbnVvdXMgcHJlZGljdG9yLg0KDQpgYGB7cn0NCmxpYnJhcnkobWdjdikgI29uZSBsaWJyYXJ5IGZvciBHQU1zDQpsaWJyYXJ5KGdncGxvdDIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShjYXIpDQpsaWJyYXJ5KGVtbWVhbnMpDQpgYGANCg0KYGBge3J9DQoNCmJyZnNzMjBzbSA8LSByZWFkUkRTKCJDOi9Vc2Vycy9zaGFoaS9Ecm9wYm94L1BDL0Rvd25sb2Fkcy9icmZzczIwc20ucmRzIikNCg0KbmFtZXMoYnJmc3MyMHNtKSA8LSB0b2xvd2VyKGdzdWIocGF0dGVybiA9ICJfIixyZXBsYWNlbWVudCA9ICAiIix4ID0gIG5hbWVzKGJyZnNzMjBzbSkpKQ0KYGBgDQoNCmBgYHtyfQ0KYnJmc3MyMHNtJGRlcHJlc3Npb248LVJlY29kZShicmZzczIwc20kYWRkZXBldjMsIHJlY29kZXM9IjE9MTsgMj0wOyA3Ojk9TkEiKQ0KYGBgDQoNCmBgYHtyfQ0KI2VtcGxveW1lbnQNCmJyZnNzMjBzbSRlbXBsb3k8LVJlY29kZShicmZzczIwc20kZW1wbG95MSwNCiAgICAgICAgICAgICAgICAgICAgICAgcmVjb2Rlcz0iMToyPSdFbXBsb3llZCc7IDI6Nj0nbmlsZic7IDc9J3JldGlyZWQnOyA4PSd1bmFibGUnOyBlbHNlPU5BIiwNCiAgICAgICAgICAgICAgICAgICAgICAgYXMuZmFjdG9yPVQpDQpicmZzczIwc20kZW1wbG95PC1yZWxldmVsKGJyZnNzMjBzbSRlbXBsb3ksIHJlZj0nRW1wbG95ZWQnKQ0KDQojbWFyaXRhbCBzdGF0dXMNCmJyZnNzMjBzbSRtYXJzdDwtUmVjb2RlKGJyZnNzMjBzbSRtYXJpdGFsLA0KICAgICAgICAgICAgICAgICAgICAgIHJlY29kZXM9IjE9J21hcnJpZWQnOyAyPSdkaXZvcmNlZCc7IDM9J3dpZG93ZWQnOyA0PSdzZXBhcmF0ZWQnOyA1PSdubSc7Nj0nY29oYWInOyBlbHNlPU5BIiwNCiAgICAgICAgICAgICAgICAgICAgICBhcy5mYWN0b3I9VCkNCmJyZnNzMjBzbSRtYXJzdDwtcmVsZXZlbChicmZzczIwc20kbWFyc3QsIHJlZj0nbWFycmllZCcpDQoNCg0KI0FnZSBjdXQgaW50byBpbnRlcnZhbHMNCmJyZnNzMjBzbSRhZ2VjPC1jdXQoYnJmc3MyMHNtJGFnZTgwLCBicmVha3M9YygwLDI0LDM5LDU5LDc5LDk5KSkNCg0KI0hlYWx0aHkgbWVudGFsIGhlYWx0aCBkYXlzDQpicmZzczIwc20kaGVhbHRobWRheXM8LVJlY29kZShicmZzczIwc20kbWVudGhsdGgsIHJlY29kZXMgPSAiODg9MDsgNzc9TkE7IDk5PU5BIikNCg0KYGBgDQoNCg0KDQpgYGB7cn0NCmJyZnNzMjBzbTwtYnJmc3MyMHNtJT4lDQogIHNlbGVjdCggIHN0c3RyLCBhZ2VjLGhlYWx0aG1kYXlzLGRlcHJlc3Npb24sIGFnZTgwLCBlbXBsb3ksIG1hcnN0LG1tc2F3dCklPiUNCiAgZmlsdGVyKGNvbXBsZXRlLmNhc2VzKC4pKQ0KDQpgYGANCg0KI1VzaW5nIHRoZSBnYW0oKSBmdW5jdGlvbiwgZXN0aW1hdGUgYSBtb2RlbCB3aXRoIG9ubHkgbGluZWFyIHRlcm1zIGluIHlvdXIgbW9kZWw6DQoNCkhlcmUgaXMgYSBHQU0gZml0IHRvIHRoZSBCUkZTUywgdXNpbmcgbGluZWFyIHRlcm1zLg0KDQpgYGB7ciwgd2FybmluZz1GQUxTRX0NCmJyZmdhbTE8LWdhbShkZXByZXNzaW9ufmFnZTgwKyBoZWFsdGhtZGF5cysgZW1wbG95KyBtYXJzdCwNCiAgICAgICAgICAgICAgZGF0YT1icmZzczIwc20sDQogICAgICAgICAgICAgIHdlaWdodHMgPSBtbXNhd3QvbWVhbihtbXNhd3QsIG5hLnJtPVQpLA0KICAgICAgICAgICAgICBmYW1pbHk9Ymlub21pYWwpDQoNCnN1bW1hcnkoYnJmZ2FtMSkNCmBgYA0KDQojUmVwZWF0IFN0ZXAgMiwgYnV0IGluY2x1ZGUgYSBzbW9vdGggdGVybSBmb3IgYXQgbGVhc3Qgb25lIGNvbnRpbnVvdXMgdmFyaWFibGU6DQoNCkhlcmUgaXMgYSBHQU0gZml0IHRvIHRoZSBCUkZTUywgdXNpbmcgc21vb3RoIHRlcm1zIGZvciBhZ2UsIGFuZCBudW1iZXIgb2YgcG9vciBtZW50YWwgaGVhbHRoIGRheXMNCg0KYGBge3IsIHdhcm5pbmc9RkFMU0V9DQoNCmJyZmdhbTI8LWdhbShkZXByZXNzaW9uIH4gcyhhZ2U4MCkrIHMoaGVhbHRobWRheXMpK2VtcGxveSArbWFyc3QgLA0KICAgICAgICAgICAgZGF0YT1icmZzczIwc20sDQogICAgICAgICAgICB3ZWlnaHRzID0gbW1zYXd0L21lYW4obW1zYXd0LCBuYS5ybT1UKSwNCiAgICAgICAgICAgIGZhbWlseT1iaW5vbWlhbCkNCg0Kc3VtbWFyeShicmZnYW0yKQ0KYGBgDQoNCiNUZXN0IGlmIHRoZSBtb2RlbCBpbiBzdGVwIDMgaXMgZml0dGluZyB0aGUgZGF0YSBiZXR0ZXIgdGhhbiB0aGUgcHVyZWx5IGxpbmVhciBtb2RlbDoNCg0KYGBge3J9DQphbm92YSggYnJmZ2FtMSwgYnJmZ2FtMiwgdGVzdD0iQ2hpc3EiKQ0KYGBgDQoNCkNvbXBhcmluZyB0aGUgdHdvIG1vZGVscywgdGhlIG1vZGVsIHdpdGggYSBzbW9vdGggdGVybSBmb3IgdGhlIGFnZSB2YXJpYWJsZSBmaXRzIHRoZSBtb2RlbCBiZXR0ZXIuIFRoaXMgaXMgZXZpZGVudCBiZWNhdXNlIG9mIHRoZSB2ZXJ5IHNtYWxsIHAtdmFsdWUuIA0KDQoNCiNQcm9kdWNlIGEgcGxvdCBvZiB0aGUgc21vb3RoIGVmZmVjdCBmcm9tIHRoZSBtb2RlbCBpbiBzdGVwIDM6DQoNCmBgYHtyfQ0KcGxvdChicmZnYW0yKQ0KYGBgDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==