#Packages
library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(splines)
library(mgcv)
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 4.0.4
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-35. For overview type 'help("mgcv-package")'.
#Import Data
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
#Clean Variables
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
##    `_STATE` FMONTH IDATE   IMONTH IDAY  IYEAR DISPCODE SEQNO     `_PSU` CTELENM1
##       <dbl>  <dbl> <chr>   <chr>  <chr> <chr>    <dbl> <chr>      <dbl>    <dbl>
##  1        1      1 011820~ 01     18    2019      1100 201900~   2.02e9        1
##  2        1      1 011320~ 01     13    2019      1100 201900~   2.02e9        1
##  3        1      1 011820~ 01     18    2019      1100 201900~   2.02e9        1
##  4        1      1 011820~ 01     18    2019      1200 201900~   2.02e9        1
##  5        1      1 010420~ 01     04    2019      1100 201900~   2.02e9        1
##  6        1      1 011820~ 01     18    2019      1200 201900~   2.02e9        1
##  7        1      1 010420~ 01     04    2019      1100 201900~   2.02e9        1
##  8        1      1 012320~ 01     23    2019      1100 201900~   2.02e9        1
##  9        1      1 012420~ 01     24    2019      1100 201900~   2.02e9        1
## 10        1      1 011320~ 01     13    2019      1100 201900~   2.02e9        1
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## #   CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## #   LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## #   SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## #   CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## #   LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## #   MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## #   MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## #   TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## #   CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## #   CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## #   DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## #   ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## #   MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## #   CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## #   INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## #   BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## #   SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## #   USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## #   MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## #   EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## #   STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## #   FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## #   FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(BRFSS19)<-newnames
#Recode Variables
#Independent Variables
BRFSS19$MaxDrnk<-Recode(BRFSS19$maxdrnks, recodes = "88=0; 77=NA; 99=NA")
BRFSS19$Alc<-Recode(BRFSS19$acedrink, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$acedeprs, recodes ="7:9=NA; 1=1;2=0") 
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
#Dependent Variable (Mental health days = 1, No mental health days = 0)
BRFSS19$Mhlth<-Recode(BRFSS19$ment14d, recodes ="7:9=NA; 1=0;2=1;3=1")
#Complete Cases
library(dplyr)
BRFSS19sub<-BRFSS19%>%
  select(MaxDrnk,Alc,Dep,Gen,Mhlth,llcpwt, ststr )%>%
  filter(complete.cases(.))
brfgam<-gam(Mhlth~s(MaxDrnk)+ Alc + Dep+ Gen,
            data=BRFSS19,
            weights = ststr/mean(ststr, na.rm=T),
            family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(brfgam)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## Mhlth ~ s(MaxDrnk) + Alc + Dep + Gen
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.41435    0.05371  -7.714 1.22e-14 ***
## Alc          0.39901    0.07965   5.010 5.45e-07 ***
## Dep          0.90511    0.08440  10.724  < 2e-16 ***
## Gen         -0.60030    0.06987  -8.592  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df Chi.sq p-value    
## s(MaxDrnk) 2.975   3.69  76.19  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0821   Deviance explained =  6.3%
## UBRE = 0.78377  Scale est. = 1         n = 2969
plot(brfgam)

brfgam2<-gam(Mhlth~ MaxDrnk+ Alc + Dep+ Gen,
            data=BRFSS19,
            weights = ststr/mean(ststr, na.rm=T),
            family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(brfgam2)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## Mhlth ~ MaxDrnk + Alc + Dep + Gen
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.67209    0.05782 -11.623  < 2e-16 ***
## MaxDrnk      0.06539    0.01055   6.195 5.81e-10 ***
## Alc          0.41011    0.07921   5.177 2.25e-07 ***
## Dep          0.91171    0.08397  10.858  < 2e-16 ***
## Gen         -0.54286    0.06872  -7.900 2.79e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## R-sq.(adj) =  0.0736   Deviance explained = 5.56%
## UBRE = 0.79638  Scale est. = 1         n = 2969
anova( brfgam2, brfgam, test="Chisq")
## Analysis of Deviance Table
## 
## Model 1: Mhlth ~ MaxDrnk + Alc + Dep + Gen
## Model 2: Mhlth ~ s(MaxDrnk) + Alc + Dep + Gen
##   Resid. Df Resid. Dev     Df Deviance  Pr(>Chi)    
## 1    2964.0     5323.5                              
## 2    2961.3     5282.1 2.6897   41.388 3.337e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The continuous independent variable (Most drinks on single occasion past 30 days) on the dichotomous dependent variable (mental health days/no mental health days). There was a smooth non-linear association between the IV and DV with higher probability of having a mental health day at fewer drinks up until around 10 drinks, and then the relationship becomes negative, with the decrease in the probability of having a mental health day at higher drinks. In comparing the linear model and the additive model, the additive model explains the model better than the linear model.