#Packages
library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(splines)
library(mgcv)
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 4.0.4
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-35. For overview type 'help("mgcv-package")'.
#Import Data
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
#Clean Variables
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
## `_STATE` FMONTH IDATE IMONTH IDAY IYEAR DISPCODE SEQNO `_PSU` CTELENM1
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 1 1 011820~ 01 18 2019 1100 201900~ 2.02e9 1
## 2 1 1 011320~ 01 13 2019 1100 201900~ 2.02e9 1
## 3 1 1 011820~ 01 18 2019 1100 201900~ 2.02e9 1
## 4 1 1 011820~ 01 18 2019 1200 201900~ 2.02e9 1
## 5 1 1 010420~ 01 04 2019 1100 201900~ 2.02e9 1
## 6 1 1 011820~ 01 18 2019 1200 201900~ 2.02e9 1
## 7 1 1 010420~ 01 04 2019 1100 201900~ 2.02e9 1
## 8 1 1 012320~ 01 23 2019 1100 201900~ 2.02e9 1
## 9 1 1 012420~ 01 24 2019 1100 201900~ 2.02e9 1
## 10 1 1 011320~ 01 13 2019 1100 201900~ 2.02e9 1
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## # CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## # LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## # SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## # CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## # LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## # MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## # MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## # TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## # CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## # CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## # DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## # ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## # MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## # CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## # INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## # BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## # SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## # USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## # MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## # EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## # STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## # FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## # FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(BRFSS19)<-newnames
#Recode Variables
#Independent Variables
BRFSS19$MaxDrnk<-Recode(BRFSS19$maxdrnks, recodes = "88=0; 77=NA; 99=NA")
BRFSS19$Alc<-Recode(BRFSS19$acedrink, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$acedeprs, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
#Dependent Variable (Mental health days = 1, No mental health days = 0)
BRFSS19$Mhlth<-Recode(BRFSS19$ment14d, recodes ="7:9=NA; 1=0;2=1;3=1")
#Complete Cases
library(dplyr)
BRFSS19sub<-BRFSS19%>%
select(MaxDrnk,Alc,Dep,Gen,Mhlth,llcpwt, ststr )%>%
filter(complete.cases(.))
brfgam<-gam(Mhlth~s(MaxDrnk)+ Alc + Dep+ Gen,
data=BRFSS19,
weights = ststr/mean(ststr, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(brfgam)
##
## Family: binomial
## Link function: logit
##
## Formula:
## Mhlth ~ s(MaxDrnk) + Alc + Dep + Gen
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.41435 0.05371 -7.714 1.22e-14 ***
## Alc 0.39901 0.07965 5.010 5.45e-07 ***
## Dep 0.90511 0.08440 10.724 < 2e-16 ***
## Gen -0.60030 0.06987 -8.592 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(MaxDrnk) 2.975 3.69 76.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0821 Deviance explained = 6.3%
## UBRE = 0.78377 Scale est. = 1 n = 2969
plot(brfgam)
brfgam2<-gam(Mhlth~ MaxDrnk+ Alc + Dep+ Gen,
data=BRFSS19,
weights = ststr/mean(ststr, na.rm=T),
family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(brfgam2)
##
## Family: binomial
## Link function: logit
##
## Formula:
## Mhlth ~ MaxDrnk + Alc + Dep + Gen
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.67209 0.05782 -11.623 < 2e-16 ***
## MaxDrnk 0.06539 0.01055 6.195 5.81e-10 ***
## Alc 0.41011 0.07921 5.177 2.25e-07 ***
## Dep 0.91171 0.08397 10.858 < 2e-16 ***
## Gen -0.54286 0.06872 -7.900 2.79e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.0736 Deviance explained = 5.56%
## UBRE = 0.79638 Scale est. = 1 n = 2969
anova( brfgam2, brfgam, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: Mhlth ~ MaxDrnk + Alc + Dep + Gen
## Model 2: Mhlth ~ s(MaxDrnk) + Alc + Dep + Gen
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 2964.0 5323.5
## 2 2961.3 5282.1 2.6897 41.388 3.337e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The continuous independent variable (Most drinks on single occasion past 30 days) on the dichotomous dependent variable (mental health days/no mental health days). There was a smooth non-linear association between the IV and DV with higher probability of having a mental health day at fewer drinks up until around 10 drinks, and then the relationship becomes negative, with the decrease in the probability of having a mental health day at higher drinks. In comparing the linear model and the additive model, the additive model explains the model better than the linear model.