#Load packages
library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(sjPlot)
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
library(ggplot2)
library(haven)
#Load BRFSS
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/LLCP2019.XPT")
#Fix BRFSS names
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 342
##    `_STATE` FMONTH IDATE IMONTH IDAY  IYEAR DISPCODE SEQNO `_PSU` CTELENM1
##       <dbl>  <dbl> <chr> <chr>  <chr> <chr>    <dbl> <chr>  <dbl>    <dbl>
##  1        1      1 0118~ 01     18    2019      1100 2019~ 2.02e9        1
##  2        1      1 0113~ 01     13    2019      1100 2019~ 2.02e9        1
##  3        1      1 0118~ 01     18    2019      1100 2019~ 2.02e9        1
##  4        1      1 0118~ 01     18    2019      1200 2019~ 2.02e9        1
##  5        1      1 0104~ 01     04    2019      1100 2019~ 2.02e9        1
##  6        1      1 0118~ 01     18    2019      1200 2019~ 2.02e9        1
##  7        1      1 0104~ 01     04    2019      1100 2019~ 2.02e9        1
##  8        1      1 0123~ 01     23    2019      1100 2019~ 2.02e9        1
##  9        1      1 0124~ 01     24    2019      1100 2019~ 2.02e9        1
## 10        1      1 0113~ 01     13    2019      1100 2019~ 2.02e9        1
## # ... with 332 more variables: PVTRESD1 <dbl>, COLGHOUS <dbl>, STATERE1 <dbl>,
## #   CELPHONE <dbl>, LADULT1 <dbl>, COLGSEX <dbl>, NUMADULT <dbl>,
## #   LANDSEX <dbl>, NUMMEN <dbl>, NUMWOMEN <dbl>, RESPSLCT <dbl>,
## #   SAFETIME <dbl>, CTELNUM1 <dbl>, CELLFON5 <dbl>, CADULT1 <dbl>,
## #   CELLSEX <dbl>, PVTRESD3 <dbl>, CCLGHOUS <dbl>, CSTATE1 <dbl>,
## #   LANDLINE <dbl>, HHADULT <dbl>, SEXVAR <dbl>, GENHLTH <dbl>, PHYSHLTH <dbl>,
## #   MENTHLTH <dbl>, POORHLTH <dbl>, HLTHPLN1 <dbl>, PERSDOC2 <dbl>,
## #   MEDCOST <dbl>, CHECKUP1 <dbl>, BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>,
## #   TOLDHI2 <dbl>, CHOLMED2 <dbl>, CVDINFR4 <dbl>, CVDCRHD4 <dbl>,
## #   CVDSTRK3 <dbl>, ASTHMA3 <dbl>, ASTHNOW <dbl>, CHCSCNCR <dbl>,
## #   CHCOCNCR <dbl>, CHCCOPD2 <dbl>, ADDEPEV3 <dbl>, CHCKDNY2 <dbl>,
## #   DIABETE4 <dbl>, DIABAGE3 <dbl>, HAVARTH4 <dbl>, ARTHEXER <dbl>,
## #   ARTHEDU <dbl>, LMTJOIN3 <dbl>, ARTHDIS2 <dbl>, JOINPAI2 <dbl>,
## #   MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>, NUMHHOL3 <dbl>, NUMPHON3 <dbl>,
## #   CPDEMO1B <dbl>, VETERAN3 <dbl>, EMPLOY1 <dbl>, CHILDREN <dbl>,
## #   INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>, PREGNANT <dbl>, DEAF <dbl>,
## #   BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>, DIFFDRES <dbl>, DIFFALON <dbl>,
## #   SMOKE100 <dbl>, SMOKDAY2 <dbl>, STOPSMK2 <dbl>, LASTSMK2 <dbl>,
## #   USENOW3 <dbl>, ALCDAY5 <dbl>, AVEDRNK3 <dbl>, DRNK3GE5 <dbl>,
## #   MAXDRNKS <dbl>, EXERANY2 <dbl>, EXRACT11 <dbl>, EXEROFT1 <dbl>,
## #   EXERHMM1 <dbl>, EXRACT21 <dbl>, EXEROFT2 <dbl>, EXERHMM2 <dbl>,
## #   STRENGTH <dbl>, FRUIT2 <dbl>, FRUITJU2 <dbl>, FVGREEN1 <dbl>,
## #   FRENCHF1 <dbl>, POTATOE1 <dbl>, VEGETAB2 <dbl>, FLUSHOT7 <dbl>,
## #   FLSHTMY3 <dbl>, TETANUS1 <dbl>, PNEUVAC4 <dbl>, HIVTST7 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(BRFSS19)<-newnames
#Outcome variable
BRFSS19$Drank<-Recode(BRFSS19$drnk3ge5, recodes = "88=0; 77=NA; 99=NA")
hist(BRFSS19$Drank)

summary(BRFSS19$Drank)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00    0.00    0.00    1.25    1.00   76.00  223396

Analyses

#Recode variables
BRFSS19$Black<-Recode(BRFSS19$racegr3, recodes="2=1; 9=NA; else=0")
BRFSS19$White<-Recode(BRFSS19$racegr3, recodes="1=1; 9=NA; else=0")
BRFSS19$Other<-Recode(BRFSS19$racegr3, recodes="3:4=1; 9=NA; else=0")
BRFSS19$Hispanic<-Recode(BRFSS19$racegr3, recodes="5=1; 9=NA; else=0")
BRFSS19$Race_eth<-Recode(BRFSS19$racegr3, 
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
BRFSS19$Alc<-Recode(BRFSS19$acedrink, recodes ="7:9=NA; 1=1;2=0")
BRFSS19$Dep<-Recode(BRFSS19$acedeprs, recodes ="7:9=NA; 1=1;2=0") 
BRFSS19$Gen<-Recode(BRFSS19$birthsex, recodes = "7:9=NA; 1=1;2=0")
#Subset data
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
subset<-BRFSS19%>%
  select(Drank,Alc,Dep,Gen,Race_eth,Black, White, Other, Hispanic,llcpwt,ststr) %>%
  filter( complete.cases(.))
options(survey.lonely.psu = "adjust")

des<-svydesign(ids=~1, strata=~ststr, 
               weights=~llcpwt,
               data = BRFSS19[is.na(BRFSS19$llcpwt)==F,])

Poisson Regression

#Descriptive Statistics
svyhist(~Drank, des)

svyby(~Drank, ~Race_eth+Alc, des, svymean, na.rm=T)
##                    Race_eth Alc    Drank         se
## hispanic.0         hispanic   0 1.125233 0.10477654
## nh black.0         nh black   0 1.208418 0.10925663
## nh multirace.0 nh multirace   0 2.360275 0.37950153
## nh other.0         nh other   0 1.082813 0.15921920
## nhwhite.0           nhwhite   0 1.315679 0.03702244
## hispanic.1         hispanic   1 2.401547 0.32206941
## nh black.1         nh black   1 2.468785 0.44361630
## nh multirace.1 nh multirace   1 3.322718 0.76812281
## nh other.1         nh other   1 3.616181 0.71655462
## nhwhite.1           nhwhite   1 1.970008 0.08264054
svyby(~Drank, ~Dep, des, svymean, na.rm=T)
##   Dep    Drank         se
## 0   0 1.357362 0.03290095
## 1   1 2.004254 0.09652755
#Poisson GLM Fit Survey Data
fit1<-svyglm(Drank~factor(Race_eth)+factor(Dep)+factor(Alc), design=des, family=poisson)
summary(fit1)
## 
## Call:
## svyglm(formula = Drank ~ factor(Race_eth) + factor(Dep) + factor(Alc), 
##     design = des, family = poisson)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~llcpwt, data = BRFSS19[is.na(BRFSS19$llcpwt) == 
##     F, ])
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   0.17007    0.07959   2.137 0.032623 *  
## factor(Race_eth)nh black      0.07481    0.11981   0.624 0.532379    
## factor(Race_eth)nh multirace  0.57740    0.15555   3.712 0.000206 ***
## factor(Race_eth)nh other      0.17080    0.14818   1.153 0.249075    
## factor(Race_eth)nhwhite       0.03554    0.08226   0.432 0.665652    
## factor(Dep)1                  0.23917    0.05373   4.451 8.56e-06 ***
## factor(Alc)1                  0.41912    0.04788   8.754  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 12.45095)
## 
## Number of Fisher Scoring iterations: 7
#exp(estimates) for percentages
#Poisson Mode Risk Ratios
round(exp(summary(fit1)$coef[-1,1]), 3)
##     factor(Race_eth)nh black factor(Race_eth)nh multirace 
##                        1.078                        1.781 
##     factor(Race_eth)nh other      factor(Race_eth)nhwhite 
##                        1.186                        1.036 
##                 factor(Dep)1                 factor(Alc)1 
##                        1.270                        1.521
mean(subset$Drank)
## [1] 1.198342
var(subset$Drank)
## [1] 13.34087

Overdispersion

#Poisson GLM Fit Non Survey Data
fit2<-glm(Drank~factor(Race_eth)+factor(Dep)+factor(Alc), data=BRFSS19, family=poisson)
summary(fit2)
## 
## Call:
## glm(formula = Drank ~ factor(Race_eth) + factor(Dep) + factor(Alc), 
##     family = poisson, data = BRFSS19)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5988  -1.5871  -1.4358  -0.5833  22.4438  
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   0.230685   0.016547  13.941  < 2e-16 ***
## factor(Race_eth)nh black     -0.139972   0.021434  -6.530 6.56e-11 ***
## factor(Race_eth)nh multirace  0.301032   0.029421  10.232  < 2e-16 ***
## factor(Race_eth)nh other      0.126901   0.027354   4.639 3.50e-06 ***
## factor(Race_eth)nhwhite      -0.200350   0.016770 -11.947  < 2e-16 ***
## factor(Dep)1                  0.287727   0.009760  29.480  < 2e-16 ***
## factor(Alc)1                  0.397535   0.009003  44.155  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 247048  on 49431  degrees of freedom
## Residual deviance: 242397  on 49425  degrees of freedom
##   (368836 observations deleted due to missingness)
## AIC: 280257
## 
## Number of Fisher Scoring iterations: 7
#ChiSquare test
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 2.214577
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 0

Quasi distribution

#QuasiPoisson
fit3<-glm(Drank~factor(Alc)+factor(Dep)+factor(Race_eth), data=BRFSS19, family=quasipoisson)
summary(fit3)
## 
## Call:
## glm(formula = Drank ~ factor(Alc) + factor(Dep) + factor(Race_eth), 
##     family = quasipoisson, data = BRFSS19)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5988  -1.5871  -1.4358  -0.5833  22.4438  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   0.23069    0.05991   3.851 0.000118 ***
## factor(Alc)1                  0.39754    0.03259  12.196  < 2e-16 ***
## factor(Dep)1                  0.28773    0.03533   8.143 3.94e-16 ***
## factor(Race_eth)nh black     -0.13997    0.07760  -1.804 0.071269 .  
## factor(Race_eth)nh multirace  0.30103    0.10651   2.826 0.004712 ** 
## factor(Race_eth)nh other      0.12690    0.09903   1.281 0.200038    
## factor(Race_eth)nhwhite      -0.20035    0.06071  -3.300 0.000968 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 13.10665)
## 
##     Null deviance: 247048  on 49431  degrees of freedom
## Residual deviance: 242397  on 49425  degrees of freedom
##   (368836 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 7

Negative binomial models

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="ststr"))
## 
## z test of coefficients:
## 
##                               Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                   0.230685   0.057885  3.9852 6.742e-05 ***
## factor(Race_eth)nh black     -0.139972   0.076469 -1.8304 0.0671857 .  
## factor(Race_eth)nh multirace  0.301032   0.109523  2.7486 0.0059855 ** 
## factor(Race_eth)nh other      0.126901   0.104766  1.2113 0.2257882    
## factor(Race_eth)nhwhite      -0.200350   0.059116 -3.3891 0.0007012 ***
## factor(Dep)1                  0.287727   0.036446  7.8947 2.911e-15 ***
## factor(Alc)1                  0.397535   0.033765 11.7735 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Negative binomial fit
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
fit.nb1<-glm.nb(Drank~factor(Race_eth),
              data=subset,
              weights=llcpwt/mean(llcpwt, na.rm=T))

fit.nb2<-glm.nb(Drank~factor(Race_eth)+factor(Dep)+factor(Alc),
              data=subset,
              weights=llcpwt/mean(llcpwt, na.rm=T))

tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="llcpwt"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="llcpwt"))
library(stargazer)

stargazer(fit.nb1, fit.nb2,style="demography", type = "text", t.auto=F,p.auto=F,coef=list(tests1[, 1],tests[,1]),  se =list(tests1[, 2], tests[, 2]), p=list(tests1[,4],tests[, 4])   )
## 
## --------------------------------------------------------------
##                                            Drank              
##                                  Model 1          Model 2     
## --------------------------------------------------------------
## factor(Race_eth)nh black          -0.510           -0.353     
##                                  (0.330)          (0.330)     
## factor(Race_eth)nh multirace      0.076            0.238      
##                                  (0.516)          (0.516)     
## factor(Race_eth)nh other         -1.015*           -0.588     
##                                  (0.484)          (0.484)     
## factor(Race_eth)nhwhite           -0.413           -0.178     
##                                  (0.289)          (0.289)     
## factor(Dep)1                                      0.418**     
##                                                   (0.147)     
## factor(Alc)1                                      0.386**     
##                                                   (0.142)     
## Constant                         0.712**           0.264      
##                                  (0.275)          (0.275)     
## N                                 3,015            3,015      
## Log Likelihood                  -3,964.059       -3,944.901   
## theta                        0.166*** (0.007) 0.172*** (0.008)
## AIC                             7,938.118        7,903.803    
## --------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001

Count Outcome: Binge Drinking; Considering all types of alcoholic beverages, how many times during the past 30 days did you have 5 or more drinks for men or 4 or more drinks for women on an occasion? Research Question: Does growing up with a problem drinker or alcoholic or growing up with anyone who was depressed, mentally ill, or suicidal, and race/ethnicity predict the number of times respondents binged drink during the past 30 days. It is a count model, so, no, an offset term necessary because each respondent has the same time reference (30 days) when asked to recall to answer the question. Dispersion: In evaluating the level of dispersion in the outcome, the mean and variance of the Poisson regression model are not equal with variance being higher than the mean: over dispersion. Based on the Chi-square fit test, the Poisson model is not a good choice. Rather, Negative Binomial GLM is a better suited model. In comparing the two Negative Binomial GLMs, the two models have similar SEs but differ in betas. Also, the second model has smaller AIC, indicating a better fitted model. Results: Respondents who grew up with a problem drinker or alcoholic had 37% more binge drinking days than respondents who did not grow up with a problem drinker or alcoholic. Respondents who grew up with anyone who was depressed, mentally ill, or suicidal had 42% more binge drinking days than respondents who did not grow up with anyone who was depressed, mentally ill, or suicidal.